def test_deletes_dependencies(tmpdir_setup):
    graph = dependency.Graph()
    graph.add('a', 'b')
    assert len(graph.dependencies('a')) == 1
    # The graph is persisted, so a freshly created Graph() sees the same state
    graph = dependency.Graph()
    assert len(graph.dependencies('a')) == 1

    # The `clear` command wipes the serialized dependency graph
    CliRunner().invoke(clear)
    graph = dependency.Graph()
    assert len(graph.dependencies('a')) == 0
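
All of these snippets lean on the same small, persisted graph object. A minimal sketch of the dependency.Graph surface as it appears from usage in the examples that follow (names and semantics inferred from the snippets themselves, not from any library documentation):

# Inferred usage sketch; the import path is an assumption about the project layout.
from regparser.index import dependency  # assumed module path

graph = dependency.Graph()
graph.add('output/path', 'input/path')   # declare: the output depends on the input
graph.dependencies('output/path')        # list the declared inputs for an output
'output/path' in graph                   # membership test for declared outputs
graph.validate_for('output/path')        # raises dependency.Missing if an input file is absent
graph.is_stale('output/path')            # True when the output is missing or older than an input
graph.clear_for('output/path')           # drop the inputs previously declared for an output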
Example 2
    def test_stale_layers(self):
        """We should have dependencies between all of the layers and their
        associated trees. We should also tie the meta layer to the version"""
        configured_layers = {'cfr': {'keyterms': None, 'other': None}}
        with self.cli.isolated_filesystem(), patch.dict(
                layers.LAYER_CLASSES, configured_layers):
            version_entry = entry.Version(111, 22, 'aaa')
            version_entry.write(Version('aaa', date.today(), date.today()))
            tree_entry = entry.Tree(111, 22, 'aaa')
            # stale_layers is a generator; list() forces evaluation so Missing raises
            self.assertRaises(dependency.Missing, list,
                              layers.stale_layers(tree_entry, 'cfr'))

            entry.Entry('tree', 111, 22, 'bbb').write(b'')  # wrong version
            self.assertRaises(dependency.Missing, list,
                              layers.stale_layers(tree_entry, 'cfr'))

            entry.Entry('tree', 111, 22, 'aaa').write(b'')
            six.assertCountEqual(self, layers.stale_layers(tree_entry, 'cfr'),
                                 ['keyterms', 'other'])

            self.assertIn(
                str(version_entry),
                dependency.Graph().dependencies(
                    str(entry.Layer.cfr(111, 22, 'aaa', 'meta'))))
    def test_dependencies(self, notice_xmls_for_url):
        """If the xml comes from a local source, we should expect a dependency
        be present. Otherwise, we should expect no dependency"""
        cli = CliRunner()
        self.expect_common_json()
        notice_xmls_for_url.return_value = [self.example_xml(source='./here')]
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            entry_str = str(entry.Notice() / '1234-5678')
            self.assertIn(entry_str, dependency.Graph())

        notice_xmls_for_url.return_value[0].source = 'http://example.com'
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            entry_str = str(entry.Notice() / '1234-5678')
            self.assertNotIn(entry_str, dependency.Graph())
Example 4
def fetch_annual_edition(cfr_title, cfr_part, year):
    """Download an annual edition of a regulation"""
    volume = annual.find_volume(year, cfr_title, cfr_part)
    xml = volume.find_part_xml(cfr_part).preprocess()
    annual_entry = entry.Annual(cfr_title, cfr_part, year)
    annual_entry.write(xml)
    if xml.source_is_local:
        dependency.Graph().add(str(annual_entry), xml.source)
Example 5
def generate_dependencies(version_dir, version_ids, delays_by_version):
    """Creates a dependency graph and adds all dependencies for input xml and
    delays between notices"""
    notice_dir = entry.Notice()
    deps = dependency.Graph()
    for version_id in version_ids:
        deps.add(version_dir / version_id, notice_dir / version_id)
    for delayed, delay in delays_by_version.items():
        deps.add(version_dir / delayed, notice_dir / delay.by)
    return deps
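
A hedged usage sketch for generate_dependencies; the delay values only need a `.by` attribute here, so a namedtuple stands in for whatever type the caller actually passes:

from collections import namedtuple

Delay = namedtuple('Delay', ['by'])   # stand-in for the real delay type (hypothetical)
version_dir = entry.Version('12', '1000')
deps = generate_dependencies(
    version_dir,
    ['111', '222'],
    {'222': Delay(by='333')})
# Each version now depends on its notice; the delayed version ('222') also
# depends on the notice that delays it ('333').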
Example 6
def preprocess_notice(document_number):
    """Preprocess notice XML. Either fetch from the Federal Register or read a
    notice from disk. Apply some common transformations to it and output the
    resulting file(s). There may be more than one as documents might be split
    if they have multiple effective dates."""
    meta = federalregister.meta_data(
        document_number, [
            "agencies",
            "docket_ids",
            "effective_on",
            "cfr_references",
            "comments_close_on",
            "full_text_xml_url",
            "html_url",
            "publication_date",
            "regulation_id_numbers",
            "volume"
        ])
    notice_xmls = list(notice_xmls_for_url(document_number,
                                           meta['full_text_xml_url']))
    deps = dependency.Graph()
    for notice_xml in notice_xmls:
        notice_xml.published = meta['publication_date']
        notice_xml.fr_volume = meta['volume']
        if meta.get('html_url'):
            notice_xml.fr_html_url = meta['html_url']
        if meta.get("comments_close_on"):
            notice_xml.comments_close_on = meta["comments_close_on"]
        if meta.get('regulation_id_numbers'):
            notice_xml.rins = meta['regulation_id_numbers']
        if meta.get('docket_ids'):
            notice_xml.docket_ids = meta['docket_ids']

        notice_xml.set_agencies(meta.get('agencies', []))

        cfr_refs = convert_cfr_refs(meta.get('cfr_references', []))
        if cfr_refs:
            notice_xml.cfr_refs = cfr_refs

        file_name = document_number
        if len(notice_xmls) > 1:
            effective_date = notice_xml.derive_effective_date()
            file_name = split_doc_num(document_number,
                                      effective_date.isoformat())
        elif meta.get('effective_on'):
            notice_xml.effective = meta['effective_on']

        notice_xml.version_id = file_name
        notice_xml.derive_where_needed()

        notice_entry = entry.Notice(file_name)
        notice_entry.write(notice_xml)
        if notice_xml.source_is_local:
            deps.add(str(notice_entry), notice_xml.source)
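
As in the tests above, this Click command can be exercised through Click's test runner; a minimal sketch with a placeholder document number:

from click.testing import CliRunner

runner = CliRunner()
with runner.isolated_filesystem():
    # NOTE: without mocking the Federal Register responses (cf. expect_common_json
    # in the tests above), this invocation hits the live API.
    result = runner.invoke(preprocess_notice, ['1234-5678'])
    print(result.exit_code, result.output)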
Example 7
    def test_dependencies_remote(self, notice_xmls_for_url):
        """If the xml comes from a remote source, we should not see a
        dependency"""
        cli = CliRunner()
        self.expect_common_json()
        notice_xmls_for_url.return_value = [self.example_xml(source='./here')]
        notice_xmls_for_url.return_value[0].source = 'http://example.com'
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            entry_str = str(entry.Notice() / '1234-5678')
            assert len(dependency.Graph().dependencies(entry_str)) == 0
Example 8
def is_stale(cfr_title, cfr_part, version_id):
    """Modify and process dependency graph related to a single SxS layer"""
    deps = dependency.Graph()
    layer_entry = entry.Layer(cfr_title, cfr_part, version_id, 'analyses')

    # Layers depend on their associated tree
    deps.add(layer_entry, entry.Tree(cfr_title, cfr_part, version_id))
    # And on all notices which came before
    for sxs_entry in previous_sxs(cfr_title, cfr_part, version_id):
        deps.add(layer_entry, sxs_entry)

    deps.validate_for(layer_entry)
    return deps.is_stale(layer_entry)
Example 9
def notice_preamble(doc_number):
    """Pull down and parse the preamble from this notice."""
    logger.info("Parsing Preamble for %s", doc_number)
    preamble_path = entry.Preamble(convert_id(doc_number))
    notice_path = entry.Notice(doc_number)

    deps = dependency.Graph()
    deps.add(preamble_path, notice_path)
    deps.validate_for(preamble_path)

    if deps.is_stale(preamble_path):
        preamble = parse_preamble(notice_path.read())
        preamble_path.write(preamble)
Example 10
    def test_derived_from_rules(self):
        """Should filter a set of version ids to only those with a dependency
        on changes derived from a rule"""
        with self.cli.isolated_filesystem():
            tree_dir = entry.Tree('12', '1000')

            deps = dependency.Graph()
            deps.add(tree_dir / 111, entry.Annual(12, 1000, 2001))
            deps.add(tree_dir / 222, entry.RuleChanges(222))
            deps.add(tree_dir / 333, entry.RuleChanges(333))
            deps.add(tree_dir / 333, entry.Version(333))
            derived = fill_with_rules.derived_from_rules(
                ['111', '222', '333', '444'], deps, tree_dir)
            self.assertEqual(derived, ['222', '333'])
Example 11
def test_is_derived():
    """Should filter version ids to only those with a dependency on
    changes derived from a rule"""
    tree_dir = entry.Tree('12', '1000')

    deps = dependency.Graph()
    deps.add(tree_dir / 111, entry.Annual(12, 1000, 2001))
    deps.add(tree_dir / 222, entry.Notice(222))
    deps.add(tree_dir / 333, entry.Notice(333))
    deps.add(tree_dir / 333, entry.Version(333))
    assert not fill_with_rules.is_derived('111', deps, tree_dir)
    assert fill_with_rules.is_derived('222', deps, tree_dir)
    assert fill_with_rules.is_derived('333', deps, tree_dir)
    assert not fill_with_rules.is_derived('444', deps, tree_dir)
def write_if_stale(notice_xml):
    """We only want to write out the processed xml if it is "stale", i.e. if
    its source has changed"""
    deps = dependency.Graph()
    notice_entry = entry.Notice(notice_xml.version_id)

    new_notice = notice_entry not in deps
    diff_source = notice_xml.source not in deps.dependencies(str(notice_entry))
    source_changed = deps.is_stale(notice_entry)

    if new_notice or diff_source or source_changed:
        deps.clear_for(notice_entry)
        deps.add(notice_entry, notice_xml.source)
        notice_entry.write(notice_xml)
def process_tree_if_needed(cfr_title, cfr_part, version_id):
    """Creates and writes a regulation tree if the appropriate notice
    exists"""
    notice_entry = entry.Notice(version_id)
    tree_entry = entry.Tree(cfr_title, cfr_part, version_id)

    deps = dependency.Graph()
    deps.add(tree_entry, notice_entry)
    deps.validate_for(tree_entry)

    if deps.is_stale(tree_entry):
        notice_xml = notice_entry.read()
        tree = build_tree(regtext_for_part(notice_xml, cfr_title, cfr_part))
        tree_entry.write(tree)
def process_version_if_needed(cfr_title, cfr_part, version_id):
    """Creates and writes a version struct after validating the Notice has
    been created"""
    notice_entry = entry.Notice(version_id)
    version_entry = entry.Version(cfr_title, cfr_part, version_id)

    deps = dependency.Graph()
    deps.add(version_entry, notice_entry)
    deps.validate_for(version_entry)

    if deps.is_stale(version_entry):
        notice_xml = notice_entry.read()
        version = Version(version_id, notice_xml.effective,
                          notice_xml.fr_citation)
        version_entry.write(version)
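
The two `*_if_needed` helpers above (together with a preprocessed notice) suggest a natural notice → version → tree ordering, since both outputs declare the notice as their dependency. A purely hypothetical driver that chains them for one version id:

def build_single_version(cfr_title, cfr_part, version_id):
    # Hypothetical wrapper, not part of the snippets above; the real pipeline
    # wiring is not shown here. The order follows the declared dependencies.
    process_version_if_needed(cfr_title, cfr_part, version_id)
    process_tree_if_needed(cfr_title, cfr_part, version_id)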
Example 15
    def test_is_derived(self):
        """Should filter version ids to only those with a dependency on
        changes derived from a rule"""
        with self.cli.isolated_filesystem():
            tree_dir = entry.Tree('12', '1000')

            deps = dependency.Graph()
            deps.add(tree_dir / 111, entry.Annual(12, 1000, 2001))
            deps.add(tree_dir / 222, entry.Notice(222))
            deps.add(tree_dir / 333, entry.Notice(333))
            deps.add(tree_dir / 333, entry.Version(333))
            self.assertFalse(fill_with_rules.is_derived('111', deps, tree_dir))
            self.assertTrue(fill_with_rules.is_derived('222', deps, tree_dir))
            self.assertTrue(fill_with_rules.is_derived('333', deps, tree_dir))
            self.assertFalse(fill_with_rules.is_derived('444', deps, tree_dir))
    def test_dependencies_serialized(self):
        """Every instance of dependency.Graph shares a serialized copy of the
        dependencies"""
        with self.dependency_graph() as dgraph:
            dgraph.add(self.depender, self.dependency / '1')
            dgraph.add(self.depender, self.dependency / '2')
            six.assertCountEqual(
                self,
                dgraph.dependencies(str(self.depender)),
                [str(self.dependency / 1), str(self.dependency / 2)])

            six.assertCountEqual(
                self,
                dependency.Graph().dependencies(str(self.depender)),
                [str(self.dependency / 1), str(self.dependency / 2)])
def dependencies(tree_path, version_ids, cfr_title, cfr_part):
    """Set up the dependency graph for this regulation. First calculates
    "gaps" -- versions for which there is no existing tree. In this
    calculation, we ignore the first version, as we won't be able to build
    anything for it. Add dependencies for any gaps, tying the output tree to
    the preceding tree, the version info and the parsed rule"""
    existing_ids = set(tree_path)
    gaps = [(prev, curr) for prev, curr in zip(version_ids, version_ids[1:])
            if curr not in existing_ids]

    deps = dependency.Graph()
    for prev, curr in gaps:
        deps.add(tree_path / curr, tree_path / prev)
        deps.add(tree_path / curr, entry.RuleChanges(curr))
        deps.add(tree_path / curr, entry.Version(cfr_title, cfr_part, curr))
    return deps
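
To make the "gaps" step above concrete, here is a small worked example with hypothetical version ids: only versions whose tree does not exist yet, other than the first, survive the filter.

# Worked example of the gap calculation (hypothetical ids)
version_ids = ['v1', 'v2', 'v3']
existing_ids = {'v2'}                      # only 'v2' already has a tree
gaps = [(prev, curr) for prev, curr in zip(version_ids, version_ids[1:])
        if curr not in existing_ids]
assert gaps == [('v2', 'v3')]              # 'v3' must be rebuilt from 'v2'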
Example 18
def process_if_needed(volume, cfr_part):
    """Review dependencies; if they're out of date, parse the annual edition
    into a tree and store that"""
    version_id = _version_id(volume.year, cfr_part)
    annual_entry = entry.Annual(volume.title, cfr_part, volume.year)
    tree_entry = entry.Tree(volume.title, cfr_part, version_id)
    notice_entry = entry.Notice(version_id)

    deps = dependency.Graph()
    deps.add(tree_entry, annual_entry)
    deps.validate_for(tree_entry)
    if deps.is_stale(tree_entry):
        tree = xml_parser.reg_text.build_tree(annual_entry.read().xml)
        tree_entry.write(tree)
        notice_entry.write(
            build_fake_notice(version_id, volume.publication_date,
                              volume.title, cfr_part))
Example 19
def stale_layers(doc_entry, doc_type):
    """Return the name of layer dependencies which are now stale. Limit to a
    particular doc_type"""
    deps = dependency.Graph()
    layer_dir = entry.Layer(doc_type, *doc_entry.path)
    for layer_name in LAYER_CLASSES[doc_type]:
        # Layers depend on their associated tree
        deps.add(layer_dir / layer_name, doc_entry)
    if doc_type == 'cfr':
        # Meta layer also depends on the version info
        deps.add(layer_dir / 'meta', entry.Version(*doc_entry.path))

    for layer_name in LAYER_CLASSES[doc_type]:
        layer_entry = layer_dir / layer_name
        deps.validate_for(layer_entry)
        if deps.is_stale(layer_entry):
            yield layer_name
    def test_rebuild(self):
        """Validate that the `rebuild()` method calculates the correct
        "stale" references"""
        with CliRunner().isolated_filesystem():
            graph = dependency.Graph()

            path = entry.Entry('path')
            a, b, c, d = [path / char for char in 'abcd']
            # (A, B) -> C -> D
            graph.add(c, a)
            graph.add(c, b)
            graph.add(d, c)

            # None of the files exist yet; A & B have no dependencies, so they
            # are stale due to themselves. C & D are stale due to either A or B
            self.assert_rebuilt_state(graph,
                                      path,
                                      a='a',
                                      b='b',
                                      c='ab',
                                      d='ab')

            b.write(b'bbb')
            # B exists now, so dependency errors are only due to A now
            self.assert_rebuilt_state(graph, path, a='a', b='', c='a', d='a')

            a.write(b'aaa')
            # A exists now, too, so C is the bottleneck
            self.assert_rebuilt_state(graph, path, a='', b='', c='c', d='c')

            c.write(b'ccc')
            # Now there's only the final self-reference
            self.assert_rebuilt_state(graph, path, a='', b='', c='', d='d')

            d.write(b'ddd')
            # Now no one is stale
            self.assert_rebuilt_state(graph, path, a='', b='', c='', d='')

            self._touch(a, 1000)
            # A's been updated. Need to run everything after it
            self.assert_rebuilt_state(graph, path, a='', b='', c='a', d='a')

            self._touch(d, 2000)
            self._touch(c, 3000)
            # C and D have been updated, but C's been updated after D
            self.assert_rebuilt_state(graph, path, a='', b='', c='', d='c')
Example 21
def dependencies(tree_dir, version_dir, versions_with_parents):
    """Set up the dependency graph for this regulation. First calculates
    "gaps" -- versions for which there is no existing tree. In this
    calculation, we ignore the first version, as we won't be able to build
    anything for it. Add dependencies for any gaps, tying the output tree to
    the preceding tree, the version info and the parsed rule"""
    existing_tree_ids = set(tree_dir)
    versions_with_parents = versions_with_parents[1:]
    gaps = [(version, parent) for (version, parent) in versions_with_parents
            if version.identifier not in existing_tree_ids]

    deps = dependency.Graph()
    for version, parent in gaps:
        doc_number = version.identifier
        deps.add(tree_dir / doc_number, tree_dir / parent.identifier)
        deps.add(tree_dir / doc_number, entry.Notice(doc_number))
        deps.add(tree_dir / doc_number, version_dir / doc_number)
    return deps
Example 22
def parse_rule_changes(document_number):
    """Parse changes present in a single rule.

    DOCUMENT_NUMBER is the identifier associated with a final rule. If a rule
    has been split, use the split identifiers, a.k.a. version ids."""
    rule_entry = entry.RuleChanges(document_number)
    notice_entry = entry.Notice(document_number)

    deps = dependency.Graph()
    deps.add(rule_entry, notice_entry)

    deps.validate_for(rule_entry)
    # We don't check for staleness as we want to always execute when given a
    # specific file to process

    notice_xml = notice_entry.read()
    notice = process_amendments({'cfr_parts': notice_xml.cfr_parts},
                                notice_xml.xml)
    rule_entry.write(notice)
Example 23
def process_if_needed(cfr_title, cfr_part, last_version_list):
    """Calculate dependencies between input and output files for these annual
    editions. If an output is missing or out of date, process it"""
    annual_path = entry.Annual(cfr_title, cfr_part)
    tree_path = entry.Tree(cfr_title, cfr_part)
    version_path = entry.Version(cfr_title, cfr_part)
    deps = dependency.Graph()

    for last_version in last_version_list:
        deps.add(tree_path / last_version.version_id,
                 version_path / last_version.version_id)
        deps.add(tree_path / last_version.version_id,
                 annual_path / last_version.year)

    for last_version in last_version_list:
        tree_entry = tree_path / last_version.version_id
        deps.validate_for(tree_entry)
        if deps.is_stale(tree_entry):
            input_entry = annual_path / last_version.year
            tree = gpo_cfr.builder.build_tree(input_entry.read().xml)
            tree_entry.write(tree)
Example 24
def fetch_sxs(document_number):
    """Fetch and parse Section-by-Section analyses.

    DOCUMENT_NUMBER is the identifier associated with a final rule. If a rule
    has been split, use the split identifiers, a.k.a. version ids"""
    sxs_entry = entry.SxS(document_number)
    notice_entry = entry.Notice(document_number)

    deps = dependency.Graph()
    deps.add(sxs_entry, notice_entry)

    deps.validate_for(sxs_entry)
    # We don't check for staleness as we want to always execute when given a
    # specific file to process

    # @todo - break apart processing of SxS. We don't need all of the other
    # fields
    notice_xml = notice_entry.read()
    notice_meta = meta_data(document_number, FULL_NOTICE_FIELDS)
    notice = build_notice(notice_xml.cfr_titles[0], None, notice_meta,
                          xml_to_process=notice_xml.xml)[0]
    sxs_entry.write(notice)
Example 25
def diffs(cfr_title, cfr_part):
    """Construct diffs between known trees."""
    logger.info("Build diffs - %s Part %s", cfr_title, cfr_part)
    tree_dir = entry.FrozenTree(cfr_title, cfr_part)
    diff_dir = entry.Diff(cfr_title, cfr_part)
    pairs = [(lhs, rhs) for lhs in tree_dir for rhs in tree_dir]
    deps = dependency.Graph()
    for lhs_id, rhs_id in pairs:
        deps.add(diff_dir / lhs_id / rhs_id, tree_dir / lhs_id)
        deps.add(diff_dir / lhs_id / rhs_id, tree_dir / rhs_id)

    trees = {}
    for lhs_id, rhs_id in pairs:
        path = diff_dir / lhs_id / rhs_id
        deps.validate_for(path)
        if deps.is_stale(path):
            if lhs_id not in trees:
                trees[lhs_id] = (tree_dir / lhs_id).read()
            if rhs_id not in trees:
                trees[rhs_id] = (tree_dir / rhs_id).read()

            path.write(dict(changes_between(trees[lhs_id], trees[rhs_id])))
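
Note that `pairs` above is the full Cartesian product of tree ids, including the trivial self-diffs; a quick sketch with two hypothetical ids:

# With two trees the loop above declares dependencies for four diffs,
# including the (lhs == rhs) self-diffs.
tree_ids = ['v1', 'v2']
pairs = [(lhs, rhs) for lhs in tree_ids for rhs in tree_ids]
assert pairs == [('v1', 'v1'), ('v1', 'v2'), ('v2', 'v1'), ('v2', 'v2')]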
Example 26
def test_stale_layers(monkeypatch):
    """We should have dependencies between all of the layers and their
    associated trees. We should also tie the meta layer to the version"""
    monkeypatch.setattr(layers, 'LAYER_CLASSES',
                        {'cfr': {
                            'keyterms': None,
                            'other': None
                        }})

    version_entry = entry.Version(111, 22, 'aaa')
    version_entry.write(Version('aaa', date.today(), Citation(1, 1)))
    tree_entry = entry.Tree(111, 22, 'aaa')
    with pytest.raises(dependency.Missing):
        layers.stale_layers(tree_entry, 'cfr')

    entry.Entry('tree', 111, 22, 'bbb').write(b'')  # wrong version
    with pytest.raises(dependency.Missing):
        layers.stale_layers(tree_entry, 'cfr')

    entry.Entry('tree', 111, 22, 'aaa').write(b'')
    assert set(layers.stale_layers(tree_entry, 'cfr')) == {'keyterms', 'other'}

    assert str(version_entry) in dependency.Graph().dependencies(
        str(entry.Layer.cfr(111, 22, 'aaa', 'meta')))
    @contextmanager  # from contextlib; decorator assumed, since the helper is used as a context manager above
    def dependency_graph(self):
        with CliRunner().isolated_filesystem():
            path = entry.Entry('path')
            self.depender = path / 'depender'
            self.dependency = path / 'dependency'
            yield dependency.Graph()