Ejemplo n.º 1
0
def add_notices():
    """Write four notices (v0-v3) whose CFR references differ slightly,
    giving tests an uneven assortment to work with."""
    with XMLBuilder("ROOT", **{"eregs-version-id": "v0",
                               "fr-volume": 1,
                               "fr-start-page": 2,
                               "fr-end-page": 3}) as ctx:
        ctx.AGENCY("Agency")
        ctx.SUBJECT("Subj")
        ctx.DATES(**{'eregs-published-date': '2001-01-01'})
        with ctx.EREGS_CFR_REFS():
            ctx.EREGS_CFR_TITLE_REF(title=11)
    xml = ctx.xml
    entry.Notice('v0').write(NoticeXML(xml))

    # v1 adds a part reference beneath the existing title reference
    title_ref = xml.xpath('//EREGS_CFR_TITLE_REF')[0]
    etree.SubElement(title_ref, 'EREGS_CFR_PART_REF', part='1000')
    xml.attrib['eregs-version-id'] = 'v1'
    entry.Notice('v1').write(NoticeXML(xml))

    # v2 switches to a different CFR title
    title_ref.attrib['title'] = '12'
    xml.attrib['eregs-version-id'] = 'v2'
    entry.Notice('v2').write(NoticeXML(xml))

    # v3 repeats v2's references under a new version id
    xml.attrib['eregs-version-id'] = 'v3'
    entry.Notice('v3').write(NoticeXML(xml))
Ejemplo n.º 2
0
    def test_single_notice_one_agency_meta(self, notice_xmls_for_url):
        """Agency information present in the metadata should be written
        into the notice XML as EREGS_AGENCY elements."""
        cli = CliRunner()
        epa_meta = {
            u'name': u'Environmental Protection Agency',
            u'parent_id': None,
            u'raw_name': u'ENVIRONMENTAL PROTECTION AGENCY',
            u'url': ('https://www.federalregister.gov/agencies/'
                     'environmental-protection-agency'),
            u'json_url': ('https://www.federalregister.gov/api/v1/agencies/'
                          '145.json'),
            u'id': 145
        }
        self.expect_common_json(agencies=[epa_meta])
        notice_xmls_for_url.return_value = [self.example_xml()]
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            self.assertEqual(1, len(list(entry.Notice().sub_entries())))

            written = entry.Notice('1234-5678').read()
            self.assertEqual(1, len(written.xpath("//EREGS_AGENCIES")))
            agencies = written.xpath("//EREGS_AGENCY")
            self.assertEqual(1, len(agencies))
            self.assertEqual("Environmental Protection Agency",
                             agencies[0].attrib["name"])
            self.assertEqual("ENVIRONMENTAL PROTECTION AGENCY",
                             agencies[0].attrib["raw-name"])
            self.assertEqual("145", agencies[0].attrib["agency-id"])
Ejemplo n.º 3
0
    def test_single_notice_cfr_refs_from_metadata(self, notice_xmls_for_url):
        """CFR references in the metadata should become EREGS_CFR_* elements
        in the written XML."""
        cli = CliRunner()
        refs = [{"title": "40", "part": "300"},
                {"title": "40", "part": "301"}]
        self.expect_common_json(cfr_references=refs)
        notice_xmls_for_url.return_value = [self.example_xml()]
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            self.assertEqual(1, len(list(entry.Notice().sub_entries())))

            written = entry.Notice('1234-5678').read()
            self.assertEqual(1, len(written.xpath("//EREGS_CFR_REFS")))
            title_refs = written.xpath("//EREGS_CFR_TITLE_REF")
            self.assertEqual(1, len(title_refs))
            self.assertEqual("40", title_refs[0].attrib["title"])
            part_refs = written.xpath("//EREGS_CFR_PART_REF")
            self.assertEqual(2, len(part_refs))
            self.assertEqual("300", part_refs[0].attrib["part"])
            self.assertEqual("301", part_refs[1].attrib["part"])
Ejemplo n.º 4
0
    def test_single_notice(self, notice_xmls_for_url):
        """Integration test: a document number backed by a single XML file
        should produce exactly one (modified) written notice."""
        cli = CliRunner()
        self.expect_common_json()
        notice_xmls_for_url.return_value = [self.example_xml()]
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            sub_entries = list(entry.Notice().sub_entries())
            self.assertEqual(1, len(sub_entries))
            written = entry.Notice('1234-5678').read()
            self.assertEqual(date(2008, 8, 8), written.effective)
Ejemplo n.º 5
0
def test_is_derived():
    """Only version ids whose tree depends on a parsed rule should count
    as derived from a rule"""
    tree_dir = entry.Tree('12', '1000')

    graph = dependency.Graph()
    graph.add(tree_dir / 111, entry.Annual(12, 1000, 2001))
    graph.add(tree_dir / 222, entry.Notice(222))
    graph.add(tree_dir / 333, entry.Notice(333))
    graph.add(tree_dir / 333, entry.Version(333))
    # Only 222 and 333 depend on a Notice entry
    derived = [vid for vid in ('111', '222', '333', '444')
               if fill_with_rules.is_derived(vid, graph, tree_dir)]
    assert derived == ['222', '333']
Ejemplo n.º 6
0
    def test_single_notice_comments_close_on_meta(self, notice_xmls_for_url):
        """A comment-closing date present in the metadata should end up on
        the written notice object."""
        cli = CliRunner()
        self.expect_common_json(comments_close_on="2010-10-10")
        notice_xmls_for_url.return_value = [self.example_xml()]
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            self.assertEqual(1, len(list(entry.Notice().sub_entries())))
            written = entry.Notice('1234-5678').read()
            self.assertEqual(date(2010, 10, 10), written.comments_close_on)
Ejemplo n.º 7
0
    def test_is_derived(self):
        """Only version ids whose tree depends on a parsed rule should
        count as derived from a rule"""
        with self.cli.isolated_filesystem():
            tree_dir = entry.Tree('12', '1000')

            graph = dependency.Graph()
            graph.add(tree_dir / 111, entry.Annual(12, 1000, 2001))
            graph.add(tree_dir / 222, entry.Notice(222))
            graph.add(tree_dir / 333, entry.Notice(333))
            graph.add(tree_dir / 333, entry.Version(333))
            # Only 222 and 333 depend on a Notice entry
            for vid, expected in [('111', False), ('222', True),
                                  ('333', True), ('444', False)]:
                self.assertEqual(
                    expected,
                    fill_with_rules.is_derived(vid, graph, tree_dir))
Ejemplo n.º 8
0
def test_dependencies():
    """Nonexistent trees should depend on their predecessor tree plus the
    associated rule and version files; the first version gets no
    dependencies when its tree is missing"""
    versions = [Version(str(i) * 3, date(2001, i, i), date(2002, i, i))
                for i in range(1, 7)]
    parents = Version.parents_of(versions)
    tree_dir = entry.Tree('12', '1000')
    notice_dir = entry.Notice()
    vers_dir = entry.Version('12', '1000')
    # Pretend the trees for the second and fifth versions already exist
    (tree_dir / '222').write(Node())
    (tree_dir / '555').write(Node())

    graph = fill_with_rules.dependencies(tree_dir, vers_dir,
                                         list(zip(versions, parents)))

    def deps_of(version_id):
        return set(graph.dependencies(str(tree_dir / version_id)))

    # First is skipped: there is no rule to build it from
    assert str(tree_dir / '111') not in graph
    # Second is skipped: its tree already exists
    assert deps_of('222') == set()
    # Third needs its version files and the second tree
    assert deps_of('333') == {str(tree_dir / '222'), str(notice_dir / '333'),
                              str(vers_dir / '333')}
    # Fourth chains off the third, even though that tree isn't built yet
    assert deps_of('444') == {str(tree_dir / '333'), str(notice_dir / '444'),
                              str(vers_dir / '444')}
    # Fifth is skipped: its tree already exists
    assert deps_of('555') == set()
    # Sixth chains off the fifth
    assert deps_of('666') == {str(tree_dir / '555'), str(notice_dir / '666'),
                              str(vers_dir / '666')}
 def test_missing_notice(self):
     """A missing notice should surface as a dependency.Missing error"""
     with self.cli.isolated_filesystem():
         result = self.cli.invoke(proposal_versions, ['1111'])
         self.assertIsInstance(result.exception, dependency.Missing)
         self.assertEqual(str(entry.Notice('1111')),
                          result.exception.dependency)
Ejemplo n.º 10
0
    def test_single_notice_rins(self, notice_xmls_for_url):
        """Regulation id numbers from the metadata should be written as
        EREGS_RIN elements."""
        cli = CliRunner()
        self.expect_common_json(regulation_id_numbers=["2050-AG65"])
        notice_xmls_for_url.return_value = [self.example_xml()]
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            self.assertEqual(1, len(list(entry.Notice().sub_entries())))

            written = entry.Notice('1234-5678').read()
            self.assertEqual(1, len(written.xpath("//EREGS_RINS")))
            rins = written.xpath("//EREGS_RIN")
            self.assertEqual(1, len(rins))
            self.assertEqual("2050-AG65", rins[0].attrib["rin"])
Ejemplo n.º 11
0
def is_derived(version_id, deps, tree_dir):
    """A tree counts as "derived" when building it depends on the parsed
    rule (notice) sharing its version id; we use this to skip trees that
    come from other sources"""
    tree_key = str(tree_dir / version_id)
    rule_key = str(entry.Notice(version_id))
    return rule_key in deps.dependencies(tree_key)
    def test_single_notice_comments_close_on_prefer(self, notice_xmls_for_url):
        """
        Verify that when we have both XML and metadata for the comment
        closing date, we use the metadata.
        """
        cli = CliRunner()
        self.expect_common_json(comments_close_on="2010-10-10")
        notice_xmls_for_url.return_value = [
            self.example_xml("Comments close on November 11, 2011")
        ]
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            # Count written notices the same way sibling tests do
            self.assertEqual(1, len(list(entry.Notice().sub_entries())))

            written = entry.Notice('1234-5678').read()
            # The metadata date wins over the November date in the XML
            self.assertEqual(written.comments_close_on, date(2010, 10, 10))
    def test_single_notice_comments_close_on_xml(self, notice_xmls_for_url):
        """
        Verify that when we have XML info but no metadata for the comment
        closing date, we still write it to the object.
        """
        cli = CliRunner()
        self.expect_common_json()
        notice_xmls_for_url.return_value = [
            self.example_xml("Comments close on November 11, 2011")
        ]
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            # Count written notices the same way sibling tests do
            self.assertEqual(1, len(list(entry.Notice().sub_entries())))

            written = entry.Notice('1234-5678').read()
            # Date parsed from the XML text, as no metadata was provided
            self.assertEqual(written.comments_close_on, date(2011, 11, 11))
    def test_dependencies(self, notice_xmls_for_url):
        """A dependency should be recorded only when the XML came from a
        local source; remote sources leave no dependency"""
        cli = CliRunner()
        self.expect_common_json()
        notice_xmls_for_url.return_value = [self.example_xml(source='./here')]
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            self.assertIn(str(entry.Notice() / '1234-5678'),
                          dependency.Graph())

        # Same document, but now sourced remotely: no dependency expected
        notice_xmls_for_url.return_value[0].source = 'http://example.com'
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            self.assertNotIn(str(entry.Notice() / '1234-5678'),
                             dependency.Graph())
def import_notice(xml_file):
    """Convert XML file into a notice. May be used if manually creating a
    notice (e.g. from a Word doc). This command will also run a handful of
    validations on the XML"""
    notice_xml = parse_notice(xml_file)
    # parse_notice may return a falsy value when validation fails
    if not notice_xml:
        return
    entry.Notice(notice_xml.version_id).write(notice_xml)
Ejemplo n.º 16
0
    def test_single_notice_docket_ids(self, notice_xmls_for_url):
        """Docket ids from the metadata should be written as
        EREGS_DOCKET_ID elements."""
        cli = CliRunner()
        self.expect_common_json(
            docket_ids=["EPA-HQ-SFUND-2010-1086", "FRL-9925-69-OLEM"])
        notice_xmls_for_url.return_value = [self.example_xml()]
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            self.assertEqual(1, len(list(entry.Notice().sub_entries())))

            written = entry.Notice('1234-5678').read()
            self.assertEqual(1, len(written.xpath("//EREGS_DOCKET_IDS")))
            dockets = written.xpath("//EREGS_DOCKET_ID")
            self.assertEqual(2, len(dockets))
            self.assertEqual("EPA-HQ-SFUND-2010-1086",
                             dockets[0].attrib["docket_id"])
            self.assertEqual("FRL-9925-69-OLEM",
                             dockets[1].attrib["docket_id"])
Ejemplo n.º 17
0
def generate_dependencies(version_dir, version_ids, delays_by_version):
    """Build the dependency graph linking each version entry to its source
    notice, and each delayed version to the notice that delays it"""
    notices = entry.Notice()
    graph = dependency.Graph()
    for vid in version_ids:
        graph.add(version_dir / vid, notices / vid)
    for delayed_id, delay in delays_by_version.items():
        # delay.by names the notice responsible for the delay
        graph.add(version_dir / delayed_id, notices / delay.by)
    return graph
Ejemplo n.º 18
0
 def test_dependencies_remote(self, notice_xmls_for_url):
     """XML fetched from a remote source should leave no dependency
     behind"""
     cli = CliRunner()
     self.expect_common_json()
     notice_xmls_for_url.return_value = [self.example_xml(source='./here')]
     # Overwrite the source so the XML looks remotely fetched
     notice_xmls_for_url.return_value[0].source = 'http://example.com'
     with cli.isolated_filesystem():
         cli.invoke(preprocess_notice, ['1234-5678'])
         entry_str = str(entry.Notice() / '1234-5678')
         assert not dependency.Graph().dependencies(entry_str)
Ejemplo n.º 19
0
def process(tree_path, previous, version_id):
    """Write the tree for `version_id` by applying the changes in its
    associated rule to the preceding version's tree"""
    prev_tree = (tree_path / previous).read()
    notice = entry.Notice(version_id).read()
    changes_by_label = defaultdict(list)
    for amendment in notice.amendments:
        # each amendment carries (label, change_list) pairs under 'changes'
        for label, change_list in amendment.get('changes', []):
            changes_by_label[label].extend(change_list)
    new_tree = compile_regulation(prev_tree, changes_by_label)
    (tree_path / version_id).write(new_tree)
Ejemplo n.º 20
0
def preprocess_notice(document_number):
    """Preprocess notice XML. Either fetch from the Federal Register or read a
    notice from disk. Apply some common transformations to it and output the
    resulting file(s). There may be more than one as documents might be split
    if they have multiple effective dates."""
    # Request only the metadata fields consumed below
    meta = federalregister.meta_data(
        document_number, [
            "agencies",
            "docket_ids",
            "effective_on",
            "cfr_references",
            "comments_close_on",
            "full_text_xml_url",
            "html_url",
            "publication_date",
            "regulation_id_numbers",
            "volume"
        ])
    # May yield more than one XML document when the notice is split
    notice_xmls = list(notice_xmls_for_url(document_number,
                                           meta['full_text_xml_url']))
    deps = dependency.Graph()
    for notice_xml in notice_xmls:
        # Always-present metadata
        notice_xml.published = meta['publication_date']
        notice_xml.fr_volume = meta['volume']
        # Optional metadata: set only when the Federal Register provides it
        if meta.get('html_url'):
            notice_xml.fr_html_url = meta['html_url']
        if meta.get("comments_close_on"):
            notice_xml.comments_close_on = meta["comments_close_on"]
        if meta.get('regulation_id_numbers'):
            notice_xml.rins = meta['regulation_id_numbers']
        if meta.get('docket_ids'):
            notice_xml.docket_ids = meta['docket_ids']

        notice_xml.set_agencies(meta.get('agencies', []))

        cfr_refs = convert_cfr_refs(meta.get('cfr_references', []))
        if cfr_refs:
            notice_xml.cfr_refs = cfr_refs

        file_name = document_number
        if len(notice_xmls) > 1:
            # Split documents get a per-effective-date file name; the date
            # comes from the XML itself, not the (single) metadata value
            effective_date = notice_xml.derive_effective_date()
            file_name = split_doc_num(document_number,
                                      effective_date.isoformat())
        elif meta.get('effective_on'):
            notice_xml.effective = meta['effective_on']

        notice_xml.version_id = file_name
        notice_xml.derive_where_needed()

        notice_entry = entry.Notice(file_name)
        notice_entry.write(notice_xml)
        # Record locally-sourced XML as a dependency so downstream steps
        # can detect when the source file changes
        if notice_xml.source_is_local:
            deps.add(str(notice_entry), notice_xml.source)
Ejemplo n.º 21
0
 def test_missing_effective_date(self, notice_xmls_for_url):
     """With no effective date in the metadata, the date should be parsed
     from the XML rather than raising"""
     cli = CliRunner()
     self.expect_common_json(effective_on=None)
     notice_xmls_for_url.return_value = [
         self.example_xml("Effective January 1, 2001")
     ]
     with cli.isolated_filesystem():
         cli.invoke(preprocess_notice, ['1234-5678'])
         written = entry.Notice('1234-5678').read()
         self.assertEqual(date(2001, 1, 1), written.effective)
Ejemplo n.º 22
0
def proposal_versions(doc_number):
    """Generate version entries associated with a proposal."""
    notice_entry = entry.Notice(doc_number)
    if not notice_entry.exists():
        raise dependency.Missing(str(notice_entry), str(notice_entry))

    notice = notice_entry.read()
    # Proposals have no effective date yet, hence None
    version = Version(doc_number, notice.published, None)

    for cfr_title, cfr_part in notice.cfr_ref_pairs:
        dest = entry.Version(cfr_title, cfr_part, doc_number)
        # Avoid rewriting identical version data
        if not dest.exists() or dest.read() != version:
            dest.write(version)
Ejemplo n.º 23
0
def notice_preamble(doc_number):
    """Pull down and parse the preamble from this notice."""
    logger.info("Parsing Preamble for %s", doc_number)
    preamble_entry = entry.Preamble(convert_id(doc_number))
    notice_entry = entry.Notice(doc_number)

    graph = dependency.Graph()
    graph.add(preamble_entry, notice_entry)
    graph.validate_for(preamble_entry)

    # Only re-parse when the stored preamble is out of date
    if graph.is_stale(preamble_entry):
        preamble_entry.write(parse_preamble(notice_entry.read()))
Ejemplo n.º 24
0
def test_process_version_if_needed_success():
    """With effective date, volume, and start page present, the version
    data should be written"""
    notice_xml = NoticeXML(XMLBuilder().xml)
    notice_xml.effective = date(2001, 1, 1)
    notice_xml.fr_volume = 2
    notice_xml.start_page = 3
    entry.Notice('vvv').write(notice_xml)

    full_issuance.process_version_if_needed('title', 'part', 'vvv')

    written = entry.Version('title', 'part', 'vvv').read()
    assert written.identifier == 'vvv'
    assert written.effective == date(2001, 1, 1)
    assert written.fr_citation == Citation(2, 3)
Ejemplo n.º 25
0
def versions(cfr_title, cfr_part):
    """Find all Versions for a regulation. Accounts for locally modified
    notice XML and rules modifying the effective date of versions of a
    regulation"""
    cfr_title = str(cfr_title)
    cfr_part = str(cfr_part)
    notice_dir = entry.Notice()

    logger.info("Finding versions")
    version_ids = fetch_version_ids(cfr_title, cfr_part, notice_dir)
    logger.debug("Versions found: %r", version_ids)
    # Read the notice XML for every version id we actually have on disk
    xmls = {}
    for version_id in version_ids:
        if version_id in notice_dir:
            xmls[version_id] = (notice_dir / version_id).read()
    delays_by_version = delays(xmls.values())
    write_if_needed(cfr_title, cfr_part, version_ids, xmls, delays_by_version)
def process_tree_if_needed(cfr_title, cfr_part, version_id):
    """Build and store a regulation tree, but only when the source notice
    is newer than any existing tree"""
    notice_entry = entry.Notice(version_id)
    tree_entry = entry.Tree(cfr_title, cfr_part, version_id)

    graph = dependency.Graph()
    graph.add(tree_entry, notice_entry)
    graph.validate_for(tree_entry)

    if graph.is_stale(tree_entry):
        regtext = regtext_for_part(notice_entry.read(), cfr_title, cfr_part)
        tree_entry.write(build_tree(regtext))
def write_if_stale(notice_xml):
    """We only want to write out the processed xml if it is "stale", i.e. if
    its source has changed"""
    deps = dependency.Graph()
    notice_entry = entry.Notice(notice_xml.version_id)

    # Rewrite when the entry is new, its recorded source differs, or the
    # source file is newer than the entry
    new_notice = notice_entry not in deps
    # Bug fix: the graph is keyed by entry paths (cf. is_derived), so look
    # up the notice *entry*, not the NoticeXML object itself
    diff_source = notice_xml.source not in deps.dependencies(str(notice_entry))
    source_changed = deps.is_stale(notice_entry)

    if new_notice or diff_source or source_changed:
        deps.clear_for(notice_entry)
        deps.add(notice_entry, notice_xml.source)
        notice_entry.write(notice_xml)
Ejemplo n.º 28
0
    def test_split_notice(self, notice_xmls_for_url):
        """Integration test: a notice with multiple effective dates should
        be written as multiple files, one per date"""
        cli = CliRunner()
        self.expect_common_json()
        notice_xmls_for_url.return_value = [
            self.example_xml("Effective January 1, 2001"),
            self.example_xml("Effective February 2, 2002"),
            self.example_xml("Effective March 3, 2003")
        ]
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            notice_path = entry.Notice()
            self.assertEqual(3, len(list(entry.Notice().sub_entries())))

            # One file per effective date, suffixed with that date
            expected = [('1234-5678_20010101', date(2001, 1, 1)),
                        ('1234-5678_20020202', date(2002, 2, 2)),
                        ('1234-5678_20030303', date(2003, 3, 3))]
            for file_name, effective in expected:
                written = (notice_path / file_name).read()
                self.assertEqual(effective, written.effective)
Ejemplo n.º 29
0
def test_process_tree_if_needed_success(monkeypatch):
    """When the source notice exists, the tree-parsing function should be
    invoked and its result written"""
    build_tree_mock = Mock(return_value=Node('root'))
    monkeypatch.setattr(full_issuance, 'build_tree', build_tree_mock)
    with XMLBuilder() as ctx:
        ctx.REGTEXT(TITLE=1, PART=2)
    entry.Notice('vvv').write(NoticeXML(ctx.xml))

    full_issuance.process_tree_if_needed('1', '2', 'vvv')

    written = entry.Tree('1', '2', 'vvv').read()
    assert written.text == 'root'
    # The parser should have been handed the REGTEXT element itself
    xml_given = build_tree_mock.call_args[0][0]
    assert etree.tostring(xml_given) == etree.tostring(ctx.xml[0])
Ejemplo n.º 30
0
    def test_single_notice_cfr_refs_from_xml(self, notice_xmls_for_url):
        """CFR references present only in the XML should still be parsed
        into EREGS_CFR_* elements."""
        cli = CliRunner()
        self.expect_common_json()
        notice_xml = self.example_xml()
        # Inject a CFR citation directly into the source XML
        etree.SubElement(notice_xml.xml, 'CFR').text = '40 CFR 300, 301'
        notice_xmls_for_url.return_value = [notice_xml]
        with cli.isolated_filesystem():
            cli.invoke(preprocess_notice, ['1234-5678'])
            self.assertEqual(1, len(list(entry.Notice().sub_entries())))

            written = entry.Notice('1234-5678').read()
            self.assertEqual(1, len(written.xpath("//EREGS_CFR_REFS")))
            title_refs = written.xpath("//EREGS_CFR_TITLE_REF")
            self.assertEqual(1, len(title_refs))
            self.assertEqual("40", title_refs[0].attrib["title"])
            part_refs = written.xpath("//EREGS_CFR_PART_REF")
            self.assertEqual(2, len(part_refs))
            self.assertEqual("300", part_refs[0].attrib["part"])
            self.assertEqual("301", part_refs[1].attrib["part"])