def add_notices():
    """Write four notice entries (v0-v3), mutating the shared XML between
    writes so the stored notices differ unevenly."""
    root_attrs = {
        "eregs-version-id": "v0",
        "fr-volume": 1,
        "fr-start-page": 2,
        "fr-end-page": 3,
    }
    with XMLBuilder("ROOT", **root_attrs) as ctx:
        ctx.AGENCY("Agency")
        ctx.SUBJECT("Subj")
        ctx.DATES(**{'eregs-published-date': '2001-01-01'})
        with ctx.EREGS_CFR_REFS():
            ctx.EREGS_CFR_TITLE_REF(title=11)
    xml = ctx.xml
    entry.Notice('v0').write(NoticeXML(xml))

    # v1: add a part reference beneath the existing title reference
    title_ref = xml.xpath('//EREGS_CFR_TITLE_REF')[0]
    etree.SubElement(title_ref, 'EREGS_CFR_PART_REF', part='1000')
    xml.attrib['eregs-version-id'] = 'v1'
    entry.Notice('v1').write(NoticeXML(xml))

    # v2: change the CFR title
    xml.xpath('//EREGS_CFR_TITLE_REF')[0].attrib['title'] = '12'
    xml.attrib['eregs-version-id'] = 'v2'
    entry.Notice('v2').write(NoticeXML(xml))

    # v3: same content as v2 under a new version id
    xml.attrib['eregs-version-id'] = 'v3'
    entry.Notice('v3').write(NoticeXML(xml))
def test_single_notice_one_agency_meta(self, notice_xmls_for_url):
    """Agency info provided by the metadata should appear in the output."""
    cli = CliRunner()
    agencies_info = [{
        u'name': u'Environmental Protection Agency',
        u'parent_id': None,
        u'raw_name': u'ENVIRONMENTAL PROTECTION AGENCY',
        u'url': ('https://www.federalregister.gov/agencies/'
                 'environmental-protection-agency'),
        u'json_url': ('https://www.federalregister.gov/api/v1/agencies/'
                      '145.json'),
        u'id': 145
    }]
    self.expect_common_json(agencies=agencies_info)
    notice_xmls_for_url.return_value = [self.example_xml()]
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        self.assertEqual(1, len(list(entry.Notice().sub_entries())))
        written = entry.Notice('1234-5678').read()
        self.assertEqual(len(written.xpath("//EREGS_AGENCIES")), 1)
        self.assertEqual(len(written.xpath("//EREGS_AGENCY")), 1)
        epa = written.xpath("//EREGS_AGENCY")[0]
        self.assertEqual(epa.attrib["name"],
                         "Environmental Protection Agency")
        self.assertEqual(epa.attrib["raw-name"],
                         "ENVIRONMENTAL PROTECTION AGENCY")
        self.assertEqual(epa.attrib["agency-id"], "145")
def test_single_notice_cfr_refs_from_metadata(self, notice_xmls_for_url):
    """CFR references in the metadata should be written to the notice."""
    cli = CliRunner()
    self.expect_common_json(cfr_references=[
        {"title": "40", "part": "300"},
        {"title": "40", "part": "301"},
    ])
    notice_xmls_for_url.return_value = [self.example_xml()]
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        self.assertEqual(1, len(list(entry.Notice().sub_entries())))
        written = entry.Notice('1234-5678').read()
        # One title element wrapping two part elements
        self.assertEqual(len(written.xpath("//EREGS_CFR_REFS")), 1)
        self.assertEqual(len(written.xpath("//EREGS_CFR_TITLE_REF")), 1)
        title = written.xpath("//EREGS_CFR_TITLE_REF")[0]
        self.assertEqual(title.attrib["title"], "40")
        parts = written.xpath("//EREGS_CFR_PART_REF")
        self.assertEqual(len(parts), 2)
        self.assertEqual(parts[0].attrib["part"], "300")
        self.assertEqual(parts[1].attrib["part"], "301")
def test_single_notice(self, notice_xmls_for_url):
    """Integration test: a document number with exactly one XML file should
    produce a single, modified output entry."""
    cli = CliRunner()
    self.expect_common_json()
    notice_xmls_for_url.return_value = [self.example_xml()]
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        self.assertEqual(1, len(list(entry.Notice().sub_entries())))
        written = entry.Notice('1234-5678').read()
        self.assertEqual(written.effective, date(2008, 8, 8))
def test_is_derived():
    """Only version ids whose tree depends on a parsed rule (Notice) should
    count as derived."""
    tree_dir = entry.Tree('12', '1000')
    deps = dependency.Graph()
    deps.add(tree_dir / 111, entry.Annual(12, 1000, 2001))
    deps.add(tree_dir / 222, entry.Notice(222))
    deps.add(tree_dir / 333, entry.Notice(333))
    deps.add(tree_dir / 333, entry.Version(333))

    # 111 depends only on an Annual edition; 444 has no dependencies at all
    assert not fill_with_rules.is_derived('111', deps, tree_dir)
    assert not fill_with_rules.is_derived('444', deps, tree_dir)
    # 222 and 333 each depend on a Notice
    assert fill_with_rules.is_derived('222', deps, tree_dir)
    assert fill_with_rules.is_derived('333', deps, tree_dir)
def test_single_notice_comments_close_on_meta(self, notice_xmls_for_url):
    """A comment-closing date supplied via metadata should be written to
    the output notice."""
    cli = CliRunner()
    self.expect_common_json(comments_close_on="2010-10-10")
    notice_xmls_for_url.return_value = [self.example_xml()]
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        self.assertEqual(1, len(list(entry.Notice().sub_entries())))
        written = entry.Notice('1234-5678').read()
        self.assertEqual(written.comments_close_on, date(2010, 10, 10))
def test_is_derived(self):
    """Only version ids whose tree depends on a parsed rule (Notice) should
    count as derived."""
    with self.cli.isolated_filesystem():
        tree_dir = entry.Tree('12', '1000')
        deps = dependency.Graph()
        deps.add(tree_dir / 111, entry.Annual(12, 1000, 2001))
        deps.add(tree_dir / 222, entry.Notice(222))
        deps.add(tree_dir / 333, entry.Notice(333))
        deps.add(tree_dir / 333, entry.Version(333))

        # 111 depends only on an Annual edition; 444 has no deps at all
        self.assertFalse(fill_with_rules.is_derived('111', deps, tree_dir))
        self.assertFalse(fill_with_rules.is_derived('444', deps, tree_dir))
        # 222 and 333 each depend on a Notice
        self.assertTrue(fill_with_rules.is_derived('222', deps, tree_dir))
        self.assertTrue(fill_with_rules.is_derived('333', deps, tree_dir))
def test_dependencies():
    """Nonexistent trees should depend on their predecessor tree plus the
    associated rule (Notice) and Version entries. The first version gets no
    dependencies if missing, and already-built trees get none either."""
    versions = [Version(str(i)*3, date(2001, i, i), date(2002, i, i))
                for i in range(1, 7)]
    parents = Version.parents_of(versions)
    tree_dir = entry.Tree('12', '1000')
    notice_dir = entry.Notice()
    vers_dir = entry.Version('12', '1000')
    # Pretend trees 222 and 555 have already been built
    (tree_dir / '222').write(Node())
    (tree_dir / '555').write(Node())

    deps = fill_with_rules.dependencies(
        tree_dir, vers_dir, list(zip(versions, parents)))

    # First is skipped, as we can't build it from a rule
    assert str(tree_dir / '111') not in deps
    # Second can also be skipped as a tree already exists
    assert deps.dependencies(str(tree_dir / '222')) == []
    # Third relies on the associated versions and the second tree
    expected = {str(tree_dir / '222'), str(notice_dir / '333'),
                str(vers_dir / '333')}
    assert set(deps.dependencies(str(tree_dir / '333'))) == expected
    # Fourth relies on the third, even though it's not been built
    expected = {str(tree_dir / '333'), str(notice_dir / '444'),
                str(vers_dir / '444')}
    assert set(deps.dependencies(str(tree_dir / '444'))) == expected
    # Fifth can be skipped as the tree already exists
    assert deps.dependencies(str(tree_dir / '555')) == []
    # Six relies on the fifth
    expected = {str(tree_dir / '555'), str(notice_dir / '666'),
                str(vers_dir / '666')}
    assert set(deps.dependencies(str(tree_dir / '666'))) == expected
def test_missing_notice(self):
    """Invoking the command without the notice present should surface a
    dependency.Missing exception naming that notice."""
    with self.cli.isolated_filesystem():
        result = self.cli.invoke(proposal_versions, ['1111'])
        self.assertTrue(isinstance(result.exception, dependency.Missing))
        self.assertEqual(result.exception.dependency,
                         str(entry.Notice('1111')))
def test_single_notice_rins(self, notice_xmls_for_url):
    """RINs supplied via metadata should be written into the output XML."""
    cli = CliRunner()
    self.expect_common_json(regulation_id_numbers=["2050-AG65"])
    notice_xmls_for_url.return_value = [self.example_xml()]
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        self.assertEqual(1, len(list(entry.Notice().sub_entries())))
        written = entry.Notice('1234-5678').read()
        self.assertEqual(len(written.xpath("//EREGS_RINS")), 1)
        self.assertEqual(len(written.xpath("//EREGS_RIN")), 1)
        rin = written.xpath("//EREGS_RIN")[0]
        self.assertEqual(rin.attrib["rin"], "2050-AG65")
def is_derived(version_id, deps, tree_dir):
    """Return True if this version's tree is built by parsing a rule.

    A tree counts as rule-derived when the corresponding Notice entry
    appears among the tree's dependencies."""
    tree_key = str(tree_dir / version_id)
    notice_key = str(entry.Notice(version_id))
    return notice_key in deps.dependencies(tree_key)
def test_single_notice_comments_close_on_prefer(self, notice_xmls_for_url):
    """When both the XML and the metadata carry a comment-closing date,
    the metadata value wins."""
    cli = CliRunner()
    self.expect_common_json(comments_close_on="2010-10-10")
    notice_xmls_for_url.return_value = [
        self.example_xml("Comments close on November 11, 2011")
    ]
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        # Count entries via sub_entries() for consistency with the other
        # tests in this file (len(entry.Notice()) relied on an older API)
        self.assertEqual(1, len(list(entry.Notice().sub_entries())))
        written = entry.Notice('1234-5678').read()
        # Metadata date (2010-10-10) beats the XML date (2011-11-11)
        self.assertEqual(written.comments_close_on, date(2010, 10, 10))
def test_single_notice_comments_close_on_xml(self, notice_xmls_for_url):
    """When only the XML (not the metadata) carries a comment-closing
    date, that XML date should still be written to the object."""
    cli = CliRunner()
    self.expect_common_json()
    notice_xmls_for_url.return_value = [
        self.example_xml("Comments close on November 11, 2011")
    ]
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        # Count entries via sub_entries() for consistency with the other
        # tests in this file (len(entry.Notice()) relied on an older API)
        self.assertEqual(1, len(list(entry.Notice().sub_entries())))
        written = entry.Notice('1234-5678').read()
        self.assertEqual(written.comments_close_on, date(2011, 11, 11))
def test_dependencies(self, notice_xmls_for_url):
    """XML from a local source should register a dependency; XML from a
    remote source should not."""
    cli = CliRunner()
    self.expect_common_json()

    # Local source: dependency present
    notice_xmls_for_url.return_value = [self.example_xml(source='./here')]
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        entry_str = str(entry.Notice() / '1234-5678')
        self.assertIn(entry_str, dependency.Graph())

    # Remote source: no dependency
    notice_xmls_for_url.return_value[0].source = 'http://example.com'
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        entry_str = str(entry.Notice() / '1234-5678')
        self.assertNotIn(entry_str, dependency.Graph())
def import_notice(xml_file):
    """Convert an XML file into a notice entry.

    Useful when a notice is created by hand (e.g. from a Word doc).
    parse_notice also runs a handful of validations; a falsy result means
    validation failed and nothing is written."""
    notice_xml = parse_notice(xml_file)
    if notice_xml:
        entry.Notice(notice_xml.version_id).write(notice_xml)
def test_single_notice_docket_ids(self, notice_xmls_for_url):
    """Docket ids supplied via metadata should be written to the XML."""
    cli = CliRunner()
    self.expect_common_json(
        docket_ids=["EPA-HQ-SFUND-2010-1086", "FRL-9925-69-OLEM"])
    notice_xmls_for_url.return_value = [self.example_xml()]
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        self.assertEqual(1, len(list(entry.Notice().sub_entries())))
        written = entry.Notice('1234-5678').read()
        self.assertEqual(len(written.xpath("//EREGS_DOCKET_IDS")), 1)
        dockets = written.xpath("//EREGS_DOCKET_ID")
        self.assertEqual(len(dockets), 2)
        self.assertEqual(dockets[0].attrib["docket_id"],
                         "EPA-HQ-SFUND-2010-1086")
        self.assertEqual(dockets[1].attrib["docket_id"],
                         "FRL-9925-69-OLEM")
def generate_dependencies(version_dir, version_ids, delays_by_version):
    """Build a dependency graph for version entries.

    Each version depends on its source notice XML; a delayed version
    additionally depends on the notice that delayed it."""
    notice_dir = entry.Notice()
    deps = dependency.Graph()
    for version_id in version_ids:
        deps.add(version_dir / version_id, notice_dir / version_id)
    for delayed, delay in delays_by_version.items():
        # The delaying rule is another notice; track it as a dependency
        deps.add(version_dir / delayed, notice_dir / delay.by)
    return deps
def test_dependencies_remote(self, notice_xmls_for_url):
    """XML fetched from a remote source should create no dependency."""
    cli = CliRunner()
    self.expect_common_json()
    notice_xmls_for_url.return_value = [self.example_xml(source='./here')]
    # Override the source so it looks remote
    notice_xmls_for_url.return_value[0].source = 'http://example.com'
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        entry_str = str(entry.Notice() / '1234-5678')
        assert len(dependency.Graph().dependencies(entry_str)) == 0
def process(tree_path, previous, version_id):
    """Build and write a regulation tree for `version_id` by applying the
    changes from its associated rule to the preceding tree."""
    prev_tree = (tree_path / previous).read()
    notice = entry.Notice(version_id).read()
    # Group every change in the notice's amendments by label
    notice_changes = defaultdict(list)
    for amendment in notice.amendments:
        for label, change_list in amendment.get('changes', []):
            notice_changes[label].extend(change_list)
    new_tree = compile_regulation(prev_tree, notice_changes)
    (tree_path / version_id).write(new_tree)
def preprocess_notice(document_number):
    """Preprocess notice XML.

    Either fetch from the Federal Register or read a notice from disk,
    apply common transformations, and write the resulting file(s). A
    document may be split into several outputs when it carries multiple
    effective dates."""
    meta = federalregister.meta_data(document_number, [
        "agencies", "docket_ids", "effective_on", "cfr_references",
        "comments_close_on", "full_text_xml_url", "html_url",
        "publication_date", "regulation_id_numbers", "volume",
    ])
    notice_xmls = list(notice_xmls_for_url(document_number,
                                           meta['full_text_xml_url']))
    deps = dependency.Graph()
    for notice_xml in notice_xmls:
        # Copy metadata fields onto the XML wrapper
        notice_xml.published = meta['publication_date']
        notice_xml.fr_volume = meta['volume']
        if meta.get('html_url'):
            notice_xml.fr_html_url = meta['html_url']
        if meta.get("comments_close_on"):
            notice_xml.comments_close_on = meta["comments_close_on"]
        if meta.get('regulation_id_numbers'):
            notice_xml.rins = meta['regulation_id_numbers']
        if meta.get('docket_ids'):
            notice_xml.docket_ids = meta['docket_ids']
        notice_xml.set_agencies(meta.get('agencies', []))
        cfr_refs = convert_cfr_refs(meta.get('cfr_references', []))
        if cfr_refs:
            notice_xml.cfr_refs = cfr_refs

        file_name = document_number
        if len(notice_xmls) > 1:
            # Split documents get a per-effective-date suffix; the
            # metadata effective date is ambiguous here, so derive it
            effective_date = notice_xml.derive_effective_date()
            file_name = split_doc_num(document_number,
                                      effective_date.isoformat())
        elif meta.get('effective_on'):
            notice_xml.effective = meta['effective_on']
        notice_xml.version_id = file_name
        notice_xml.derive_where_needed()

        notice_entry = entry.Notice(file_name)
        notice_entry.write(notice_xml)
        if notice_xml.source_is_local:
            # Local sources should trigger rebuilds when they change
            deps.add(str(notice_entry), notice_xml.source)
def test_missing_effective_date(self, notice_xmls_for_url):
    """Absent metadata for the effective date, the date should be parsed
    from the XML rather than raising."""
    cli = CliRunner()
    self.expect_common_json(effective_on=None)
    notice_xmls_for_url.return_value = [
        self.example_xml("Effective January 1, 2001")
    ]
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        written = entry.Notice('1234-5678').read()
        self.assertEqual(written.effective, date(2001, 1, 1))
def proposal_versions(doc_number):
    """Generate version entries associated with a proposal.

    Raises dependency.Missing if the proposal's notice has not been
    preprocessed yet."""
    notice_entry = entry.Notice(doc_number)
    if not notice_entry.exists():
        raise dependency.Missing(str(notice_entry), str(notice_entry))
    notice = notice_entry.read()
    version = Version(doc_number, notice.published, None)
    for cfr_title, cfr_part in notice.cfr_ref_pairs:
        version_entry = entry.Version(cfr_title, cfr_part, doc_number)
        # Only write when missing or out of date, to avoid spurious mtimes
        if not version_entry.exists() or version_entry.read() != version:
            version_entry.write(version)
def notice_preamble(doc_number):
    """Pull down and parse the preamble from this notice, writing the
    result only when the stored preamble is stale."""
    logger.info("Parsing Preamble for %s", doc_number)
    preamble_path = entry.Preamble(convert_id(doc_number))
    notice_path = entry.Notice(doc_number)

    deps = dependency.Graph()
    deps.add(preamble_path, notice_path)
    deps.validate_for(preamble_path)

    if deps.is_stale(preamble_path):
        preamble_path.write(parse_preamble(notice_path.read()))
def test_process_version_if_needed_success():
    """With effective date, volume, and start page present, version data
    should be derived and written."""
    notice_xml = NoticeXML(XMLBuilder().xml)
    notice_xml.effective = date(2001, 1, 1)
    notice_xml.fr_volume = 2
    notice_xml.start_page = 3
    entry.Notice('vvv').write(notice_xml)

    full_issuance.process_version_if_needed('title', 'part', 'vvv')

    result = entry.Version('title', 'part', 'vvv').read()
    assert result.identifier == 'vvv'
    assert result.effective == date(2001, 1, 1)
    assert result.fr_citation == Citation(2, 3)
def versions(cfr_title, cfr_part):
    """Find all Versions for a regulation.

    Accounts for locally modified notice XML and for rules which delay
    the effective date of other versions."""
    cfr_title, cfr_part = str(cfr_title), str(cfr_part)
    notice_dir = entry.Notice()
    logger.info("Finding versions")
    version_ids = fetch_version_ids(cfr_title, cfr_part, notice_dir)
    logger.debug("Versions found: %r", version_ids)
    # Only read XML for ids that actually exist under the notice dir
    xmls = {vid: (notice_dir / vid).read()
            for vid in version_ids if vid in notice_dir}
    delays_by_version = delays(xmls.values())
    write_if_needed(cfr_title, cfr_part, version_ids, xmls,
                    delays_by_version)
def process_tree_if_needed(cfr_title, cfr_part, version_id):
    """Create and write a regulation tree when the corresponding notice
    exists and the stored tree is stale."""
    notice_entry = entry.Notice(version_id)
    tree_entry = entry.Tree(cfr_title, cfr_part, version_id)

    deps = dependency.Graph()
    deps.add(tree_entry, notice_entry)
    deps.validate_for(tree_entry)

    if deps.is_stale(tree_entry):
        notice_xml = notice_entry.read()
        regtext = regtext_for_part(notice_xml, cfr_title, cfr_part)
        tree_entry.write(build_tree(regtext))
def write_if_stale(notice_xml):
    """Write the processed XML only if it is "stale" — i.e. the notice is
    new, its source file changed, or the dependency graph says so.

    :param notice_xml: a NoticeXML whose version_id and source identify
        the entry and its upstream file
    """
    deps = dependency.Graph()
    notice_entry = entry.Notice(notice_xml.version_id)
    entry_key = str(notice_entry)
    new_notice = notice_entry not in deps
    # Fix: the graph's dependency lookup is keyed by the entry string
    # (see how dependencies are added/queried elsewhere in this module),
    # not by the NoticeXML object itself
    diff_source = notice_xml.source not in deps.dependencies(entry_key)
    source_changed = deps.is_stale(notice_entry)
    if new_notice or diff_source or source_changed:
        # Reset the dependency edge so the graph tracks the new source
        deps.clear_for(notice_entry)
        deps.add(notice_entry, notice_xml.source)
        notice_entry.write(notice_xml)
def test_split_notice(self, notice_xmls_for_url):
    """Integration test: a notice split over multiple effective dates
    should be written as multiple files, one per date."""
    cli = CliRunner()
    self.expect_common_json()
    notice_xmls_for_url.return_value = [
        self.example_xml("Effective January 1, 2001"),
        self.example_xml("Effective February 2, 2002"),
        self.example_xml("Effective March 3, 2003"),
    ]
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        notice_path = entry.Notice()
        self.assertEqual(3, len(list(entry.Notice().sub_entries())))
        # File names carry the effective date as a suffix
        jan = (notice_path / '1234-5678_20010101').read()
        feb = (notice_path / '1234-5678_20020202').read()
        mar = (notice_path / '1234-5678_20030303').read()
        self.assertEqual(jan.effective, date(2001, 1, 1))
        self.assertEqual(feb.effective, date(2002, 2, 2))
        self.assertEqual(mar.effective, date(2003, 3, 3))
def test_process_tree_if_needed_success(monkeypatch):
    """With the notice present, the tree-parsing function should be
    invoked with the REGTEXT element and its result written."""
    mock_regtext = Mock(return_value=Node('root'))
    monkeypatch.setattr(full_issuance, 'build_tree', mock_regtext)
    with XMLBuilder() as ctx:
        ctx.REGTEXT(TITLE=1, PART=2)
    entry.Notice('vvv').write(NoticeXML(ctx.xml))

    full_issuance.process_tree_if_needed('1', '2', 'vvv')

    result = entry.Tree('1', '2', 'vvv').read()
    assert result.text == 'root'
    # build_tree should have received the REGTEXT child element
    xml_given = mock_regtext.call_args[0][0]
    assert etree.tostring(xml_given) == etree.tostring(ctx.xml[0])
def test_single_notice_cfr_refs_from_xml(self, notice_xmls_for_url):
    """When the metadata has no CFR references, they should be parsed out
    of the XML's CFR element instead."""
    cli = CliRunner()
    self.expect_common_json()
    notice_xml = self.example_xml()
    cfr_el = etree.SubElement(notice_xml.xml, 'CFR')
    cfr_el.text = '40 CFR 300, 301'
    notice_xmls_for_url.return_value = [notice_xml]
    with cli.isolated_filesystem():
        cli.invoke(preprocess_notice, ['1234-5678'])
        self.assertEqual(1, len(list(entry.Notice().sub_entries())))
        written = entry.Notice('1234-5678').read()
        # One title element wrapping two part elements
        self.assertEqual(len(written.xpath("//EREGS_CFR_REFS")), 1)
        self.assertEqual(len(written.xpath("//EREGS_CFR_TITLE_REF")), 1)
        title = written.xpath("//EREGS_CFR_TITLE_REF")[0]
        self.assertEqual(title.attrib["title"], "40")
        parts = written.xpath("//EREGS_CFR_PART_REF")
        self.assertEqual(len(parts), 2)
        self.assertEqual(parts[0].attrib["part"], "300")
        self.assertEqual(parts[1].attrib["part"], "301")