def test_validate_keyterms(self):
    """validate_keyterms records one error and two warnings for the fixture.

    The first paragraph's content starts with its own keyterm text
    ("A Keyterm."); the second paragraph's content starts with only part of
    its keyterm ("Keyterm."). The validator is expected to record exactly
    three events, in order: an ERROR mentioning 'Duplicate keyterm', a
    WARNING mentioning 'keyterm fragment', and a WARNING mentioning
    'repeating keyterms'.
    """
    # The fixture is kept as one logical string; adjacent literals are
    # concatenated by the compiler, so whitespace is exactly as written.
    section_xml = (
        ' <section xmlns="eregs" >'
        ' <paragraph>'
        ' <title type="keyterm">A Keyterm.</title>'
        ' <content>A Keyterm. This paragraph should error.</content>'
        ' </paragraph>'
        ' <paragraph>'
        ' <title type="keyterm">Another Keyterm.</title>'
        ' <content>Keyterm. Fragment This one should warn.</content>'
        ' </paragraph>'
        ' </section> '
    )
    tree = etree.fromstring(section_xml)

    validator = EregsValidator(settings.XSD_FILE)
    validator.validate_keyterms(tree)

    events = validator.events
    self.assertEqual(len(events), 3)

    # assertIn reports both the needle and the message on failure, whereas
    # assertTrue(x in y) only reports "False is not true".
    self.assertEqual(events[0].severity, Severity.ERROR)
    self.assertIn('Duplicate keyterm', events[0].msg)

    self.assertEqual(events[1].severity, Severity.WARNING)
    self.assertIn('keyterm fragment', events[1].msg)

    self.assertEqual(events[2].severity, Severity.WARNING)
    self.assertIn('repeating keyterms', events[2].msg)
def test_invalid_content_is_not_valid_and_has_critical_errors(self):
    """A regulation with bad content is invalid and has critical errors.

    The fixture's <cfrTitleNum> holds the text 'this-should-be-a-number'
    (presumably rejected by the schema -- confirm against the XSD). After
    validate_reg, the validator must report is_valid as false and
    has_critical_errors as true.
    """
    regulation_xml = (
        '<regulation xmlns="eregs">'
        ' <fdsys>'
        ' <cfrTitleNum>this-should-be-a-number</cfrTitleNum>'
        ' <cfrTitleText>TESTING</cfrTitleText>'
        ' <volume>99</volume>'
        ' <date>2017-01-01</date>'
        ' <originalDate>2017-01-01</originalDate>'
        ' <title>TEST</title>'
        ' </fdsys>'
        ' <preamble>'
        ' <agency>Bureau of Consumer Financial Protection</agency>'
        ' <regLetter>TEST</regLetter>'
        ' <cfr>'
        ' <title>123</title>'
        ' <section>4567</section>'
        ' </cfr>'
        ' <documentNumber>9999-99999</documentNumber>'
        ' <effectiveDate>2017-01-01</effectiveDate>'
        ' <federalRegisterURL>https://www.foo.gov/testing</federalRegisterURL>'
        ' </preamble>'
        ' <part label="9999">'
        ' <content></content>'
        ' </part>'
        ' </regulation>'
    )
    regulation_tree = etree.fromstring(regulation_xml)

    validator = EregsValidator(settings.XSD_FILE)
    validator.validate_reg(regulation_tree)

    self.assertFalse(validator.is_valid)
    self.assertTrue(validator.has_critical_errors)
def test_migrate_analysis_notice(self):
    """migrate_analysis hoists paragraph analysis in a notice to the root.

    The fixture is a notice whose changeset adds paragraph 1234-5 with a
    nested <analysis>. After migration the first <analysis> found must be a
    direct child of <notice>, and its <analysisSection> must carry
    target="1234-5" and notice="2015-12345" (the document number).
    """
    notice_xml = (
        ' <notice xmlns="eregs"'
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
        ' xsi:schemaLocation="eregs ../../eregs.xsd">'
        ' <fdsys>'
        ' <date>2015-11-17</date>'
        ' </fdsys>'
        ' <preamble>'
        ' <documentNumber>2015-12345</documentNumber>'
        ' </preamble>'
        ' <changeset>'
        ' <change operation="added" label="1234-5">'
        ' <paragraph label="1234-5">'
        ' <content/>'
        ' <analysis>'
        ' <analysisSection>Some addedanalysis</analysisSection>'
        ' </analysis>'
        ' </paragraph>'
        ' </change>'
        ' </changeset>'
        ' </notice>'
    )
    tree = etree.fromstring(notice_xml)

    validator = EregsValidator(settings.XSD_FILE)
    migrated = validator.migrate_analysis(tree)

    analysis = migrated.find(".//{eregs}analysis")
    # len() of an element counts its children, so this checks that the
    # first <analysis> has exactly one child.
    # NOTE(review): findall() may have been intended, to count the
    # <analysis> elements themselves -- confirm.
    self.assertEqual(len(analysis), 1)

    section = analysis.find("{eregs}analysisSection")
    self.assertEqual(analysis.getparent().tag, "{eregs}notice")
    self.assertEqual(section.get("target"), "1234-5")
    self.assertEqual(section.get("notice"), "2015-12345")
def test_migrate_analysis_reg(self):
    """migrate_analysis hoists section analysis in a regulation to the root.

    The fixture nests <analysis> inside section 1234-1. After migration the
    first <analysis> found must be a direct child of <regulation>, and its
    <analysisSection> must carry target="1234-1" and notice="2015-12345"
    (the document number).
    """
    regulation_xml = (
        ' <regulation xmlns="eregs"'
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
        ' xsi:schemaLocation="eregs ../../eregs.xsd">'
        ' <fdsys>'
        ' <date>2015-11-17</date>'
        ' </fdsys>'
        ' <preamble>'
        ' <documentNumber>2015-12345</documentNumber>'
        ' </preamble>'
        ' <part label="1234">'
        ' <subpart>'
        ' <section label="1234-1">'
        ' <analysis>'
        ' <analysisSection>Some analysis</analysisSection>'
        ' </analysis>'
        ' </section>'
        ' </subpart>'
        ' </part>'
        ' </regulation>'
    )
    tree = etree.fromstring(regulation_xml)

    validator = EregsValidator(settings.XSD_FILE)
    migrated = validator.migrate_analysis(tree)

    analysis = migrated.find(".//{eregs}analysis")
    # len() of an element counts its children, so this checks that the
    # first <analysis> has exactly one child.
    # NOTE(review): findall() may have been intended, to count the
    # <analysis> elements themselves -- confirm.
    self.assertEqual(len(analysis), 1)

    section = analysis.find("{eregs}analysisSection")
    self.assertEqual(analysis.getparent().tag, "{eregs}regulation")
    self.assertEqual(section.get("target"), "1234-1")
    self.assertEqual(section.get("notice"), "2015-12345")
def test_validate_keyterms(self):
    """validate_keyterms records one error and two warnings for the fixture.

    The first paragraph's content starts with its own keyterm text
    ("A Keyterm."); the second paragraph's content starts with only part of
    its keyterm ("Keyterm."). The validator must record exactly the events
    listed in ``expected`` below, in that order.
    """
    # Adjacent literals are concatenated by the compiler, so the fixture's
    # whitespace is exactly as written here.
    section_xml = (
        ' <section xmlns="eregs" >'
        ' <paragraph>'
        ' <title type="keyterm">A Keyterm.</title>'
        ' <content>A Keyterm. This paragraph should error.</content>'
        ' </paragraph>'
        ' <paragraph>'
        ' <title type="keyterm">Another Keyterm.</title>'
        ' <content>Keyterm. Fragment This one should warn.</content>'
        ' </paragraph>'
        ' </section> '
    )
    tree = etree.fromstring(section_xml)

    validator = EregsValidator(settings.XSD_FILE)
    validator.validate_keyterms(tree)

    # (severity, text the message must contain), in the order recorded.
    expected = [
        (Severity.ERROR, "Duplicate keyterm"),
        (Severity.WARNING, "keyterm fragment"),
        (Severity.WARNING, "repeating keyterms"),
    ]
    self.assertEqual(len(validator.events), len(expected))

    for event, (severity, fragment) in zip(validator.events, expected):
        self.assertEqual(event.severity, severity)
        # assertIn shows both operands on failure, unlike
        # assertTrue(fragment in msg).
        self.assertIn(fragment, event.msg)
def test_migrate_analysis_change_analysis_only(self):
    """A change holding only analysis is migrated and then removed.

    The fixture is a notice with a single <change> (label
    "1234-2-Analysis", parent "1234-2") whose only child is <analysis>.
    After migration the first <analysis> found must be a direct child of
    <notice>, its <analysisSection> must carry target="1234-2" and
    notice="2015-12345", and no <change> elements may remain.
    """
    notice_xml = (
        ' <notice xmlns="eregs"'
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
        ' xsi:schemaLocation="eregs ../../eregs.xsd">'
        ' <fdsys>'
        ' <date>2015-11-17</date>'
        ' </fdsys>'
        ' <preamble>'
        ' <documentNumber>2015-12345</documentNumber>'
        ' </preamble>'
        ' <changeset>'
        ' <change operation="added" label="1234-2-Analysis" parent="1234-2">'
        ' <analysis label="1234-Analysis">'
        ' <analysisSection>An added analysis</analysisSection>'
        ' </analysis>'
        ' </change>'
        ' </changeset>'
        ' </notice>'
    )
    tree = etree.fromstring(notice_xml)

    validator = EregsValidator(settings.XSD_FILE)
    migrated = validator.migrate_analysis(tree)

    analysis = migrated.find('.//{eregs}analysis')
    # len() of an element counts its children, so this checks that the
    # first <analysis> has exactly one child.
    # NOTE(review): findall() may have been intended, to count the
    # <analysis> elements themselves -- confirm.
    self.assertEqual(len(analysis), 1)

    section = analysis.find('{eregs}analysisSection')
    self.assertEqual(analysis.getparent().tag, '{eregs}notice')
    self.assertEqual(section.get('target'), '1234-2')
    self.assertEqual(section.get('notice'), '2015-12345')

    # The empty change should've been deleted.
    remaining_changes = migrated.findall('.//{eregs}change')
    self.assertEqual(len(remaining_changes), 0)
def test_migrate_analysis_reg(self):
    """Analysis nested in a regulation section ends up under the root.

    Parses a regulation whose section 1234-1 contains <analysis>, runs
    migrate_analysis, and checks that the first <analysis> found is a child
    of <regulation> and that its <analysisSection> has target="1234-1" and
    notice="2015-12345".
    """
    source = (
        ' <regulation xmlns="eregs"'
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
        ' xsi:schemaLocation="eregs ../../eregs.xsd">'
        ' <fdsys>'
        ' <date>2015-11-17</date>'
        ' </fdsys>'
        ' <preamble>'
        ' <documentNumber>2015-12345</documentNumber>'
        ' </preamble>'
        ' <part label="1234">'
        ' <subpart>'
        ' <section label="1234-1">'
        ' <analysis>'
        ' <analysisSection>Some analysis</analysisSection>'
        ' </analysis>'
        ' </section>'
        ' </subpart>'
        ' </part>'
        ' </regulation>'
    )
    validator = EregsValidator(settings.XSD_FILE)
    result = validator.migrate_analysis(etree.fromstring(source))

    migrated_analysis = result.find('.//{eregs}analysis')
    # len() of an element counts its children, so this checks that the
    # first <analysis> has exactly one child.
    # NOTE(review): findall() may have been intended, to count the
    # <analysis> elements themselves -- confirm.
    self.assertEqual(len(migrated_analysis), 1)

    parent = migrated_analysis.getparent()
    section = migrated_analysis.find('{eregs}analysisSection')
    self.assertEqual(parent.tag, '{eregs}regulation')
    self.assertEqual(section.get('target'), '1234-1')
    self.assertEqual(section.get('notice'), '2015-12345')
def get_validator(xml_tree):
    """Validate ``xml_tree`` against the schema and return the validator.

    Builds an EregsValidator from settings.XSD_FILE and runs validate_reg
    on the tree. If the validator reports the tree as not valid, every
    recorded event is printed and the process exits via sys.exit(0).

    :param xml_tree: parsed XML tree to validate.
    :return: the EregsValidator that performed the validation.
    """
    # Validate the file relative to schema
    schema_validator = EregsValidator(settings.XSD_FILE)
    schema_validator.validate_reg(xml_tree)

    if not schema_validator.is_valid:
        for problem in schema_validator.events:
            print(str(problem))
        # NOTE(review): exit status 0 signals success to the calling shell
        # even though validation failed -- confirm this is intended.
        sys.exit(0)

    return schema_validator
def get_validator(xml_tree, raise_instead_of_exiting=False):
    """Validate ``xml_tree`` against the schema and return the validator.

    Builds an EregsValidator from settings.XSD_FILE and runs validate_reg
    on the tree. If the validator reports the tree as not valid, every
    recorded event is printed; then either the last printed event is raised
    or the process exits via sys.exit(0).

    :param xml_tree: parsed XML tree to validate.
    :param raise_instead_of_exiting: when true, raise the last event instead
        of exiting (assumes events are exception instances -- TODO confirm).
    :return: the EregsValidator that performed the validation.
    """
    # Validate the file relative to schema
    schema_validator = EregsValidator(settings.XSD_FILE)
    schema_validator.validate_reg(xml_tree)

    if not schema_validator.is_valid:
        for event in schema_validator.events:
            print(str(event))

        if not raise_instead_of_exiting:
            # NOTE(review): exit status 0 signals success to the calling
            # shell even though validation failed -- confirm intent.
            sys.exit(0)
        else:
            # Deliberately uses the loop variable: the event raised is the
            # last one printed above.
            raise event

    return schema_validator
def migrate_analysis(cfr_title, cfr_part):
    """Migrate analysis from its context to top-level.

    Asks for interactive confirmation first, because the prompt warns that
    the operation irrevocably modifies the regulation and notice files.
    Any answer other than 'y' or 'n' re-prompts; 'n' returns without
    touching anything. On 'y', every regulation file and then every notice
    file returned by find_all for ``cfr_part`` is migrated and re-validated.

    :param cfr_title: CFR title; not used by this function.
    :param cfr_part: CFR part whose regulation and notice files are migrated.
    """
    # raw_input exists only on Python 2; Python 3 renamed it to input.
    # (Python 2's own input() evaluates what is typed, so raw_input must be
    # preferred whenever it is available.)
    try:
        ask = raw_input
    except NameError:
        ask = input

    # Prompt user to be sure they want to do this
    print(colored('This will irrevocably modify all regulation and notice files for this regulation. '
                  'Is this ok?', 'red'))
    answer = None
    while answer not in ['y', 'n']:
        answer = ask('Migrate all analysis? y/n: ')

    if answer != 'y':
        return

    # Migrate regulation files
    for reg_file in find_all(cfr_part):
        _migrate_analysis_in_file(reg_file)

    # Migrate notices
    for notice_file in find_all(cfr_part, is_notice=True):
        _migrate_analysis_in_file(notice_file)


def _migrate_analysis_in_file(file_name):
    """Migrate the analysis of one RegML file, then validate the result.

    Prints the file name, parses the file with a huge-tree parser, and runs
    the validator's migrate_analysis followed by validate_reg on the tree.
    """
    print(file_name)
    with open(file_name, 'r') as f:
        reg_xml = f.read()

    # huge_tree=True asks lxml to lift its default parser size limits.
    parser = etree.XMLParser(huge_tree=True)
    xml_tree = etree.fromstring(reg_xml, parser)

    validator = EregsValidator(settings.XSD_FILE)
    # file_name is passed through; presumably the validator writes the
    # migrated tree back to it -- verify against EregsValidator.
    validator.migrate_analysis(xml_tree, file_name)
    # NOTE(review): events recorded by this validation are not reported.
    validator.validate_reg(xml_tree)
def test_is_not_valid_with_any_errors(self):
    """Even a single INFO-severity event makes the validator not valid."""
    validator = EregsValidator(settings.XSD_FILE)
    info_event = EregsValidationEvent('test', Severity.INFO)
    validator.events = [info_event]

    self.assertFalse(validator.is_valid)
def test_load_schema_raises_if_remote_schema_does_not_exist(self):
    """load_schema raises XMLSchemaParseError for an unreachable URL."""
    missing_url = 'http://some.non/existent.url'

    # Construction stays inside the context manager so an error raised by
    # either the constructor or load_schema satisfies the assertion.
    with self.assertRaises(etree.XMLSchemaParseError):
        validator = EregsValidator(missing_url)
        validator.load_schema()
def test_load_schema_raises_if_local_schema_does_not_exist(self):
    """load_schema raises XMLSchemaParseError for a missing local path."""
    missing_path = '/some/non/existent/path'

    # Construction stays inside the context manager so an error raised by
    # either the constructor or load_schema satisfies the assertion.
    with self.assertRaises(etree.XMLSchemaParseError):
        validator = EregsValidator(missing_path)
        validator.load_schema()
def test_load_schema_works_if_schema_is_local(self):
    """load_schema succeeds for the schema file named in settings.XSD_FILE.

    An XMLSchemaParseError is converted into an explicit test failure with a
    descriptive message.
    """
    try:
        validator = EregsValidator(settings.XSD_FILE)
        validator.load_schema()
    except etree.XMLSchemaParseError:
        self.fail('schema should load successfully from local path')
def test_has_critical_errors_true_with_critical_errors(self):
    """A CRITICAL-severity event makes has_critical_errors true."""
    validator = EregsValidator(settings.XSD_FILE)
    critical_event = EregsValidationEvent('test', Severity.CRITICAL)
    validator.events = [critical_event]

    self.assertTrue(validator.has_critical_errors)
def test_has_critical_errors_false_with_non_severe_errors(self):
    """An INFO-severity event alone leaves has_critical_errors false."""
    validator = EregsValidator(settings.XSD_FILE)
    info_event = EregsValidationEvent('test', Severity.INFO)
    validator.events = [info_event]

    self.assertFalse(validator.has_critical_errors)
def test_has_critical_errors_false_with_no_errors(self):
    """A freshly constructed validator reports no critical errors."""
    fresh_validator = EregsValidator(settings.XSD_FILE)

    self.assertFalse(fresh_validator.has_critical_errors)
def parser_driver(regulation_file, check_terms=False, correct_interps=False,
                  headerize_interps=False, fix_missed_cites=False):
    """Parse one regulation XML file and write its tree, layers and notice.

    Steps, in order:

    1. Read and parse ``regulation_file``, then validate it against the
       schema. If it is not valid, print every event and exit via
       sys.exit(0).
    2. Build the regulation tree and apply the optional validator
       corrections selected by the flags.
    3. Build every layer, the analysis and the notice from the XML tree.
    4. Run term and internal-citation validation (plus term-reference
       validation when ``check_terms`` is set) and print all events.
    5. Hand the regulation JSON, each layer and the notice to write_layer.

    :param regulation_file: path of the regulation XML file; its base name
        minus '.xml' is used as the version.
    :param check_terms: also run validate_term_references.
    :param correct_interps: run insert_interp_markers before building layers.
    :param headerize_interps: run headerize_interps before building layers.
    :param fix_missed_cites: run fix_omitted_cites before building layers.
    """
    with open(regulation_file, 'r') as source:
        raw_xml = source.read()
    xml_tree = etree.fromstring(raw_xml)

    # validate relative to schema
    validator = EregsValidator(settings.XSD_FILE)
    validator.validate_reg(xml_tree)
    if not validator.is_valid:
        for problem in validator.events:
            print(str(problem))
        # NOTE(review): exit status 0 signals success to the calling shell
        # even though validation failed -- confirm this is intended.
        sys.exit(0)

    reg_tree = build_reg_tree(xml_tree)
    reg_number = reg_tree.label[0]

    # we can correct interps right away if necessary
    if correct_interps:
        validator.insert_interp_markers(xml_tree, regulation_file)
    if headerize_interps:
        validator.headerize_interps(xml_tree, regulation_file)
    if fix_missed_cites:
        validator.fix_omitted_cites(xml_tree, regulation_file)

    # Build everything up front, in a fixed order, before any validation
    # that depends on the layers.
    paragraph_markers = build_paragraph_marker_layer(xml_tree)
    internal_citations = build_internal_citations_layer(xml_tree)
    external_citations = build_external_citations_layer(xml_tree)
    terms = build_terms_layer(xml_tree)
    meta = build_meta_layer(xml_tree)
    toc = build_toc_layer(xml_tree)
    keyterms = build_keyterm_layer(xml_tree)
    graphics = build_graphics_layer(xml_tree)
    formatting = build_formatting_layer(xml_tree)
    interps = build_interp_layer(xml_tree)
    analysis = build_analysis(xml_tree)
    notice_dict = build_notice(xml_tree)

    # Layer-dependent checks. Their events are only printed; processing
    # continues regardless of what they report.
    validator.validate_terms(xml_tree, terms)
    validator.validate_internal_cites(xml_tree, internal_citations)
    if check_terms:
        validator.validate_term_references(xml_tree, terms, regulation_file)
    for problem in validator.events:
        print(str(problem))

    reg_tree.include_children = True
    reg_json = reg_tree.to_json()

    # The file name wins over the document number recorded inside the XML.
    notice = xml_tree.find('.//{eregs}documentNumber').text
    version = os.path.basename(regulation_file).replace('.xml', '')
    if notice != version:
        print('Notice ({}) different from version ({}), using version'.format(notice, version))
        notice = version

    # (payload, destination) pairs, written in this exact order.
    outputs = [
        (reg_json, 'regulation'),
        (meta, 'layer/meta'),
        (paragraph_markers, 'layer/paragraph-markers'),
        (internal_citations, 'layer/internal-citations'),
        (external_citations, 'layer/external-citations'),
        (terms, 'layer/terms'),
        (toc, 'layer/toc'),
        (keyterms, 'layer/keyterms'),
        (graphics, 'layer/graphics'),
        (formatting, 'layer/formatting'),
        (interps, 'layer/interpretations'),
        (analysis, 'layer/analyses'),
        (notice_dict, 'notice'),
    ]
    for payload, destination in outputs:
        write_layer(payload, reg_number, notice, destination)
def test_is_valid_with_no_errors(self):
    """A freshly constructed validator reports itself as valid."""
    fresh_validator = EregsValidator(settings.XSD_FILE)

    self.assertTrue(fresh_validator.is_valid)