def test_determine_doc_number_annual(self, fetch_notice_json):
    """An annual edition of the reg should yield the _latest_ document
    number among the notices fetched for it.

    Of the mocked notices, '333-333' is the expected answer: it shares
    the latest effective_on with '222-222' but has the later
    publication_date."""
    # (document_number, effective_on, publication_date)
    notice_rows = (
        ('111-111', '2011-01-01', '2011-01-01'),
        ('222-222', '2011-10-20', '2011-02-02'),
        ('333-333', '2011-10-20', '2011-03-03'),
        ('444-444', '2011-04-04', '2011-04-04'),
    )
    fetch_notice_json.return_value = [
        {'document_number': number,
         'effective_on': effective,
         'publication_date': published}
        for number, effective, published in notice_rows]

    # Adjacent literals are concatenated into a single XML document string
    annual_xml = (
        '<?xml version="1.0"?> '
        '<?xml-stylesheet type="text/xsl" href="cfr.xsl"?> '
        '<CFRGRANULE '
        'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xsi:noNamespaceSchemaLocation="CFRMergedXML.xsd"> '
        '<FDSYS> '
        '<CFRTITLE>12</CFRTITLE> '
        '<DATE>2013-01-01</DATE> '
        '<ORIGINALDATE>2012-01-01</ORIGINALDATE> '
        '</FDSYS> '
        '</CFRGRANULE>')
    granule = etree.fromstring(annual_xml)

    found = Builder.determine_doc_number(granule, '12', '34')
    self.assertEqual('333-333', found)

    # The mocked fetch must have been asked for final notices only, capped
    # at the ORIGINALDATE value from the fixture
    positional, keywords = fetch_notice_json.call_args
    self.assertEqual(('12', '34'), positional)
    self.assertEqual(
        {'max_effective_date': '2012-01-01', 'only_final': True},
        keywords)
def test_determine_doc_number_annual(self, fetch_notice_json):
    """An annual edition of the reg should yield a document number taken
    from the fetched notices (here the first mocked notice, '111-111')"""
    first_notice = {'el': 1, 'document_number': '111-111'}
    second_notice = {'el': 2, 'document_number': '222-222'}
    fetch_notice_json.return_value = [first_notice, second_notice]

    # Adjacent literals are concatenated into a single XML document string
    annual_xml = (
        '<?xml version="1.0"?> '
        '<?xml-stylesheet type="text/xsl" href="cfr.xsl"?> '
        '<CFRGRANULE '
        'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xsi:noNamespaceSchemaLocation="CFRMergedXML.xsd"> '
        '<FDSYS> '
        '<CFRTITLE>12</CFRTITLE> '
        '<DATE>2013-01-01</DATE> '
        '<ORIGINALDATE>2012-01-01</ORIGINALDATE> '
        '</FDSYS> '
        '</CFRGRANULE>')

    found = Builder.determine_doc_number(annual_xml, '12', '34')
    self.assertEqual('111-111', found)

    # Inspect how the mocked notice fetch was invoked
    positional, keywords = fetch_notice_json.call_args
    self.assertEqual(('12', '34'), positional)
    expected_keywords = {
        'max_effective_date': '2012-01-01',
        'only_final': True,
    }
    self.assertEqual(expected_keywords, keywords)
def test_determine_doc_number_fr(self):
    """An FR notice should yield the document number found in the
    fixture's FRDOC element"""
    # Adjacent literals are concatenated into a single XML string
    notice_xml = (
        ' <RULE> '
        '<FRDOC>[FR Doc. 2011-31715 Filed 12-21-11; 8:45 am]</FRDOC> '
        '<BILCOD>BILLING CODE 4810-AM-P</BILCOD> '
        '</RULE>')
    found = Builder.determine_doc_number(notice_xml, '00', '00')
    self.assertEqual('2011-31715', found)
def test_determine_doc_number_fr(self):
    """Check that '2011-31715' is extracted from an FR notice whose FRDOC
    element carries that number"""
    rule_pieces = [
        ' <RULE>',
        '<FRDOC>[FR Doc. 2011-31715 Filed 12-21-11; 8:45 am]</FRDOC>',
        '<BILCOD>BILLING CODE 4810-AM-P</BILCOD>',
        '</RULE>',
    ]
    # Pieces are separated by single spaces in the fixture string
    fr_notice = ' '.join(rule_pieces)
    expected = '2011-31715'
    self.assertEqual(
        expected, Builder.determine_doc_number(fr_notice, '00', '00'))
def build_by_notice(filename, title, act_title, act_section,
                    notice_doc_numbers, doc_number=None, checkpoint=None):
    """Build a regulation from `filename`, applying an explicit list of
    notices.

    :param filename: path of the regulation file; read as UTF-8
    :param title: CFR title, passed to the Builder and used in the
        checkpoint suffix
    :param act_title: first element of the pair handed to layer generation
    :param act_section: second element of the pair handed to layer
        generation
    :param notice_doc_numbers: iterable of notice document numbers; each is
        passed to `Builder.build_notice_from_doc_number` in order
    :param doc_number: document number of this version; when None it is
        derived with `Builder.determine_doc_number`
    :param checkpoint: when truthy, passed to `Checkpointer`; otherwise a
        `NullCheckpointer` is used
    :raises ValueError: if no document number was given or could be
        determined
    """
    with codecs.open(filename, 'r', 'utf-8') as f:
        reg = f.read()
    # The digest of the file contents keys the checkpoint for the tree
    file_digest = hashlib.sha256(reg.encode('utf-8')).hexdigest()

    if checkpoint:
        checkpointer = Checkpointer(checkpoint)
    else:
        checkpointer = NullCheckpointer()

    # build the initial tree
    reg_tree = checkpointer.checkpoint(
        "init-tree-" + file_digest,
        lambda: Builder.reg_tree(reg))
    title_part = reg_tree.label_id()

    if doc_number is None:
        doc_number = Builder.determine_doc_number(reg, title, title_part)
    # Same guard as parse_regulation: fail with a clear message rather than
    # an opaque TypeError from the join below
    if not doc_number:
        raise ValueError("Could not determine document number")

    # Use the `title` parameter; this function receives no `args` object,
    # so `args.title` only worked if a module-level global happened to exist
    checkpointer.suffix = ":".join(
        ["", title_part, str(title), doc_number])

    # create the builder
    builder = Builder(cfr_title=title,
                      cfr_part=title_part,
                      doc_number=doc_number,
                      checkpointer=checkpointer)

    builder.fetch_notices_json()

    for notice in notice_doc_numbers:
        builder.build_notice_from_doc_number(notice)

    builder.write_regulation(reg_tree)
    layer_cache = LayerCacheAggregator()

    act_title_and_section = [act_title, act_section]

    builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache)
    layer_cache.replace_using(reg_tree)

    # NOTE(review): `args` is not a parameter here, so this still depends on
    # a module-level `args`; there is no parameter carrying this flag.
    # NOTE(review): parse_regulation calls generate_diffs with doc_number
    # and checkpointer as well -- confirm which signature is current.
    if args.generate_diffs:
        generate_diffs(reg_tree, act_title_and_section, builder, layer_cache)
def build_by_notice(filename, title, act_title, act_section,
                    notice_doc_numbers, doc_number=None, checkpoint=None):
    """Build a regulation from `filename`, applying an explicit list of
    notices.

    :param filename: path of the regulation file; read as UTF-8
    :param title: CFR title, passed to the Builder and used in the
        checkpoint suffix
    :param act_title: first element of the pair handed to layer generation
    :param act_section: second element of the pair handed to layer
        generation
    :param notice_doc_numbers: iterable of notice document numbers; each is
        passed to `Builder.build_notice_from_doc_number` in order
    :param doc_number: document number of this version; when None it is
        derived with `Builder.determine_doc_number`
    :param checkpoint: when truthy, passed to `Checkpointer`; otherwise a
        `NullCheckpointer` is used
    :raises ValueError: if no document number was given or could be
        determined
    """
    with codecs.open(filename, 'r', 'utf-8') as f:
        reg = f.read()
    # The digest of the file contents keys the checkpoint for the tree
    file_digest = hashlib.sha256(reg.encode('utf-8')).hexdigest()

    if checkpoint:
        checkpointer = Checkpointer(checkpoint)
    else:
        checkpointer = NullCheckpointer()

    # build the initial tree
    reg_tree = checkpointer.checkpoint(
        "init-tree-" + file_digest,
        lambda: Builder.reg_tree(reg))
    title_part = reg_tree.label_id()

    if doc_number is None:
        doc_number = Builder.determine_doc_number(reg, title, title_part)
    # Same guard as parse_regulation: fail with a clear message rather than
    # an opaque TypeError from the join below
    if not doc_number:
        raise ValueError("Could not determine document number")

    # Use the `title` parameter; this function receives no `args` object,
    # so `args.title` only worked if a module-level global happened to exist
    checkpointer.suffix = ":".join(
        ["", title_part, str(title), doc_number])

    # create the builder
    builder = Builder(cfr_title=title,
                      cfr_part=title_part,
                      doc_number=doc_number,
                      checkpointer=checkpointer)

    builder.fetch_notices_json()

    for notice in notice_doc_numbers:
        builder.build_notice_from_doc_number(notice)

    builder.write_regulation(reg_tree)
    layer_cache = LayerCacheAggregator()

    act_title_and_section = [act_title, act_section]

    builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache)
    layer_cache.replace_using(reg_tree)

    # NOTE(review): `args` is not a parameter here, so this still depends on
    # a module-level `args`; there is no parameter carrying this flag.
    # NOTE(review): parse_regulation calls generate_diffs with doc_number
    # and checkpointer as well -- confirm which signature is current.
    if args.generate_diffs:
        generate_diffs(reg_tree, act_title_and_section, builder, layer_cache)
def parse_regulation(args):
    """Drive a full parse from the parsed command-line arguments.

    Kept as its own function so the run can be profiled. Reads
    `args.filename`, builds the regulation tree, determines the document
    number, then writes notices, the regulation and its layers, and
    (when `args.generate_diffs` is set) the diffs.

    :raises ValueError: if no document number could be determined
    """
    with codecs.open(args.filename, 'r', 'utf-8') as source:
        reg_text = source.read()
    # Checkpoint keys are derived from a digest of the file contents
    digest = hashlib.sha256(reg_text.encode('utf-8')).hexdigest()
    act_title_and_section = [args.act_title, args.act_section]

    checkpointer = (
        Checkpointer(args.checkpoint) if args.checkpoint
        else NullCheckpointer())

    # First, the regulation tree
    def build_tree():
        return Builder.reg_tree(reg_text)

    tree = checkpointer.checkpoint("init-tree-" + digest, build_tree)
    part = tree.label_id()

    def find_doc_number():
        return Builder.determine_doc_number(reg_text, args.title, part)

    doc_number = checkpointer.checkpoint(
        "doc-number-" + digest, find_doc_number)
    if not doc_number:
        raise ValueError("Could not determine document number")

    suffix_fields = ("", part, str(args.title), doc_number)
    checkpointer.suffix = ":".join(suffix_fields)

    # Run Builder
    builder = Builder(
        cfr_title=args.title,
        cfr_part=part,
        doc_number=doc_number,
        checkpointer=checkpointer,
    )
    builder.write_notices()

    # Always do at least the first reg
    logger.info("Version %s", doc_number)
    builder.write_regulation(tree)

    cache = LayerCacheAggregator()
    builder.gen_and_write_layers(tree, act_title_and_section, cache)
    cache.replace_using(tree)

    if not args.generate_diffs:
        return
    generate_diffs(doc_number, tree, act_title_and_section, builder,
                   cache, checkpointer)
def test_determine_doc_number_annual(self, fetch_notice_json):
    """Given an annual edition of the reg, a document number should come
    back from the fetched notices (the first mocked one, '111-111')"""
    # (el, document_number)
    mocked = ((1, '111-111'), (2, '222-222'))
    fetch_notice_json.return_value = [
        {'el': el, 'document_number': number} for el, number in mocked]

    # Adjacent literals are concatenated into a single XML document string
    granule_xml = (
        '<?xml version="1.0"?> '
        '<?xml-stylesheet type="text/xsl" href="cfr.xsl"?> '
        '<CFRGRANULE '
        'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xsi:noNamespaceSchemaLocation="CFRMergedXML.xsd"> '
        '<FDSYS> '
        '<CFRTITLE>12</CFRTITLE> '
        '<DATE>2013-01-01</DATE> '
        '<ORIGINALDATE>2012-01-01</ORIGINALDATE> '
        '</FDSYS> '
        '</CFRGRANULE>')

    doc_number = Builder.determine_doc_number(granule_xml, '12', '34')
    self.assertEqual('111-111', doc_number)

    # Inspect how the mocked notice fetch was invoked
    call_positional, call_keywords = fetch_notice_json.call_args
    self.assertEqual(('12', '34'), call_positional)
    self.assertEqual(
        {'max_effective_date': '2012-01-01', 'only_final': True},
        call_keywords)
file_digest = hashlib.sha256(reg.encode('utf-8')).hexdigest() act_title_and_section = [args.act_title, args.act_section] if args.checkpoint: checkpointer = Checkpointer(args.checkpoint) else: checkpointer = NullCheckpointer() # First, the regulation tree reg_tree = checkpointer.checkpoint( "init-tree-" + file_digest, lambda: Builder.reg_tree(reg)) title_part = reg_tree.label_id() doc_number = checkpointer.checkpoint( "doc-number-" + file_digest, lambda: Builder.determine_doc_number(reg, args.title, title_part)) if not doc_number: raise ValueError("Could not determine document number") checkpointer.suffix = ":".join( ["", title_part, str(args.title), doc_number]) # Run Builder builder = Builder(cfr_title=args.title, cfr_part=title_part, doc_number=doc_number, checkpointer=checkpointer) builder.write_notices() # Always do at least the first reg logger.info("Version %s", doc_number) builder.write_regulation(reg_tree)