def ecfr_notice(title, cfr_part, notice, applies_to, act_title,
                act_section, with_version=False, without_notice=False):
    """
    Generate RegML for a single notice from eCFR XML.

    Reads the existing RegML file found via ``find_file`` under
    ``cfr_part``/``applies_to``, fetches the notices for the regulation,
    builds the one whose document number equals ``notice`` and compiles
    its changes on top of the tree built from the existing file.

    :param title: handed to ``Builder`` as ``cfr_title``.
    :param cfr_part: handed to ``Builder`` as ``cfr_part``; also the leading
        path component used to locate the ``applies_to`` file.
    :param notice: document number of the notice to build.
    :param applies_to: file name (under ``cfr_part``) of the version the
        notice applies to.
    :param act_title: first element of the pair passed to
        ``builder.generate_layers``.
    :param act_section: second element of that pair.
    :param with_version: when true, also write the regulation file for the
        newly compiled tree.
    :param without_notice: when true, skip writing the notice file.
    :raises StopIteration: if none of the fetched notices carries the
        document number ``notice``.
    """
    # Get the notice the new one applies to
    with open(find_file(os.path.join(cfr_part, applies_to)), 'r') as f:
        reg_xml = f.read()
    parser = etree.XMLParser(huge_tree=True)
    xml_tree = etree.fromstring(reg_xml, parser)
    doc_number = xml_tree.find('.//{eregs}documentNumber').text

    # NOTE(review): the returned validator was never used by this function;
    # the call is kept in case get_validator itself has side effects.
    get_validator(xml_tree)

    # Get the ecfr builder
    builder = Builder(cfr_title=title,
                      cfr_part=cfr_part,
                      doc_number=doc_number,
                      checkpointer=None,
                      writer_type='XML')

    # Fetch the notices from the FR API and find the notice we're
    # looking for
    builder.fetch_notices_json()
    print([n['document_number'] for n in builder.notices_json])
    notice_json = next((n for n in builder.notices_json
                        if n['document_number'] == notice))

    # Build the notice. A separate local is used so the ``notice`` argument
    # (a document number) is not rebound to the built notice.
    built_notice = builder.build_single_notice(notice_json)[0]

    if 'changes' not in built_notice:
        print('There are no changes in this notice to apply.')
        return

    # We've successfully fetched and parsed the new notice.
    # Build the reg tree for the notice it applies to.
    old_tree = build_reg_tree(xml_tree)

    # Build the new reg tree from the old_tree + notice changes
    last_version = doc_number
    version = built_notice['document_number']
    merged_changes = builder.merge_changes(version, built_notice['changes'])
    reg_tree = compile_regulation(old_tree, merged_changes)
    layer_cache = LayerCacheAggregator()
    layers = builder.generate_layers(reg_tree,
                                     [act_title, act_section],
                                     layer_cache)

    # Write the notice file
    if not without_notice:
        builder.write_notice(version,
                             old_tree=old_tree,
                             reg_tree=reg_tree,
                             layers=layers,
                             last_version=last_version)

    # Write the regulation file for the new notice. The compiled tree is
    # ``reg_tree``; the previous code referenced an undefined ``new_tree``
    # here and raised NameError whenever with_version was set.
    if with_version:
        builder.write_regulation(reg_tree, layers=layers)
def parse_regulation(args):
    """
    Run the parser on the specified command-line arguments.

    Kept as its own function so that it can be profiled in isolation.
    Reads ``args.filename``, ``args.notice``, ``args.title``,
    ``args.act_title``, ``args.act_section`` and ``args.generate_diffs``.
    """
    with codecs.open(args.filename, 'r', 'utf-8') as reg_file:
        reg_text = reg_file.read()

    version = args.notice
    act_citation = [args.act_title, args.act_section]

    # First, the regulation tree
    tree = Builder.reg_tree(reg_text)
    part = tree.label_id()
    builder = Builder(cfr_title=args.title, cfr_part=part,
                      doc_number=version)
    builder.write_notices()

    # Always do at least the first reg
    logger.info("Version %s", version)
    builder.write_regulation(tree)

    cache = LayerCacheAggregator()
    builder.gen_and_write_layers(tree, act_citation, cache)
    cache.replace_using(tree)

    if not args.generate_diffs:
        return
    generate_diffs(version, tree, act_citation, builder, cache)
def build_by_notice(filename, title, act_title, act_section,
                    notice_doc_numbers, doc_number=None, checkpoint=None):
    """
    Build a regulation plus an explicit list of notices.

    Reads the regulation text from ``filename``, builds its tree, builds
    each notice named in ``notice_doc_numbers`` and writes the regulation
    and its layers.

    :param filename: path of the UTF-8 regulation file to read.
    :param title: handed to ``Builder`` as ``cfr_title``; also used to
        determine the document number and in the checkpointer suffix.
    :param act_title: first element of the pair passed to
        ``builder.gen_and_write_layers``.
    :param act_section: second element of that pair.
    :param notice_doc_numbers: iterable of notice document numbers, each
        passed to ``builder.build_notice_from_doc_number``.
    :param doc_number: document number of the initial version; when None
        it is obtained from ``Builder.determine_doc_number``.
    :param checkpoint: when truthy, passed to ``Checkpointer``; otherwise a
        ``NullCheckpointer`` is used.
    """
    with codecs.open(filename, 'r', 'utf-8') as f:
        reg = f.read()
        file_digest = hashlib.sha256(reg.encode('utf-8')).hexdigest()

    if checkpoint:
        checkpointer = Checkpointer(checkpoint)
    else:
        checkpointer = NullCheckpointer()

    # build the initial tree
    reg_tree = checkpointer.checkpoint(
        "init-tree-" + file_digest,
        lambda: Builder.reg_tree(reg))

    title_part = reg_tree.label_id()

    if doc_number is None:
        doc_number = Builder.determine_doc_number(reg, title, title_part)

    # Use the ``title`` argument; the previous ``args.title`` depended on a
    # module-level ``args`` that is not part of this function's signature.
    checkpointer.suffix = ":".join(
        ["", title_part, str(title), doc_number])

    # create the builder
    builder = Builder(cfr_title=title,
                      cfr_part=title_part,
                      doc_number=doc_number,
                      checkpointer=checkpointer)

    builder.fetch_notices_json()

    for notice in notice_doc_numbers:
        builder.build_notice_from_doc_number(notice)

    builder.write_regulation(reg_tree)
    layer_cache = LayerCacheAggregator()

    act_title_and_section = [act_title, act_section]

    builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache)
    layer_cache.replace_using(reg_tree)

    # NOTE(review): this still reads a module-level ``args`` because the
    # signature has no diff flag -- confirm ``args`` is always defined
    # when this function is called.
    # NOTE(review): other visible call sites pass ``doc_number`` as the
    # first argument to generate_diffs -- confirm which signature applies.
    if args.generate_diffs:
        generate_diffs(reg_tree, act_title_and_section, builder, layer_cache)
def build_by_notice(filename, title, act_title, act_section,
                    notice_doc_numbers, doc_number=None, checkpoint=None):
    """
    Build a regulation plus an explicit list of notices.

    Reads the regulation text from ``filename``, builds its tree, builds
    each notice named in ``notice_doc_numbers`` and writes the regulation
    and its layers.

    :param filename: path of the UTF-8 regulation file to read.
    :param title: handed to ``Builder`` as ``cfr_title``; also used to
        determine the document number and in the checkpointer suffix.
    :param act_title: first element of the pair passed to
        ``builder.gen_and_write_layers``.
    :param act_section: second element of that pair.
    :param notice_doc_numbers: iterable of notice document numbers, each
        passed to ``builder.build_notice_from_doc_number``.
    :param doc_number: document number of the initial version; when None
        it is obtained from ``Builder.determine_doc_number``.
    :param checkpoint: when truthy, passed to ``Checkpointer``; otherwise a
        ``NullCheckpointer`` is used.
    """
    with codecs.open(filename, 'r', 'utf-8') as f:
        reg = f.read()
        file_digest = hashlib.sha256(reg.encode('utf-8')).hexdigest()

    if checkpoint:
        checkpointer = Checkpointer(checkpoint)
    else:
        checkpointer = NullCheckpointer()

    # build the initial tree
    reg_tree = checkpointer.checkpoint(
        "init-tree-" + file_digest,
        lambda: Builder.reg_tree(reg))

    title_part = reg_tree.label_id()

    if doc_number is None:
        doc_number = Builder.determine_doc_number(reg, title, title_part)

    # Use the ``title`` argument; the previous ``args.title`` depended on a
    # module-level ``args`` that is not part of this function's signature.
    checkpointer.suffix = ":".join(
        ["", title_part, str(title), doc_number])

    # create the builder
    builder = Builder(cfr_title=title,
                      cfr_part=title_part,
                      doc_number=doc_number,
                      checkpointer=checkpointer)

    builder.fetch_notices_json()

    for notice in notice_doc_numbers:
        builder.build_notice_from_doc_number(notice)

    builder.write_regulation(reg_tree)
    layer_cache = LayerCacheAggregator()

    act_title_and_section = [act_title, act_section]

    builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache)
    layer_cache.replace_using(reg_tree)

    # NOTE(review): this still reads a module-level ``args`` because the
    # signature has no diff flag -- confirm ``args`` is always defined
    # when this function is called.
    # NOTE(review): other visible call sites pass ``doc_number`` as the
    # first argument to generate_diffs -- confirm which signature applies.
    if args.generate_diffs:
        generate_diffs(reg_tree, act_title_and_section, builder, layer_cache)
def parse_regulation(args):
    """
    Run the parser on the specified command-line arguments.

    Kept as its own function so that it can be profiled in isolation.
    Reads ``args.filename``, ``args.title``, ``args.act_title``,
    ``args.act_section``, ``args.checkpoint`` and ``args.generate_diffs``.

    Raises ValueError when no document number can be determined.
    """
    with codecs.open(args.filename, 'r', 'utf-8') as reg_file:
        reg_text = reg_file.read()
    digest = hashlib.sha256(reg_text.encode('utf-8')).hexdigest()

    act_citation = [args.act_title, args.act_section]

    # A truthy args.checkpoint selects the real checkpointer.
    checkpointer = (Checkpointer(args.checkpoint) if args.checkpoint
                    else NullCheckpointer())

    def build_tree():
        return Builder.reg_tree(reg_text)

    # First, the regulation tree
    tree = checkpointer.checkpoint("init-tree-" + digest, build_tree)
    part = tree.label_id()

    def find_doc_number():
        return Builder.determine_doc_number(reg_text, args.title, part)

    version = checkpointer.checkpoint("doc-number-" + digest,
                                      find_doc_number)
    if not version:
        raise ValueError("Could not determine document number")

    suffix_fields = ["", part, str(args.title), version]
    checkpointer.suffix = ":".join(suffix_fields)

    # Run Builder
    builder = Builder(cfr_title=args.title, cfr_part=part,
                      doc_number=version, checkpointer=checkpointer)
    builder.write_notices()

    # Always do at least the first reg
    logger.info("Version %s", version)
    builder.write_regulation(tree)

    cache = LayerCacheAggregator()
    builder.gen_and_write_layers(tree, act_citation, cache)
    cache.replace_using(tree)

    if not args.generate_diffs:
        return
    generate_diffs(version, tree, act_citation, builder, cache,
                   checkpointer)
# Build the initial regulation tree and the builder for it. ``reg`` and
# ``doc_number`` are defined before this section; the CFR title comes from
# the second command-line argument.
reg_tree = Builder.reg_tree(reg)
builder = Builder(cfr_title=int(sys.argv[2]),
                  cfr_part=reg_tree.label_id(),
                  doc_number=doc_number)

# Didn't include the provided version
if not any(n['document_number'] == doc_number for n in builder.notices):
    # Parenthesized form prints the same text on Python 2 and Python 3.
    print("Could not find notice_doc_#, %s" % doc_number)
    # Use sys.exit rather than the site-provided exit() helper, and report
    # the failure with a non-zero status.
    sys.exit(1)

builder.write_notices()

# Always do at least the first reg
logger.info("Version %s", doc_number)
builder.write_regulation(reg_tree)
layer_cache = LayerCacheAggregator()
# sys.argv[4:6] is passed as the third and fourth positional arguments of
# the layer-generation call below.
builder.gen_and_write_layers(reg_tree, sys.argv[4:6], layer_cache)
layer_cache.replace_using(reg_tree)

# Later versions are generated unless a seventh argument is given and it is
# something other than "true" (case-insensitive).
if len(sys.argv) < 7 or sys.argv[6].lower() == 'true':
    all_versions = {doc_number: reg_tree}
    for last_notice, old, new_tree, notices in builder.revision_generator(
            reg_tree):
        version = last_notice['document_number']
        logger.info("Version %s", version)
        all_versions[version] = new_tree
        # Point the builder at the new version before writing it out.
        builder.doc_number = version
        builder.write_regulation(new_tree)
        layer_cache.invalidate_by_notice(last_notice)
        builder.gen_and_write_layers(new_tree, sys.argv[4:6], layer_cache,
                                     notices)
lambda: Builder.determine_doc_number(reg, args.title, title_part)) if not doc_number: raise ValueError("Could not determine document number") checkpointer.suffix = ":".join( ["", title_part, str(args.title), doc_number]) # Run Builder builder = Builder(cfr_title=args.title, cfr_part=title_part, doc_number=doc_number, checkpointer=checkpointer) builder.write_notices() # Always do at least the first reg logger.info("Version %s", doc_number) builder.write_regulation(reg_tree) layer_cache = LayerCacheAggregator() builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache) layer_cache.replace_using(reg_tree) # this used to assume implicitly that if gen-diffs was not specified it was # True; changed it to explicit check if args.generate_diffs: all_versions = {doc_number: reg_tree} for last_notice, old, new_tree, notices in builder.revision_generator( reg_tree): version = last_notice['document_number'] logger.info("Version %s", version) all_versions[version] = new_tree