def parse_regulation(args):
    """Parse one regulation file as directed by the command-line `args`.

    Kept as a stand-alone function so the whole run can be profiled.

    Attributes read from `args`: `filename`, `notice`, `act_title`,
    `act_section`, `title` and `generate_diffs`.
    """
    with codecs.open(args.filename, 'r', 'utf-8') as source:
        reg_text = source.read()

    notice_number = args.notice
    act_info = [args.act_title, args.act_section]

    # The regulation tree comes first; the builder's CFR part is taken
    # from the tree's label.
    tree = Builder.reg_tree(reg_text)
    part = tree.label_id()
    builder = Builder(cfr_title=args.title, cfr_part=part,
                      doc_number=notice_number)
    builder.write_notices()

    # The first version of the regulation is always written.
    logger.info("Version %s", notice_number)
    builder.write_regulation(tree)

    cache = LayerCacheAggregator()
    builder.gen_and_write_layers(tree, act_info, cache)
    cache.replace_using(tree)

    if not args.generate_diffs:
        return
    generate_diffs(notice_number, tree, act_info, builder, cache)
def build_by_notice(filename, title, act_title, act_section,
                    notice_doc_numbers, doc_number=None, checkpoint=None,
                    write_diffs=None):
    """Build a regulation from `filename`, then build the listed notices.

    Parameters:
        filename: path of the regulation file, read as UTF-8.
        title: CFR title; passed to `Builder` and used in the checkpoint
            suffix.
        act_title, act_section: passed on (as a two-item list) to layer
            generation and diff generation.
        notice_doc_numbers: iterable of notice document numbers; each is
            passed to `builder.build_notice_from_doc_number`.
        doc_number: document number of this version. When None it is
            determined via `Builder.determine_doc_number`.
        checkpoint: when truthy, passed to `Checkpointer`; otherwise a
            `NullCheckpointer` is used.
        write_diffs: whether to call `generate_diffs` at the end. When
            None (the default) the value of a module-level
            `args.generate_diffs` is used if such a global exists,
            otherwise diffs are skipped.

    Raises:
        ValueError: if no document number was given and none could be
            determined.
    """
    with codecs.open(filename, 'r', 'utf-8') as f:
        reg = f.read()
    file_digest = hashlib.sha256(reg.encode('utf-8')).hexdigest()

    if checkpoint:
        checkpointer = Checkpointer(checkpoint)
    else:
        checkpointer = NullCheckpointer()

    # Build the initial tree; the checkpoint key includes the digest of
    # the file contents.
    reg_tree = checkpointer.checkpoint(
        "init-tree-" + file_digest,
        lambda: Builder.reg_tree(reg))
    title_part = reg_tree.label_id()

    if doc_number is None:
        doc_number = Builder.determine_doc_number(reg, title, title_part)
        if doc_number is None:
            # Without this check the join below fails with an opaque
            # TypeError; parse_regulation raises the same ValueError.
            raise ValueError("Could not determine document number")

    # Use the `title` parameter here: this function has no `args` in its
    # signature, and Builder below is given the same `title`.
    checkpointer.suffix = ":".join(
        ["", title_part, str(title), doc_number])

    # Create the builder
    builder = Builder(cfr_title=title,
                      cfr_part=title_part,
                      doc_number=doc_number,
                      checkpointer=checkpointer)
    builder.fetch_notices_json()

    for notice in notice_doc_numbers:
        builder.build_notice_from_doc_number(notice)

    builder.write_regulation(reg_tree)
    layer_cache = LayerCacheAggregator()

    act_title_and_section = [act_title, act_section]

    builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache)
    layer_cache.replace_using(reg_tree)

    if write_diffs is None:
        # Backward compatibility: honour a module-level `args` when the
        # module is run as a script, but do not fail with NameError when
        # the function is imported and no such global exists.
        write_diffs = getattr(globals().get('args'), 'generate_diffs', False)

    if write_diffs:
        generate_diffs(reg_tree, act_title_and_section, builder, layer_cache)
def build_by_notice(filename, title, act_title, act_section,
                    notice_doc_numbers, doc_number=None, checkpoint=None,
                    write_diffs=None):
    """Build a regulation from `filename`, then build the listed notices.

    Parameters:
        filename: path of the regulation file, read as UTF-8.
        title: CFR title; passed to `Builder` and used in the checkpoint
            suffix.
        act_title, act_section: passed on (as a two-item list) to layer
            generation and diff generation.
        notice_doc_numbers: iterable of notice document numbers; each is
            passed to `builder.build_notice_from_doc_number`.
        doc_number: document number of this version. When None it is
            determined via `Builder.determine_doc_number`.
        checkpoint: when truthy, passed to `Checkpointer`; otherwise a
            `NullCheckpointer` is used.
        write_diffs: whether to call `generate_diffs` at the end. When
            None (the default) the value of a module-level
            `args.generate_diffs` is used if such a global exists,
            otherwise diffs are skipped.

    Raises:
        ValueError: if no document number was given and none could be
            determined.
    """
    with codecs.open(filename, 'r', 'utf-8') as f:
        reg = f.read()
    file_digest = hashlib.sha256(reg.encode('utf-8')).hexdigest()

    if checkpoint:
        checkpointer = Checkpointer(checkpoint)
    else:
        checkpointer = NullCheckpointer()

    # Build the initial tree; the checkpoint key includes the digest of
    # the file contents.
    reg_tree = checkpointer.checkpoint(
        "init-tree-" + file_digest,
        lambda: Builder.reg_tree(reg))
    title_part = reg_tree.label_id()

    if doc_number is None:
        doc_number = Builder.determine_doc_number(reg, title, title_part)
        if doc_number is None:
            # Without this check the join below fails with an opaque
            # TypeError; parse_regulation raises the same ValueError.
            raise ValueError("Could not determine document number")

    # Use the `title` parameter here: this function has no `args` in its
    # signature, and Builder below is given the same `title`.
    checkpointer.suffix = ":".join(
        ["", title_part, str(title), doc_number])

    # Create the builder
    builder = Builder(cfr_title=title,
                      cfr_part=title_part,
                      doc_number=doc_number,
                      checkpointer=checkpointer)
    builder.fetch_notices_json()

    for notice in notice_doc_numbers:
        builder.build_notice_from_doc_number(notice)

    builder.write_regulation(reg_tree)
    layer_cache = LayerCacheAggregator()

    act_title_and_section = [act_title, act_section]

    builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache)
    layer_cache.replace_using(reg_tree)

    if write_diffs is None:
        # Backward compatibility: honour a module-level `args` when the
        # module is run as a script, but do not fail with NameError when
        # the function is imported and no such global exists.
        write_diffs = getattr(globals().get('args'), 'generate_diffs', False)

    if write_diffs:
        generate_diffs(reg_tree, act_title_and_section, builder, layer_cache)
def parse_regulation(args):
    """Parse one regulation file as directed by the command-line `args`.

    Kept as a stand-alone function so the whole run can be profiled.

    Attributes read from `args`: `filename`, `act_title`, `act_section`,
    `checkpoint`, `title` and `generate_diffs`.

    Raises:
        ValueError: if the document number comes back falsy.
    """
    with codecs.open(args.filename, 'r', 'utf-8') as source:
        reg_text = source.read()
    digest = hashlib.sha256(reg_text.encode('utf-8')).hexdigest()

    act_info = [args.act_title, args.act_section]

    checkpointer = (Checkpointer(args.checkpoint) if args.checkpoint
                    else NullCheckpointer())

    # The regulation tree comes first. Both checkpoint keys include the
    # digest of the file contents.
    tree_key = "init-tree-" + digest
    tree = checkpointer.checkpoint(
        tree_key, lambda: Builder.reg_tree(reg_text))
    part = tree.label_id()

    number_key = "doc-number-" + digest
    version = checkpointer.checkpoint(
        number_key,
        lambda: Builder.determine_doc_number(reg_text, args.title, part))
    if not version:
        raise ValueError("Could not determine document number")

    suffix_fields = ["", part, str(args.title), version]
    checkpointer.suffix = ":".join(suffix_fields)

    # Run Builder
    builder = Builder(cfr_title=args.title, cfr_part=part,
                      doc_number=version, checkpointer=checkpointer)
    builder.write_notices()

    # The first version of the regulation is always written.
    logger.info("Version %s", version)
    builder.write_regulation(tree)

    cache = LayerCacheAggregator()
    builder.gen_and_write_layers(tree, act_info, cache)
    cache.replace_using(tree)

    if not args.generate_diffs:
        return
    generate_diffs(version, tree, act_info, builder, cache, checkpointer)
if __name__ == "__main__":
    # Script entry point. Positional arguments, as listed in the usage
    # text below: regulation file, CFR title, notice document number,
    # act title, act section and an optional generate-diffs flag.
    if len(sys.argv) < 6:
        print("Usage: python build_from.py regulation.xml title " +
              "notice_doc_# act_title act_section (Generate diffs? " +
              "True/False)")
        print(" e.g. python build_from.py rege.txt 12 2011-31725 15 1693 " +
              "False")
        # sys.exit rather than the site-provided exit() helper, with a
        # non-zero status because bad usage is a failure.
        sys.exit(1)

    with codecs.open(sys.argv[1], 'r', 'utf-8') as f:
        reg = f.read()

    doc_number = sys.argv[3]

    # First, the regulation tree
    reg_tree = Builder.reg_tree(reg)

    builder = Builder(cfr_title=int(sys.argv[2]),
                      cfr_part=reg_tree.label_id(),
                      doc_number=doc_number)

    # Didn't include the provided version
    if not any(n['document_number'] == doc_number for n in builder.notices):
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3; the bare print statement is a
        # SyntaxError on Python 3.
        print("Could not find notice_doc_#, %s" % doc_number)
        sys.exit(1)

    builder.write_notices()

    # Always do at least the first reg
    logger.info("Version %s", doc_number)
    builder.write_regulation(reg_tree)
# Parse the command line, then read the regulation file as UTF-8.
args = parser.parse_args()
with codecs.open(args.filename, 'r', 'utf-8') as f:
    reg = f.read()
# Digest of the file contents; used in both checkpoint keys below.
file_digest = hashlib.sha256(reg.encode('utf-8')).hexdigest()

act_title_and_section = [args.act_title, args.act_section]

# A NullCheckpointer is used unless a checkpoint argument was given.
checkpointer = (Checkpointer(args.checkpoint) if args.checkpoint
                else NullCheckpointer())

# First, the regulation tree
reg_tree = checkpointer.checkpoint(
    "init-tree-" + file_digest,
    lambda: Builder.reg_tree(reg))
title_part = reg_tree.label_id()

doc_number = checkpointer.checkpoint(
    "doc-number-" + file_digest,
    lambda: Builder.determine_doc_number(reg, args.title, title_part))
if not doc_number:
    raise ValueError("Could not determine document number")

checkpointer.suffix = ":".join(
    ["", title_part, str(args.title), doc_number])

# Run Builder
builder = Builder(
    cfr_title=args.title,
    cfr_part=title_part,
    doc_number=doc_number,
    checkpointer=checkpointer,
)
builder.write_notices()