def ecfr_all(title, file, act_title, act_section,
             with_all_versions=False, without_versions=False,
             without_notices=False, only_notice=None):
    """Parse eCFR into RegML.

    Builds the initial regulation tree from ``file``, then walks every
    revision yielded by ``builder.revision_generator`` and writes the
    requested regulation versions and notices through the builder.

    :param title: passed to ``tree_and_builder`` together with ``file``.
    :param file: source handed to ``tree_and_builder``.
    :param act_title: first element of the act citation given to
        ``builder.generate_layers``.
    :param act_section: second element of that act citation.
    :param with_all_versions: when true, also write the regulation for each
        later revision (the first version is governed by
        ``without_versions`` instead).
    :param without_versions: when true, skip writing the first version.
    :param without_notices: when true, skip writing notices.
    :param only_notice: when not ``None``, only the version whose document
        number equals this value produces output; every revision is still
        generated so that the layer cache stays up to date.
    """
    def is_selected(doc_number):
        # No filter means every version is wanted.
        return only_notice is None or doc_number == only_notice

    def build_layers(tree, *extra):
        # A fresh citation list is handed over on every call.
        return builder.generate_layers(
            tree, [act_title, act_section], layer_cache, *extra)

    # Get the tree and layers
    reg_tree, builder = tree_and_builder(file, title, writer_type='XML')
    layer_cache = LayerCacheAggregator()
    layers = build_layers(reg_tree)

    # Do the first version
    last_version = builder.doc_number
    print("Version {}".format(last_version))
    if is_selected(builder.doc_number) and not without_versions:
        builder.write_regulation(reg_tree, layers=layers)

    revisions = builder.revision_generator(reg_tree)
    for notice, previous_tree, revised_tree, notice_batch in revisions:
        version = notice['document_number']
        print("Version {}".format(version))
        builder.doc_number = version
        layers = build_layers(revised_tree, notice_batch)

        if is_selected(version):
            if with_all_versions:
                builder.write_regulation(revised_tree, layers=layers)
            if not without_notices:
                builder.write_notice(version,
                                     old_tree=previous_tree,
                                     reg_tree=revised_tree,
                                     layers=layers,
                                     last_version=last_version)

        # Keep the cache in step with the revision just processed, even
        # when nothing was written for it.
        layer_cache.invalidate_by_notice(notice)
        layer_cache.replace_using(revised_tree)
        last_version = version
        # free some memory
        del notice, previous_tree, revised_tree, notice_batch
def generate_xml(filename, title, act_title, act_section, notice_doc_numbers,
                 doc_number=None, checkpoint=None):
    """Write the regulation and a notice for every version as XML.

    The first version is always written; afterwards each revision yielded
    by ``builder.revision_generator`` gets both a regulation and a notice.

    :param filename: source handed to ``tree_and_builder``.
    :param title: passed to ``tree_and_builder``.
    :param act_title: first element of the act citation given to
        ``builder.generate_layers``.
    :param act_section: second element of that act citation.
    :param notice_doc_numbers: not used by this function body.
    :param doc_number: only used as the key under which the first version
        is stored in ``all_versions``.
    :param checkpoint: passed positionally to ``tree_and_builder``.
    """
    act_title_and_section = [act_title, act_section]

    # First, the regulation tree
    reg_tree, builder = tree_and_builder(filename, title, checkpoint,
                                         writer_type='XML')
    layer_cache = LayerCacheAggregator()
    layers = builder.generate_layers(reg_tree, act_title_and_section,
                                     layer_cache)

    # Always do at least the first reg.  The format string needs a %s
    # placeholder: without it logging fails to format the record and the
    # version number never reaches the log.
    logger.info("Version %s", builder.doc_number)
    builder.write_regulation(reg_tree, layers=layers)

    # NOTE(review): all_versions is filled in but never read in this
    # function, and it keeps a frozen copy of every version alive, which
    # works against the `del` below -- confirm whether
    # FrozenNode.from_node is needed for a side effect before removing it.
    all_versions = {doc_number: FrozenNode.from_node(reg_tree)}

    for last_notice, old, new_tree, notices in builder.revision_generator(
            reg_tree):
        version = last_notice['document_number']
        logger.info("Version %s", version)
        all_versions[version] = FrozenNode.from_node(new_tree)
        builder.doc_number = version
        layers = builder.generate_layers(new_tree, act_title_and_section,
                                         layer_cache, notices)
        builder.write_regulation(new_tree, layers=layers)
        builder.write_notice(version, old_tree=old, reg_tree=new_tree,
                             layers=layers)
        # Bring the cache up to date with the version just written.
        layer_cache.invalidate_by_notice(last_notice)
        layer_cache.replace_using(new_tree)
        del last_notice, old, new_tree, notices     # free some memory
def test_layer_cache(self, init):
    """Integration test for layer caching.

    Regenerates the layers for one small tree several times and checks
    which node labels show up in the fourth layer written
    (``write.call_args_list[3]``) as the tree text changes and as the
    cache is invalidated by notices.

    Key lists are compared through ``sorted(...)`` so the assertions hold
    whether ``keys()`` returns a list or a view object.
    """
    init.return_value = None
    cache = LayerCacheAggregator()
    b = Builder()   # Don't need parameters as init's been mocked out
    b.cfr_title, b.cfr_part, b.doc_number = 15, '111', '111-222'
    b.writer = Mock()
    b.checkpointer = NullCheckpointer()
    write = b.writer.layer.return_value.write
    tree = Node(label=["1234"], children=[
        Node(label=["1234", "1"], children=[
            Node("See paragraph (b)", label=["1234", "1", "a"]),
            Node("This is b", label=["1234", "1", "b"])
        ])
    ])

    # Initial generation: only paragraph a has an entry.
    b.gen_and_write_layers(tree, [], cache, [])
    arg = write.call_args_list[3][0][0]
    self.assertEqual(['1234-1-a'], sorted(arg.keys()))
    cache.replace_using(tree)

    # Paragraph b's text now refers to a, yet the written layer is
    # unchanged.
    write.reset_mock()
    tree.children[0].children[1].text = "References paragraph (a)"
    b.gen_and_write_layers(tree, [], cache, [])
    arg = write.call_args_list[3][0][0]
    self.assertEqual(['1234-1-a'], sorted(arg.keys()))

    # Paragraph a's text no longer refers to anything; still unchanged.
    write.reset_mock()
    tree.children[0].children[0].text = "Contains no references"
    b.gen_and_write_layers(tree, [], cache, [])
    arg = write.call_args_list[3][0][0]
    self.assertEqual(['1234-1-a'], sorted(arg.keys()))

    # A notice without a 'changes' entry leaves the output as it was.
    write.reset_mock()
    notice = {'document_number': '111-222'}
    cache.invalidate_by_notice(notice)
    b.gen_and_write_layers(tree, [], cache, [])
    arg = write.call_args_list[3][0][0]
    self.assertEqual(['1234-1-a'], sorted(arg.keys()))

    # A change to paragraph b makes b appear alongside a.
    write.reset_mock()
    notice['changes'] = {'1234-1-b': 'some change'}
    cache.invalidate_by_notice(notice)
    b.gen_and_write_layers(tree, [], cache, [])
    arg = write.call_args_list[3][0][0]
    self.assertEqual(['1234-1-a', '1234-1-b'], sorted(arg.keys()))

    # A subpart-level change leaves only b in the written layer.
    write.reset_mock()
    notice['changes'] = {'1234-Subpart-A': 'some change'}
    cache.invalidate_by_notice(notice)
    b.gen_and_write_layers(tree, [], cache, [])
    arg = write.call_args_list[3][0][0]
    self.assertEqual(['1234-1-b'], sorted(arg.keys()))
def test_layer_cache(self, init):
    """Integration test for layer caching.

    Each step regenerates the layers for the same small tree and inspects
    the keys of the fourth layer handed to the mocked writer
    (``write.call_args_list[3]``), checking how they react to text edits
    and to cache invalidation by notices.

    Keys are always compared as a sorted list, so the assertions do not
    depend on ``keys()`` returning a list rather than a view object.
    """
    init.return_value = None
    cache = LayerCacheAggregator()
    b = Builder()   # Don't need parameters as init's been mocked out
    b.cfr_title, b.cfr_part, b.doc_number = 15, '111', '111-222'
    b.writer = Mock()
    b.checkpointer = NullCheckpointer()
    write = b.writer.layer.return_value.write
    tree = Node(label=["1234"], children=[
        Node(label=["1234", "1"], children=[
            Node("See paragraph (b)", label=["1234", "1", "a"]),
            Node("This is b", label=["1234", "1", "b"])])])
    paragraph_a, paragraph_b = tree.children[0].children

    def written_keys():
        """Regenerate the layers; return the fourth layer's sorted keys."""
        b.gen_and_write_layers(tree, [], cache, [])
        keys = sorted(write.call_args_list[3][0][0].keys())
        # Start the next step from a clean call list.
        write.reset_mock()
        return keys

    # Initial generation: only paragraph a has an entry.
    self.assertEqual(['1234-1-a'], written_keys())
    cache.replace_using(tree)

    # Text edits alone do not alter what gets written.
    paragraph_b.text = "References paragraph (a)"
    self.assertEqual(['1234-1-a'], written_keys())

    paragraph_a.text = "Contains no references"
    self.assertEqual(['1234-1-a'], written_keys())

    # A notice without a 'changes' entry leaves the output as it was.
    notice = {'document_number': '111-222'}
    cache.invalidate_by_notice(notice)
    self.assertEqual(['1234-1-a'], written_keys())

    # A change to paragraph b makes b appear alongside a.
    notice['changes'] = {'1234-1-b': 'some change'}
    cache.invalidate_by_notice(notice)
    self.assertEqual(['1234-1-a', '1234-1-b'], written_keys())

    # A subpart-level change leaves only b in the written layer.
    notice['changes'] = {'1234-Subpart-A': 'some change'}
    cache.invalidate_by_notice(notice)
    self.assertEqual(['1234-1-b'], written_keys())
builder.write_notices() # Always do at least the first reg logger.info("Version %s", doc_number) builder.write_regulation(reg_tree) layer_cache = LayerCacheAggregator() builder.gen_and_write_layers(reg_tree, sys.argv[4:6], layer_cache) layer_cache.replace_using(reg_tree) if len(sys.argv) < 7 or sys.argv[6].lower() == 'true': all_versions = {doc_number: reg_tree} for last_notice, old, new_tree, notices in builder.revision_generator( reg_tree): version = last_notice['document_number'] logger.info("Version %s", version) all_versions[version] = new_tree builder.doc_number = version builder.write_regulation(new_tree) layer_cache.invalidate_by_notice(last_notice) builder.gen_and_write_layers(new_tree, sys.argv[4:6], layer_cache, notices) layer_cache.replace_using(new_tree) # now build diffs - include "empty" diffs comparing a version to itself for lhs_version, lhs_tree in all_versions.iteritems(): for rhs_version, rhs_tree in all_versions.iteritems(): comparer = treediff.Compare(lhs_tree, rhs_tree) comparer.compare() builder.writer.diff(reg_tree.label_id(), lhs_version, rhs_version).write(comparer.changes)
builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache) layer_cache.replace_using(reg_tree) # this used to assume implicitly that if gen-diffs was not specified it was # True; changed it to explicit check if args.generate_diffs: all_versions = {doc_number: reg_tree} for last_notice, old, new_tree, notices in builder.revision_generator( reg_tree): version = last_notice['document_number'] logger.info("Version %s", version) all_versions[version] = new_tree builder.doc_number = version builder.write_regulation(new_tree) layer_cache.invalidate_by_notice(last_notice) builder.gen_and_write_layers(new_tree, act_title_and_section, layer_cache, notices) layer_cache.replace_using(new_tree) # convert to frozen trees for doc in all_versions: all_versions[doc] = FrozenNode.from_node(all_versions[doc]) # now build diffs - include "empty" diffs comparing a version to itself for lhs_version, lhs_tree in all_versions.iteritems(): for rhs_version, rhs_tree in all_versions.iteritems(): changes = checkpointer.checkpoint( "-".join(["diff", lhs_version, rhs_version]), lambda: dict(changes_between(lhs_tree, rhs_tree))) builder.writer.diff(