Beispiel #1
0
def generate_diffs(doc_number, reg_tree, act_title_and_section, builder,
                   layer_cache):
    """Write every later version of a regulation, then diff all versions.

    Broken out into a separate function to assist with profiling, so it is
    easier to determine which parts of the parser take the most time.

    :param doc_number: version identifier under which ``reg_tree`` is
        recorded as the first known version.
    :param reg_tree: the initial regulation tree; it seeds
        ``builder.revision_generator`` and its ``label_id()`` names every
        diff that gets written.
    :param act_title_and_section: passed through unchanged to
        ``builder.gen_and_write_layers``.
    :param builder: supplies ``revision_generator``, ``write_regulation``,
        ``gen_and_write_layers`` and ``writer.diff``. Its ``doc_number``
        attribute is overwritten with each version that is processed.
    :param layer_cache: cache that is invalidated per notice and refreshed
        with each newly built tree.
    :returns: ``None``; all output goes through ``builder``.
    """
    all_versions = {doc_number: reg_tree}

    # The second element yielded by the generator is not needed here.
    for last_notice, _old, new_tree, notices in builder.revision_generator(
            reg_tree):
        version = last_notice['document_number']
        logger.info("Version %s", version)
        all_versions[version] = new_tree
        builder.doc_number = version
        builder.write_regulation(new_tree)
        # Invalidate before generating layers for the new version, then
        # refresh the cache from the new tree for the next iteration.
        layer_cache.invalidate_by_notice(last_notice)
        builder.gen_and_write_layers(new_tree, act_title_and_section,
                                     layer_cache, notices)
        layer_cache.replace_using(new_tree)

    # now build diffs - include "empty" diffs comparing a version to itself
    # ``items()`` (rather than ``iteritems()``) works on Python 2 and 3.
    for lhs_version, lhs_tree in all_versions.items():
        for rhs_version, rhs_tree in all_versions.items():
            comparer = treediff.Compare(lhs_tree, rhs_tree)
            comparer.compare()
            builder.writer.diff(reg_tree.label_id(), lhs_version,
                                rhs_version).write(comparer.changes)
Beispiel #2
0
    def test_title_disappears(self):
        """ A node whose title goes from text to None is reported as
            'modified', with a delete opcode covering the old title. """
        before = struct.Node("Text", title="Some Title", label=['1111'])
        after = struct.Node("Text", title=None, label=['1111'])

        diff = treediff.Compare(before, after)
        diff.compare()

        # The delete span (0, 10) matches the length of "Some Title".
        expected = {
            'op': 'modified',
            'title': [('delete', 0, 10)],
        }
        self.assertEqual(diff.changes['1111'], expected)
Beispiel #3
0
    def test_subparts(self):
        """ Create a tree with no subparts, then add subparts.

            Both trees are built from text for part 204. The older text has
            two sections directly under the title; the newer text places
            each section under its own subpart heading. The comparison must
            report both new subparts and the removal of '204-Subpart'. """
        title = u"Regulation Title"
        sect1_title = u"§ 204.1 First Section"
        sect1 = u"(a) I believe this is (b) the best section "
        sect2_title = u"§ 204.2 Second Section"
        sect2 = u"Some sections \ndon't have \ndepth at all."

        old_text = "\n".join([title, sect1_title, sect1, sect2_title, sect2])
        older = reg_text.build_reg_text_tree(old_text, 204)

        # Same title and sections as above, with subpart headings inserted.
        ntitle = u"Regulation Title"
        nsubpart_a = u"Subpart A—First subpart"
        nsect1_title = u"§ 204.1 First Section"
        nsect1 = u"(a) I believe this is (b) the best section "
        nsubpart_b = u"Subpart B—Second subpart"
        nsect2_title = u"§ 204.2 Second Section"
        nsect2 = u"Some sections \ndon't have \ndepth at all."

        new_text = "\n".join([
            ntitle, nsubpart_a, nsect1_title, nsect1, nsubpart_b, nsect2_title,
            nsect2
        ])
        newer = reg_text.build_reg_text_tree(new_text, 204)

        comparer = treediff.Compare(older, newer)
        comparer.compare()

        # assertEqual / assertIn replace the deprecated assertEquals alias
        # and the less informative assertTrue(x in y).
        self.assertEqual(
            comparer.changes['204-Subpart-A'], {
                "node": {
                    "text": "",
                    "node_type": "subpart",
                    "label": ["204", "Subpart", "A"],
                    "child_labels": ["204-1"],
                    "title": "First subpart"
                },
                "op": "added"
            })
        self.assertIn('204-Subpart-B', comparer.changes)
        # NOTE(review): '204-Subpart' is presumably the placeholder subpart
        # of the older tree; confirm against build_reg_text_tree.
        self.assertEqual(comparer.changes['204-Subpart'], {"op": "deleted"})
    # NOTE(review): this is the tail of a larger body; `builder`,
    # `doc_number` and `reg_tree` are defined above the visible portion.
    builder.write_notices()

    #   Always do at least the first reg
    logger.info("Version %s", doc_number)
    builder.write_regulation(reg_tree)
    layer_cache = LayerCacheAggregator()
    # NOTE(review): sys.argv[4:6] is presumably the act title and section
    # (two command-line arguments) -- confirm against the usage text.
    builder.gen_and_write_layers(reg_tree, sys.argv[4:6], layer_cache)
    layer_cache.replace_using(reg_tree)
    # Later versions and diffs are produced when the seventh argv entry is
    # absent or equals 'true' (case-insensitive); any other value skips them.
    if len(sys.argv) < 7 or sys.argv[6].lower() == 'true':
        # Maps each version identifier to the tree written for it.
        all_versions = {doc_number: reg_tree}
        for last_notice, old, new_tree, notices in builder.revision_generator(
                reg_tree):
            version = last_notice['document_number']
            logger.info("Version %s", version)
            all_versions[version] = new_tree
            # The builder is re-pointed at the version being written.
            builder.doc_number = version
            builder.write_regulation(new_tree)
            # Invalidate the cache for this notice before generating layers,
            # then refresh it from the new tree for the next iteration.
            layer_cache.invalidate_by_notice(last_notice)
            builder.gen_and_write_layers(new_tree, sys.argv[4:6], layer_cache,
                                         notices)
            layer_cache.replace_using(new_tree)

        # now build diffs - include "empty" diffs comparing a version to itself
        # Every ordered pair of versions is compared, so both directions
        # (lhs -> rhs and rhs -> lhs) are written.
        for lhs_version, lhs_tree in all_versions.iteritems():
            for rhs_version, rhs_tree in all_versions.iteritems():
                comparer = treediff.Compare(lhs_tree, rhs_tree)
                comparer.compare()
                builder.writer.diff(reg_tree.label_id(), lhs_version,
                                    rhs_version).write(comparer.changes)
from regparser.diff import api_reader
from regparser.diff import treediff
from regparser.tree.struct import node_decode_hook

if __name__ == "__main__":
    # Fetch two trees for '1005' through the API client and print their
    # diff as JSON.
    # NOTE(review): the second argument to `regulation` is presumably the
    # document number identifying the version -- confirm against api_reader.
    api = api_reader.Client()
    old_tree = api.regulation('1005', '2011-31725')
    new_tree = api.regulation('1005', '2013-10604')

    comparer = treediff.Compare(old_tree, new_tree)
    comparer.compare()
    # The call form with a single argument prints identically on Python 2
    # and is valid syntax on Python 3, unlike the bare print statement.
    print(comparer.as_json())