def test_filename(self):
    """Verify that an appropriate file name is generated in an appropriate
    folder: the name must contain the nested folder path, the current
    counter value and the tag lower-cased with its spaces removed."""
    nested = os.path.join('some', 'depth', 'here')
    # Join the nested folders *under* the fresh temp dir. Plain string
    # concatenation would glue 'some' onto the temp dir's own name and
    # put the folders in a sibling directory outside of it.
    file_path = os.path.join(tempfile.mkdtemp(), nested)
    cp = Checkpointer(file_path)
    cp.counter = 25
    filename = cp._filename('A WeIrD TaG')
    # assertIn reports both operands on failure, unlike assertTrue(a in b)
    self.assertIn(nested, filename)
    self.assertIn('25', filename)
    self.assertIn('aweirdtag', filename)
Example #2
0
 def test_filename(self):
     """Verify that an appropriate file name is generated in an appropriate
     folder: the name must contain the nested folder path, the current
     counter value and the tag lower-cased with its spaces removed."""
     nested = os.path.join('some', 'depth', 'here')
     # Join the nested folders *under* the fresh temp dir. Plain string
     # concatenation would glue 'some' onto the temp dir's own name and
     # put the folders in a sibling directory outside of it.
     file_path = os.path.join(tempfile.mkdtemp(), nested)
     cp = Checkpointer(file_path)
     cp.counter = 25
     filename = cp._filename('A WeIrD TaG')
     # assertIn reports both operands on failure, unlike assertTrue(a in b)
     self.assertIn(nested, filename)
     self.assertIn('25', filename)
     self.assertIn('aweirdtag', filename)
Example #3
0
def build_by_notice(filename,
                    title,
                    act_title,
                    act_section,
                    notice_doc_numbers,
                    doc_number=None,
                    checkpoint=None):
    """Build a regulation from a source file and an explicit list of notices.

    The file is read as UTF-8, parsed into a regulation tree (through the
    checkpointer, keyed by a SHA-256 digest of the file contents), and then
    handed to a ``Builder`` which fetches notices, builds each requested
    notice, and writes the regulation and its layers.

    :param filename: path of the regulation source file
    :param title: CFR title; passed to ``Builder`` as ``cfr_title``
    :param act_title: first element of the act reference given to the
        layer generation step
    :param act_section: second element of that act reference
    :param notice_doc_numbers: iterable of notice document numbers; each is
        passed to ``builder.build_notice_from_doc_number``
    :param doc_number: document number of the regulation; when ``None`` it
        is determined via ``Builder.determine_doc_number``
    :param checkpoint: if truthy, passed to ``Checkpointer``; otherwise a
        ``NullCheckpointer`` is used
    """
    with codecs.open(filename, 'r', 'utf-8') as f:
        reg = f.read()
    file_digest = hashlib.sha256(reg.encode('utf-8')).hexdigest()

    if checkpoint:
        checkpointer = Checkpointer(checkpoint)
    else:
        checkpointer = NullCheckpointer()

    # build the initial tree
    reg_tree = checkpointer.checkpoint("init-tree-" + file_digest,
                                       lambda: Builder.reg_tree(reg))

    title_part = reg_tree.label_id()

    if doc_number is None:
        doc_number = Builder.determine_doc_number(reg, title, title_part)

    # Use the ``title`` argument (the same value the Builder receives)
    # rather than reaching for a global ``args`` namespace, so the function
    # also works when called from code that has no such global.
    checkpointer.suffix = ":".join(
        ["", title_part, str(title), doc_number])

    # create the builder
    builder = Builder(cfr_title=title,
                      cfr_part=title_part,
                      doc_number=doc_number,
                      checkpointer=checkpointer)

    builder.fetch_notices_json()

    for notice in notice_doc_numbers:
        builder.build_notice_from_doc_number(notice)

    builder.write_regulation(reg_tree)
    layer_cache = LayerCacheAggregator()

    act_title_and_section = [act_title, act_section]

    builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache)
    layer_cache.replace_using(reg_tree)

    # NOTE(review): ``args`` is not a parameter of this function; this
    # presumably relies on a module-level argparse namespace -- confirm, and
    # consider passing the flag in explicitly.
    if args.generate_diffs:
        generate_diffs(reg_tree, act_title_and_section, builder, layer_cache)
Example #4
0
    def test_dont_load_later_elements(self):
        """Once one checkpoint has been recomputed, no later checkpoint may
        be loaded from storage. That way a user can remove, say, step 5 and
        have everything from that step onwards rebuilt."""
        checkpointer = Checkpointer(tempfile.mkdtemp())

        # First pass: every step computes its value
        first = checkpointer.checkpoint("1", lambda: 1)
        self.assertEqual(first, 1)
        second = checkpointer.checkpoint("2", lambda: 2)
        self.assertEqual(second, 2)
        third = checkpointer.checkpoint("3", lambda: 3)
        self.assertEqual(third, 3)

        checkpointer._reset()

        # Second pass: step 1 comes back as the stored value ...
        first = checkpointer.checkpoint("1", lambda: -1)
        self.assertEqual(first, 1)
        # ... step 2 is forced to recompute ...
        second = checkpointer.checkpoint("2", lambda: -2, force=True)
        self.assertEqual(second, -2)
        # ... and therefore step 3 must be recomputed as well
        third = checkpointer.checkpoint("3", lambda: -3)
        self.assertEqual(third, -3)
def build_by_notice(filename, title, act_title, act_section,
                    notice_doc_numbers, doc_number=None, checkpoint=None):
    """Build a regulation from a source file and an explicit list of notices.

    The file is read as UTF-8, parsed into a regulation tree (through the
    checkpointer, keyed by a SHA-256 digest of the file contents), and then
    handed to a ``Builder`` which fetches notices, builds each requested
    notice, and writes the regulation and its layers.

    :param filename: path of the regulation source file
    :param title: CFR title; passed to ``Builder`` as ``cfr_title``
    :param act_title: first element of the act reference given to the
        layer generation step
    :param act_section: second element of that act reference
    :param notice_doc_numbers: iterable of notice document numbers; each is
        passed to ``builder.build_notice_from_doc_number``
    :param doc_number: document number of the regulation; when ``None`` it
        is determined via ``Builder.determine_doc_number``
    :param checkpoint: if truthy, passed to ``Checkpointer``; otherwise a
        ``NullCheckpointer`` is used
    """
    with codecs.open(filename, 'r', 'utf-8') as f:
        reg = f.read()
    file_digest = hashlib.sha256(reg.encode('utf-8')).hexdigest()

    if checkpoint:
        checkpointer = Checkpointer(checkpoint)
    else:
        checkpointer = NullCheckpointer()

    # build the initial tree
    reg_tree = checkpointer.checkpoint(
        "init-tree-" + file_digest,
        lambda: Builder.reg_tree(reg))

    title_part = reg_tree.label_id()

    if doc_number is None:
        doc_number = Builder.determine_doc_number(reg, title, title_part)

    # Use the ``title`` argument (the same value the Builder receives)
    # rather than reaching for a global ``args`` namespace, so the function
    # also works when called from code that has no such global.
    checkpointer.suffix = ":".join(
        ["", title_part, str(title), doc_number])

    # create the builder
    builder = Builder(cfr_title=title,
                      cfr_part=title_part,
                      doc_number=doc_number,
                      checkpointer=checkpointer)

    builder.fetch_notices_json()

    for notice in notice_doc_numbers:
        builder.build_notice_from_doc_number(notice)

    builder.write_regulation(reg_tree)
    layer_cache = LayerCacheAggregator()

    act_title_and_section = [act_title, act_section]

    builder.gen_and_write_layers(reg_tree, act_title_and_section, layer_cache)
    layer_cache.replace_using(reg_tree)

    # NOTE(review): ``args`` is not a parameter of this function; this
    # presumably relies on a module-level argparse namespace -- confirm, and
    # consider passing the flag in explicitly.
    if args.generate_diffs:
        generate_diffs(reg_tree, act_title_and_section, builder, layer_cache)
Example #6
0
def parse_regulation(args):
    """ Drive the parser from an already-parsed set of command-line
        arguments. Kept as its own function so that it can be profiled.

        Reads ``args.filename`` as UTF-8, builds the regulation tree and
        document number through the checkpointer, then has a ``Builder``
        write notices, the regulation and its layers. Diffs are generated
        when ``args.generate_diffs`` is truthy. Raises ``ValueError`` if no
        document number could be determined.
    """
    with codecs.open(args.filename, 'r', 'utf-8') as source:
        reg_text = source.read()
        digest = hashlib.sha256(reg_text.encode('utf-8')).hexdigest()
    act_citation = [args.act_title, args.act_section]

    checkpointer = (Checkpointer(args.checkpoint) if args.checkpoint
                    else NullCheckpointer())

    #   Step one: the regulation tree
    def build_tree():
        return Builder.reg_tree(reg_text)

    reg_tree = checkpointer.checkpoint("init-tree-" + digest, build_tree)
    cfr_part = reg_tree.label_id()

    #   Step two: the document number of this version
    def find_doc_number():
        return Builder.determine_doc_number(reg_text, args.title, cfr_part)

    doc_number = checkpointer.checkpoint("doc-number-" + digest,
                                         find_doc_number)
    if not doc_number:
        raise ValueError("Could not determine document number")

    suffix_parts = ["", cfr_part, str(args.title), doc_number]
    checkpointer.suffix = ":".join(suffix_parts)

    #   Hand everything over to the Builder
    builder = Builder(
        cfr_title=args.title,
        cfr_part=cfr_part,
        doc_number=doc_number,
        checkpointer=checkpointer,
    )
    builder.write_notices()

    #   The first version of the regulation is always written
    logger.info("Version %s", doc_number)
    builder.write_regulation(reg_tree)
    layer_cache = LayerCacheAggregator()

    builder.gen_and_write_layers(reg_tree, act_citation, layer_cache)
    layer_cache.replace_using(reg_tree)

    if args.generate_diffs:
        generate_diffs(
            doc_number, reg_tree, act_citation,
            builder, layer_cache, checkpointer)
    def test_dont_load_later_elements(self):
        """Once one checkpoint has been recomputed, no later checkpoint may
        be loaded from storage. That way a user can remove, say, step 5 and
        have everything from that step onwards rebuilt."""
        checkpointer = Checkpointer(tempfile.mkdtemp())

        # First pass: every step computes its value
        first = checkpointer.checkpoint("1", lambda: 1)
        self.assertEqual(first, 1)
        second = checkpointer.checkpoint("2", lambda: 2)
        self.assertEqual(second, 2)
        third = checkpointer.checkpoint("3", lambda: 3)
        self.assertEqual(third, 3)

        checkpointer._reset()

        # Second pass: step 1 comes back as the stored value ...
        first = checkpointer.checkpoint("1", lambda: -1)
        self.assertEqual(first, 1)
        # ... step 2 is forced to recompute ...
        second = checkpointer.checkpoint("2", lambda: -2, force=True)
        self.assertEqual(second, -2)
        # ... and therefore step 3 must be recomputed as well
        third = checkpointer.checkpoint("3", lambda: -3)
        self.assertEqual(third, -3)
Example #8
0
 def test_exception_reading(self):
     """A checkpoint file that exists but holds data in the wrong format
     should make deserialization fail gracefully instead of blowing up"""
     checkpointer = Checkpointer(tempfile.mkdtemp())
     stored = checkpointer.checkpoint("1", lambda: 1)
     self.assertEqual(1, stored)

     # Overwrite the checkpoint file with empty contents
     checkpoint_path = checkpointer._filename("1")
     with open(checkpoint_path, "w") as clobbered:
         clobbered.write("")

     checkpointer._reset()
     # loading is expected to fail (pickle raises), so the value is
     # computed afresh
     recomputed = checkpointer.checkpoint("1", lambda: -1)
     self.assertEqual(-1, recomputed)
 def test_exception_reading(self):
     """A checkpoint file that exists but holds data in the wrong format
     should make deserialization fail gracefully instead of blowing up"""
     checkpointer = Checkpointer(tempfile.mkdtemp())
     stored = checkpointer.checkpoint("1", lambda: 1)
     self.assertEqual(1, stored)

     # Overwrite the checkpoint file with empty contents
     checkpoint_path = checkpointer._filename("1")
     with open(checkpoint_path, "w") as clobbered:
         clobbered.write("")

     checkpointer._reset()
     # loading is expected to fail (pickle raises), so the value is
     # computed afresh
     recomputed = checkpointer.checkpoint("1", lambda: -1)
     self.assertEqual(-1, recomputed)
Example #10
0
    def test_tree_serialization(self):
        """Trees carry embedded XML, which does not serialize well; a tree
        must still survive a save/load round trip through a checkpoint"""
        inner_xml = etree.fromstring("""<tag>Hi</tag>""")
        inner = Node(text="inner", label=["111", "1"], source_xml=inner_xml)
        tree = Node(text="top", label=["111"], title="Reg 111",
                    children=[inner])

        checkpointer = Checkpointer(tempfile.mkdtemp())
        # first call stores the tree
        checkpointer.checkpoint("a-tag", lambda: tree)
        checkpointer._reset()
        # no function is supplied, so this would explode unless the tree
        # is loaded from the checkpoint
        loaded = checkpointer.checkpoint("a-tag", None)

        self.assertEqual(repr(tree), repr(loaded))
        original_xml = etree.tostring(tree.children[0].source_xml)
        loaded_xml = etree.tostring(loaded.children[0].source_xml)
        self.assertEqual(original_xml, loaded_xml)
    def test_tree_serialization(self):
        """Trees carry embedded XML, which does not serialize well; a tree
        must still survive a save/load round trip through a checkpoint"""
        inner_xml = etree.fromstring("""<tag>Hi</tag>""")
        inner = Node(text="inner", label=["111", "1"], source_xml=inner_xml)
        tree = Node(text="top", label=["111"], title="Reg 111",
                    children=[inner])

        checkpointer = Checkpointer(tempfile.mkdtemp())
        # first call stores the tree
        checkpointer.checkpoint("a-tag", lambda: tree)
        checkpointer._reset()
        # no function is supplied, so this would explode unless the tree
        # is loaded from the checkpoint
        loaded = checkpointer.checkpoint("a-tag", None)

        self.assertEqual(repr(tree), repr(loaded))
        original_xml = etree.tostring(tree.children[0].source_xml)
        loaded_xml = etree.tostring(loaded.children[0].source_xml)
        self.assertEqual(original_xml, loaded_xml)
    def test_basic_serialization(self):
        """An object can be stored and fetched again. Mutating the original
        after storing it proves the stored copy lives outside of local
        memory."""
        payload = {"some": "value", 123: 456}
        checkpointer = Checkpointer(tempfile.mkdtemp())

        # Store under counter 1, then change the in-memory object
        checkpointer.counter = 1
        checkpointer._serialize("a-tag", payload)
        payload["some"] = "other"
        self.assertEqual(checkpointer._deserialize("a-tag"),
                         {"some": "value", 123: 456})
        self.assertEqual(payload, {"some": "other", 123: 456})

        # Same tag under counter 2 holds the changed object
        checkpointer.counter = 2
        checkpointer._serialize("a-tag", payload)
        payload["some"] = "more"
        self.assertEqual(checkpointer._deserialize("a-tag"),
                         {"some": "other", 123: 456})
        self.assertEqual(payload, {"some": "more", 123: 456})

        # Going back to counter 1 still yields the first stored value
        checkpointer.counter = 1
        self.assertEqual(checkpointer._deserialize("a-tag"),
                         {"some": "value", 123: 456})
Example #13
0
 def test_dirs_created(self):
     """If the full path does not exist, it is created"""
     # Build the nested path *inside* the fresh temp dir. String
     # concatenation would glue 'some' onto the temp dir's name and
     # create a sibling directory outside of it.
     file_path = os.path.join(tempfile.mkdtemp(), 'some', 'depth', 'here')
     # A brand new temp dir is empty, so the nested path cannot exist yet
     self.assertFalse(os.path.exists(file_path))
     Checkpointer(file_path)
     self.assertTrue(os.path.isdir(file_path))
Example #14
0
    def test_basic_serialization(self):
        """An object can be stored and fetched again. Mutating the original
        after storing it proves the stored copy lives outside of local
        memory."""
        payload = {"some": "value", 123: 456}
        checkpointer = Checkpointer(tempfile.mkdtemp())

        # Store under counter 1, then change the in-memory object
        checkpointer.counter = 1
        checkpointer._serialize("a-tag", payload)
        payload["some"] = "other"
        self.assertEqual(checkpointer._deserialize("a-tag"),
                         {"some": "value", 123: 456})
        self.assertEqual(payload, {"some": "other", 123: 456})

        # Same tag under counter 2 holds the changed object
        checkpointer.counter = 2
        checkpointer._serialize("a-tag", payload)
        payload["some"] = "more"
        self.assertEqual(checkpointer._deserialize("a-tag"),
                         {"some": "other", 123: 456})
        self.assertEqual(payload, {"some": "more", 123: 456})

        # Going back to counter 1 still yields the first stored value
        checkpointer.counter = 1
        self.assertEqual(checkpointer._deserialize("a-tag"),
                         {"some": "value", 123: 456})
Example #15
0
    # Positional arguments describing the act, both parsed as integers.
    parser.add_argument('act_title', type=int, help='Act title',
                        action='store')
    parser.add_argument('act_section', type=int, help='Act section')
    # NOTE(review): argparse applies ``type`` to the raw string, and
    # bool() of any non-empty string is True, so "--generate-diffs False"
    # still yields True. Only an empty string turns the flag off.
    parser.add_argument('--generate-diffs', type=bool, help='Generate diffs?',
                        required=False, default=True)
    parser.add_argument('--checkpoint', required=False,
                        help='Directory to save checkpoint data')

    args = parser.parse_args()
    # Read the whole regulation file as UTF-8 and fingerprint its contents;
    # the digest is used below to key the checkpoints.
    with codecs.open(args.filename, 'r', 'utf-8') as f:
        reg = f.read()
        file_digest = hashlib.sha256(reg.encode('utf-8')).hexdigest()
    act_title_and_section = [args.act_title, args.act_section]

    # Only use a real checkpointer when a checkpoint directory was given.
    if args.checkpoint:
        checkpointer = Checkpointer(args.checkpoint)
    else:
        checkpointer = NullCheckpointer()

    #   First, the regulation tree
    reg_tree = checkpointer.checkpoint(
        "init-tree-" + file_digest,
        lambda: Builder.reg_tree(reg))
    title_part = reg_tree.label_id()
    #   Then the document number, also checkpointed per file digest
    doc_number = checkpointer.checkpoint(
        "doc-number-" + file_digest,
        lambda: Builder.determine_doc_number(reg, args.title, title_part))
    if not doc_number:
        raise ValueError("Could not determine document number")
    #   Later checkpoints get a ":part:title:doc_number" suffix
    checkpointer.suffix = ":".join(
        ["", title_part, str(args.title), doc_number])