Example #1
0
    def test_class_paths_to_classes(self):
        """Dotted class paths are resolved into the matching class objects,
        and the order of the input list is kept in the output"""
        convert = plugins.class_paths_to_classes

        # Paths listed in "forward" order yield Meta first
        forward_classes = convert(self._FORWARD)
        self.assertEqual(forward_classes, [Meta, ExternalCitationParser])

        # The reversed listing must produce the reversed class list
        backward_classes = convert(self._BACKWARD)
        self.assertEqual(backward_classes, [ExternalCitationParser, Meta])
    def preprocess(self):
        """Run every configured preprocessor over this object's xml.

        Notice xml is frequently inaccurate; the classes named in
        settings.PREPROCESSORS correct the general flaws. Document-specific
        problems are usually handled through the files in
        settings.LOCAL_XML_PATHS instead. Returns self so calls can be
        chained."""
        preprocessor_classes = class_paths_to_classes(settings.PREPROCESSORS)
        for preprocessor_cls in preprocessor_classes:
            # Each preprocessor is instantiated fresh and handed self.xml
            transformer = preprocessor_cls()
            transformer.transform(self.xml)

        return self
    def preprocess(self):
        """Run every configured preprocessor over this object's xml.

        Notice xml is frequently inaccurate; the classes named in
        settings.PREPROCESSORS correct the general flaws. Document-specific
        problems are usually handled through the files in
        settings.LOCAL_XML_PATHS instead. Returns self so calls can be
        chained."""
        preprocessor_classes = class_paths_to_classes(settings.PREPROCESSORS)
        for preprocessor_cls in preprocessor_classes:
            # Each preprocessor is instantiated fresh and handed self.xml
            transformer = preprocessor_cls()
            transformer.transform(self.xml)

        return self
Example #4
0
def tree_and_builder(filename, title, checkpoint_path=None, doc_number=None):
    """Parse the regulation file at `filename` and return a
    (reg_tree, builder) pair.

    The file is read as utf-8 and must be XML (first character '<'); any
    other input raises ValueError. The classes named in
    settings.PREPROCESSORS are applied to the parsed XML before the tree is
    built. Results are checkpointed under `checkpoint_path` when one is
    given. If `doc_number` is not supplied it is looked up via
    Builder.determine_doc_number; a ValueError is raised when no document
    number can be found. When `doc_number` is supplied by the caller, a fake
    notice is built and handed to the Builder."""
    # Remember whether the caller provided doc_number before we overwrite it
    make_fake = doc_number is not None
    checkpointer = (NullCheckpointer() if checkpoint_path is None
                    else Checkpointer(checkpoint_path))

    with codecs.open(filename, 'r', 'utf-8') as reg_file:
        reg_text = reg_file.read()
    # The digest of the raw file contents keys the checkpoint entries below
    file_digest = hashlib.sha256(reg_text.encode('utf-8')).hexdigest()
    if not reg_text.startswith('<'):
        raise ValueError("Building from text input is no longer supported")
    reg_xml = etree.fromstring(reg_text)

    preprocessor_classes = class_paths_to_classes(settings.PREPROCESSORS)
    for preprocessor_cls in preprocessor_classes:
        preprocessor_cls().transform(reg_xml)

    tree_key = "init-tree-" + file_digest
    reg_tree = checkpointer.checkpoint(
        tree_key, lambda: xml_parser.reg_text.build_tree(reg_xml))
    title_part = reg_tree.label_id()

    if doc_number is None:
        doc_number_key = "doc-number-" + file_digest
        doc_number = checkpointer.checkpoint(
            doc_number_key,
            lambda: Builder.determine_doc_number(reg_xml, title, title_part))
    if not doc_number:
        raise ValueError("Could not determine document number")

    # The leading empty string makes the suffix begin with a ':'
    suffix_parts = ["", title_part, str(title), doc_number]
    checkpointer.suffix = ":".join(suffix_parts)

    fake_notice = None
    if make_fake:
        effective_date = notice_fake.effective_date_for(reg_xml)
        fake_notice = notice_fake.build(
            doc_number, effective_date, title, title_part)

    builder = Builder(
        cfr_title=title,
        cfr_part=title_part,
        doc_number=doc_number,
        checkpointer=checkpointer,
        fake_notice=fake_notice)
    return reg_tree, builder
Example #5
0
def tree_and_builder(filename, title, checkpoint_path=None, doc_number=None):
    """Parse the regulation file at `filename` and return a
    (reg_tree, builder) pair.

    The file is read as utf-8 and must be XML (first character '<'); any
    other input raises ValueError. The classes named in
    settings.PREPROCESSORS are applied to the parsed XML before the tree is
    built. Results are checkpointed under `checkpoint_path` when one is
    given. If `doc_number` is not supplied it is looked up via
    Builder.determine_doc_number; a ValueError is raised when no document
    number can be found. When `doc_number` is supplied by the caller, a fake
    notice is built and handed to the Builder."""
    # Remember whether the caller provided doc_number before we overwrite it
    make_fake = doc_number is not None
    checkpointer = (NullCheckpointer() if checkpoint_path is None
                    else Checkpointer(checkpoint_path))

    with codecs.open(filename, 'r', 'utf-8') as reg_file:
        reg_text = reg_file.read()
    # The digest of the raw file contents keys the checkpoint entries below
    file_digest = hashlib.sha256(reg_text.encode('utf-8')).hexdigest()
    if not reg_text.startswith('<'):
        raise ValueError("Building from text input is no longer supported")
    reg_xml = etree.fromstring(reg_text)

    preprocessor_classes = class_paths_to_classes(settings.PREPROCESSORS)
    for preprocessor_cls in preprocessor_classes:
        preprocessor_cls().transform(reg_xml)

    tree_key = "init-tree-" + file_digest
    reg_tree = checkpointer.checkpoint(
        tree_key, lambda: xml_parser.reg_text.build_tree(reg_xml))
    title_part = reg_tree.label_id()

    if doc_number is None:
        doc_number_key = "doc-number-" + file_digest
        doc_number = checkpointer.checkpoint(
            doc_number_key,
            lambda: Builder.determine_doc_number(reg_xml, title, title_part))
    if not doc_number:
        raise ValueError("Could not determine document number")

    # The leading empty string makes the suffix begin with a ':'
    suffix_parts = ["", title_part, str(title), doc_number]
    checkpointer.suffix = ":".join(suffix_parts)

    fake_notice = None
    if make_fake:
        effective_date = notice_fake.effective_date_for(reg_xml)
        fake_notice = notice_fake.build(
            doc_number, effective_date, title, title_part)

    builder = Builder(
        cfr_title=title,
        cfr_part=title_part,
        doc_number=doc_number,
        checkpointer=checkpointer,
        fake_notice=fake_notice)
    return reg_tree, builder