예제 #1
0
def generic_tree(text, label, title=None):
    """Build an appendix tree from `text` with the "generic" parser.

    The "generic" parser simply splits on Title Case and treats body text as
    the node content. When the parser reports no segments, a single appendix
    node holding all of `text` is returned. Otherwise each segment becomes a
    child node and the text before the first segment becomes the root's own
    content.

    :param text: raw text to split into segments
    :param label: list of label parts for the root node; each child gets this
        list plus one extra element from `letter_for`
    :param title: optional title for the root node
    :return: the root `Node`, with node type `Node.APPENDIX`
    """
    spans = generic.segments(text)
    if spans:
        subsections = []
        for position, (begin, finish) in enumerate(spans):
            heading, content = utils.title_body(text[begin:finish])
            # One label suffix per segment, derived from its position
            # (presumably a letter -- confirm against letter_for).
            child_label = label + [letter_for(position)]
            subsections.append(
                Node(content, label=child_label, title=heading,
                     node_type=Node.APPENDIX))

        # Whatever precedes the first segment stays on the root node.
        preamble = text[:spans[0][0]]
        return Node(preamble, subsections, label, title, Node.APPENDIX)

    # Nothing to split: the whole text is a single appendix node.
    return Node(text, label=label, title=title, node_type=Node.APPENDIX)
    def test_segments(self):
        """generic.segments should report three (start, end) offset pairs.

        Expected offsets are derived from the fixture: the first segment runs
        from the start of "Followed By A Title" to the start of
        "Yet Another Title", the second from there to the start of
        "Third Title", and the third from there to the end of the text.
        """
        lines = [
            "nonsection here",
            "Followed By A Title",
            "And then some content",
            "More content",
            "Yet Another Title",
            "Third Title",
            "Content here, too",
        ]
        text = "\n".join(lines)
        offsets = generic.segments(text)
        self.assertEqual(3, len(offsets))

        def offset_of(line_index):
            """Return the character offset where lines[line_index] begins."""
            return len("\n".join(lines[:line_index]) + "\n")

        first_title = offset_of(1)
        second_title = offset_of(4)
        third_title = offset_of(5)

        self.assertEqual((first_title, second_title), offsets[0])
        self.assertEqual((second_title, third_title), offsets[1])
        # The last segment extends to the very end of the text.
        self.assertEqual((third_title, len(text)), offsets[2])
    def test_segments(self):
        """Verify the offsets generic.segments returns for a small fixture.

        The fixture has one leading line followed by three title lines (two
        of them with body lines after them). Three (start, end) pairs are
        expected, each ending where the next one starts and the last ending
        at the end of the text.
        """
        lines = ["nonsection here", "Followed By A Title",
                 "And then some content", "More content",
                 "Yet Another Title", "Third Title", "Content here, too"]
        joined = "\n".join(lines)
        offsets = generic.segments(joined)
        self.assertEqual(3, len(offsets))

        # Character positions of each expected segment boundary, in order:
        # start of line 1, start of line 4, start of line 5, end of text.
        boundaries = [
            len(lines[0] + "\n"),
            len("\n".join(lines[:4]) + "\n"),
            len("\n".join(lines[:5]) + "\n"),
            len(joined),
        ]

        # Consecutive boundaries form the expected (start, end) pairs.
        expected_spans = zip(boundaries, boundaries[1:])
        for position, span in enumerate(expected_spans):
            self.assertEqual(span, offsets[position])