def generic_tree(text, label, title=None):
    """Build an appendix tree from ``text`` using the "generic" parser.

    ``generic.segments`` yields ``(start, end)`` offsets into ``text``.
    When it finds none, the whole text becomes a single appendix node.
    Otherwise every segment is split into a title and a body via
    ``utils.title_body`` and turned into a child node whose label is
    ``label`` extended by ``letter_for(index)``; whatever precedes the
    first segment becomes the text of the root node.
    """
    spans = generic.segments(text)
    if not spans:
        # Nothing to split on: the text itself is the only node.
        return Node(text, label=label, title=title, node_type=Node.APPENDIX)

    def build_child(position, span):
        # One child per segment, labelled by its position in the sequence.
        begin, finish = span
        heading, content = utils.title_body(text[begin:finish])
        suffix = letter_for(position)
        return Node(
            content,
            label=label + [suffix],
            title=heading,
            node_type=Node.APPENDIX,
        )

    children = [
        build_child(position, span) for position, span in enumerate(spans)
    ]
    # Text ahead of the first segment belongs to the root node.
    preamble = text[:spans[0][0]]
    return Node(preamble, children, label, title, Node.APPENDIX)
def test_segments(self):
    # generic.segments is expected to report three (start, end) spans for
    # this text: one starting at each of the three title lines, with the
    # leading non-title line excluded.
    lines = [
        "nonsection here",
        "Followed By A Title",
        "And then some content",
        "More content",
        "Yet Another Title",
        "Third Title",
        "Content here, too",
    ]
    text = "\n".join(lines)
    offsets = generic.segments(text)
    self.assertEqual(3, len(offsets))

    # Cumulative offsets just past the 1st, 4th and 5th lines (newline
    # included), followed by the end of the text (no trailing newline).
    boundaries = [
        len("\n".join(lines[:count]) + "\n") for count in (1, 4, 5)
    ]
    boundaries.append(len(text))

    for position in range(3):
        expected = (boundaries[position], boundaries[position + 1])
        self.assertEqual(expected, offsets[position])
def test_segments(self):
    # Checks the (start, end) offsets reported by generic.segments for a
    # text holding one non-title line followed by three title lines.
    lines = [
        "nonsection here",
        "Followed By A Title",
        "And then some content",
        "More content",
        "Yet Another Title",
        "Third Title",
        "Content here, too",
    ]
    joined = "\n".join(lines)

    def length_through(count):
        # Length of the first `count` lines, trailing newline included.
        return len("\n".join(lines[:count]) + "\n")

    offsets = generic.segments(joined)
    self.assertEqual(3, len(offsets))

    # First span: from just after the opening line through "More content".
    first_start = len(lines[0] + "\n")
    first_end = length_through(4)
    self.assertEqual((first_start, first_end), offsets[0])

    # Second span: the "Yet Another Title" line on its own.
    second_start = length_through(4)
    second_end = length_through(5)
    self.assertEqual((second_start, second_end), offsets[1])

    # Third span: runs to the very end of the text (no trailing newline).
    third_start = length_through(5)
    third_end = len(joined)
    self.assertEqual((third_start, third_end), offsets[2])