def test_find_segments_seg_ids(self):
    """Check the segment ids passed to the offsets callback.

    The callback reports a fixed (4, 9) span while any text remains and
    records every segment id it is given; the ids are expected to be
    0 through 4, in order.
    """
    seen_ids = []

    def offsets(text, seg_id, exclude):
        # Empty text: report no further segment (and record nothing).
        if not text:
            return None
        seen_ids.append(seg_id)
        return (4, 9)

    text = "This is some text, lalalala text text song."
    search.segments(text, offsets)

    self.assertEqual(list(range(5)), seen_ids)
def subparts(text):
    """Return a list of subpart offsets found in ``text``.

    Does not include appendices, supplements.
    """
    # segments() supplies an index and exclusion list as well; locating
    # the next subpart only needs the remaining text.
    return segments(
        text,
        lambda remaining, _idx, _excludes: next_subpart_offsets(remaining))
def test_find_segments_excludes(self):
    """Check the exclusion ranges passed to the offsets callback.

    The callback reports a fixed (4, 9) span while any text remains and
    records the exclusion list it receives each time. The first call is
    expected to see the ranges exactly as supplied; later calls are
    expected to see them shifted (e.g. (20, 24) becomes (11, 15), then
    (2, 6), then (-7, -3)).
    """
    received = []

    def offsets(text, seg_id, exclude):
        # Empty text: report no further segment (and record nothing).
        if not text:
            return None
        received.append(exclude)
        return (4, 9)

    text = "This is some text, lalalala text text song."
    search.segments(text, offsets, [(20, 24), (3, 5)])

    self.assertEqual(5, len(received))
    # Every call should still carry both exclusion ranges.
    for ranges in received:
        self.assertEqual(2, len(ranges))

    self.assertEqual([(20, 24), (3, 5)], received[0])
    self.assertEqual([(11, 15), (-6, -4)], received[1])
    self.assertEqual((2, 6), received[2][0])
    self.assertEqual((-7, -3), received[3][0])
def paragraphs(self, text, p_level, exclude=None):
    """Return a list of paragraph offsets defined by the level param.

    :param text: the text to split into paragraphs.
    :param p_level: paragraph level, forwarded to ``paragraph_offsets``.
    :param exclude: optional list of exclusion ranges forwarded to
        ``segments``; defaults to a fresh empty list on every call.
    :return: whatever ``segments`` returns for this text.
    """
    # A literal ``[]`` default would be created once and shared by every
    # call; use a None sentinel so each call gets its own list.
    if exclude is None:
        exclude = []

    def offsets_fn(remaining_text, p_idx, exclude):
        return self.paragraph_offsets(
            remaining_text, p_level, p_idx, exclude)

    return segments(text, offsets_fn, exclude)
def paragraphs(self, text, p_level, exclude=None):
    """Find the offsets of the paragraphs at level ``p_level`` in ``text``.

    ``exclude`` is forwarded to ``segments``; when it is omitted (None),
    an empty list is used instead.
    """
    excluded = [] if exclude is None else exclude

    def next_offsets(remainder, position, local_exclude):
        # Bind the requested level; the rest comes from segments().
        return self.paragraph_offsets(
            remainder, p_level, position, local_exclude)

    return segments(text, next_offsets, excluded)
def test_find_segments_offsets(self):
    """Check the offsets returned by search.segments.

    The callback reports a fixed (4, 9) span while any text remains;
    the returned list is expected to hold five spans, each starting
    9 characters after the previous one.
    """
    def offsets(text, seg_id, exclude):
        # Empty text: report no further segment.
        if not text:
            return None
        return (4, 9)

    text = "This is some text, lalalala text text song."
    found = search.segments(text, offsets)

    expected = [(4, 9), (13, 18), (22, 27), (31, 36), (40, 45)]
    self.assertEqual(len(expected), len(found))
    for position, span in enumerate(expected):
        self.assertEqual(span, found[position])
def sections(text, part):
    """Return a list of section offsets for ``part`` found in ``text``.

    Does not include appendices.
    """
    def locate_next(remaining, _idx, _excludes):
        # The index and exclusions supplied by segments() are not needed.
        return next_section_offsets(remaining, part)

    return segments(text, locate_next)
def appendix_sections(text, appendix):
    """Split an appendix into its sections and return their offsets."""
    def locate_next(remaining, _idx, _excludes):
        # Only the remaining text and the appendix are needed; the index
        # and exclusions supplied by search.segments() are ignored.
        return find_next_appendix_section_offsets(remaining, appendix)

    return search.segments(text, locate_next)
def appendices(text):
    """Carve out a list of all the appendix offsets in ``text``."""
    # search.segments() also passes an index and exclusion list; finding
    # the next appendix only needs the remaining text.
    return search.segments(
        text,
        lambda remaining, _idx, _excludes: find_next_appendix_offsets(
            remaining))
def segments(text):
    """Return a list of segment offsets. See find_next_segment()."""
    def locate_next(remaining, _idx, _excludes):
        # The index and exclusions supplied by search.segments() are
        # ignored; only the remaining text is searched.
        return find_next_segment(remaining)

    return search.segments(text, locate_next)