def testScanMultiHit(self):
    """Scanning b"foobarfoo" for b"foo" reports both occurrences."""
    haystack = b"foobarfoo"
    chunk = streaming.Chunk(offset=0, data=haystack)
    matcher = conditions.LiteralMatcher(b"foo")

    hits = list(chunk.Scan(matcher))

    self.assertLen(hits, 2)
    # (begin, end) of each expected hit, in the order Scan yields them.
    expected_bounds = ((0, 3), (6, 9))
    for hit, (begin, end) in zip(hits, expected_bounds):
        self.assertEqual(hit, self.Span(begin=begin, end=end))
def testScanWithOverlap(self):
    """Scanning a chunk built with overlap=8 reports only the last two hits.

    The literal b"foo" occurs at offsets 0, 3, 9 and 12 of the data, but only
    the spans 9-12 and 12-15 are expected.

    NOTE(review): presumably hits lying entirely within the leading `overlap`
    bytes are suppressed by `Chunk.Scan` -- confirm against its implementation.
    """
    # Chunk data and the pattern are bytes literals.
    data = b"foofoobarfoofoo"
    chunk = streaming.Chunk(offset=0, data=data, overlap=8)

    spans = list(chunk.Scan(conditions.LiteralMatcher(b"foo")))

    self.assertLen(spans, 2)
    self.assertEqual(spans[0], self.Span(begin=9, end=12))
    self.assertEqual(spans[1], self.Span(begin=12, end=15))
def testScanWithOverlapOverlapping(self):
    """Scanning b"oooooo" for b"oo" with overlap=3 reports spans 2-4 and 4-6.

    The first expected span begins inside the first three bytes of the data
    and ends after them.

    NOTE(review): presumably a hit is reported when it extends past the
    leading `overlap` bytes even if it starts inside them -- confirm against
    `Chunk.Scan`.
    """
    # Chunk data and the pattern are bytes literals.
    data = b"oooooo"
    chunk = streaming.Chunk(offset=0, data=data, overlap=3)

    spans = list(chunk.Scan(conditions.LiteralMatcher(b"oo")))

    self.assertLen(spans, 2)
    self.assertEqual(spans[0], self.Span(begin=2, end=4))
    self.assertEqual(spans[1], self.Span(begin=4, end=6))
def testScanOverlappedHits(self):
    """Scanning b"xoxoxoxo" for b"xoxo" reports two non-overlapping spans.

    The literal occurs at offsets 0, 2 and 4 of the data; only the spans 0-4
    and 4-8 are expected, i.e. the occurrence at offset 2, which overlaps the
    first hit, is not reported.
    """
    # Chunk data and the pattern are bytes literals.
    data = b"xoxoxoxo"
    chunk = streaming.Chunk(offset=0, data=data)

    spans = list(chunk.Scan(conditions.LiteralMatcher(b"xoxo")))

    self.assertLen(spans, 2)
    self.assertEqual(spans[0], self.Span(begin=0, end=4))
    self.assertEqual(spans[1], self.Span(begin=4, end=8))