def iter_text(self, file, encoding=None):
    """Yield ``(text, is_match)`` pairs for the fragments of *file*.

    Args:
        file: A file-like object. An ``io.TextIOBase`` instance is read
            as-is; anything else is wrapped in a ``codecs`` stream reader.
        encoding: Codec name used when wrapping a non-text file. Falls
            back to ``"latin1"`` when omitted or falsy.

    Yields:
        tuple: ``(text, flag)`` where ``flag`` is the truthiness of the
        match object that ``RegexStream.stream()`` paired with ``text``.
    """
    text_source = file
    if not isinstance(file, io.TextIOBase):
        # Not a text stream: decode on the fly with the requested codec.
        make_reader = codecs.getreader(encoding or "latin1")
        text_source = make_reader(file)

    scanner = RegexStream(text_source, self.URL_REGEX)

    for found, fragment in scanner.stream():
        yield fragment, bool(found)
def iter_text(self, file, encoding=None):
    """Yield ``(text, link_kind)`` pairs for the fragments of *file*.

    Args:
        file: A file-like object. An ``io.TextIOBase`` instance is read
            as-is; anything else is wrapped in a ``codecs`` stream reader.
        encoding: Codec name used when wrapping a non-text file. Falls
            back to ``'latin1'`` when omitted or falsy.

    Yields:
        tuple: ``(text, kind)``. For fragments paired with a truthy match,
        ``kind`` is ``'import'`` when the match's group 3 is truthy and
        ``'url'`` otherwise. For all other fragments ``kind`` is ``False``.
    """
    text_source = (
        file
        if isinstance(file, io.TextIOBase)
        else codecs.getreader(encoding or 'latin1')(file)
    )
    scanner = RegexStream(text_source, self.URL_REGEX)

    for found, fragment in scanner.stream():
        if not found:
            # Text that the pattern did not match.
            yield fragment, False
            continue

        # Group 3 of URL_REGEX decides between the two link kinds.
        link_kind = 'import' if found.group(3) else 'url'
        yield fragment, link_kind
def test_stream(self):
    """Check the (matched, text) fragments RegexStream yields with a small
    read size and overlap."""
    source = io.StringIO('fish dog horse bat dolphin')
    animal_pattern = re.compile(r'(horse|dog|bat)')
    stream = RegexStream(
        source, animal_pattern, read_size=5, overlap_size=2
    )

    # Reduce each match object to a bool so fragments compare by value.
    actual = [(bool(found), chunk) for found, chunk in stream.stream()]

    # NOTE(review): these fragments join to a string with two spaces
    # between 'dog' and 'horse', while the input literal above shows one.
    # Confirm the literals against the upstream test.
    expected = [
        (False, 'fish '),
        (True, 'dog'),
        (False, ' '),
        (False, ' '),
        (True, 'horse'),
        (False, ' '),
        (True, 'bat'),
        (False, ' dolp'),
        (False, 'hin'),
    ]
    self.assertEqual(expected, actual)
def test_stream(self):
    """Verify that RegexStream splits the input into the expected sequence
    of matched and unmatched text fragments."""
    text_file = io.StringIO('fish dog horse bat dolphin')
    word_regex = re.compile(r'(horse|dog|bat)')

    # NOTE(review): the fragments below join to a string with two spaces
    # between 'dog' and 'horse', while the input literal above shows one.
    # Confirm the literals against the upstream test.
    wanted = [
        (False, 'fish '),
        (True, 'dog'),
        (False, ' '),
        (False, ' '),
        (True, 'horse'),
        (False, ' '),
        (True, 'bat'),
        (False, ' dolp'),
        (False, 'hin'),
    ]

    regex_stream = RegexStream(
        text_file, word_regex, read_size=5, overlap_size=2
    )
    # Match objects are collapsed to booleans for a value comparison.
    got = [
        (bool(hit), piece)
        for hit, piece in regex_stream.stream()
    ]

    self.assertEqual(wanted, got)