def stream(cls, source_file, error_handling=ERROR_PASS):
    """
    stream(source_file, [error_handling])

    Lazily parse `source_file`, yielding each SubRipItem as soon as its
    lines have been read instead of collecting them. It works like a SAX
    parser for .srt files.

    `source_file` -> Any iterable that yields unicode strings, like a
                     file opened with `codecs.open()` or an array of
                     unicode.

    `error_handling` -> Passed unchanged to `cls._handle_error()` when
                        parsing a block raises `Error`.

    A block is a run of consecutive non-blank lines; a blank line (or the
    end of the input) closes it and hands it to
    `SubRipItem.from_lines()`.

    Example:
        >>> import pysrt
        >>> import codecs
        >>> file = codecs.open('movie.srt', encoding='utf-8')
        >>> for sub in pysrt.stream(file):
        ...     sub.text += "\\nHello !"
        ...     print unicode(sub)
    """
    pending = []
    # One extra '\n' is chained after the input so that the final block is
    # flushed even when the source does not end with a blank line.
    for index, line in enumerate(chain(source_file, '\n')):
        if line.strip():
            pending.append(line)
            continue

        # Blank line: take ownership of the collected block and start anew.
        block, pending = pending, []
        if not (block and all(block)):
            continue

        try:
            yield SubRipItem.from_lines(block)
        except Error as error:
            # Attach the raw text of the offending block, then let the
            # class decide what to do according to `error_handling`.
            # `index` is the position of the line that closed the block.
            error.args += (''.join(block), )
            cls._handle_error(error, error_handling, index)
def stream(cls, source_file, error_handling=ERROR_PASS):
    """
    stream(source_file, [error_handling])

    This method yields SubRipItem instances as soon as they have been
    parsed, without storing them. It is a kind of SAX parser for .srt
    files.

    `source_file` -> Any iterable that yields unicode strings, like a
                     file opened with `codecs.open()` or an array of
                     unicode.

    `error_handling` -> Forwarded as-is to `cls._handle_error()` whenever
                        parsing a block raises `Error`.

    Consecutive non-blank lines are grouped into a block; a blank line
    (or the end of the input) terminates the block, which is then parsed
    with `SubRipItem.from_lines()`.

    Example:
        >>> from pysrt import SubRipFile
        >>> import codecs
        >>> file = codecs.open('movie.srt', encoding='utf-8')
        >>> for sub in SubRipFile.stream(file):
        ...     sub.text += "\\nHello !"
        ...     print unicode(sub)
    """
    string_buffer = []

    # Weird bug workaround: under Python 2.5 the first read on the file
    # returned the second line instead of the first character, probably
    # because of a buffering bug in the codecs module. Reading a single
    # character and seeking back to the saved position avoids it.
    if hasattr(source_file, 'seek'):
        position = source_file.tell()
        source_file.read(1)
        source_file.seek(position)

    # A trailing newline is chained after the input so the last block is
    # flushed even if the source does not end with a blank line.
    for index, line in enumerate(chain(source_file, u'\n')):
        if line.strip():
            string_buffer.append(line)
        else:
            source = string_buffer
            string_buffer = []
            if source and all(source):
                try:
                    yield SubRipItem.from_lines(source)
                # `except X as e` (Python 2.6+ and 3.x) replaces the
                # Python-2-only `except X, e` form, which is a syntax
                # error on Python 3.
                except Error as error:
                    # Append the raw block text for diagnostics; `index`
                    # is the position of the line that ended the block.
                    error.args += (''.join(source), )
                    cls._handle_error(error, error_handling, index)
def stream(cls, source_file, error_handling=ERROR_PASS):
    """
    stream(source_file, [error_handling])

    This method yields SubRipItem instances as soon as they have been
    parsed, without storing them. It is a kind of SAX parser for .srt
    files.

    `source_file` -> Any iterable that yields unicode strings, like a
                     file opened with `codecs.open()` or an array of
                     unicode.

    `error_handling` -> Handed unchanged to `cls._handle_error()` when
                        `SubRipItem.from_lines()` raises `Error`.

    Lines are accumulated until a blank line (or the end of the input) is
    reached; the accumulated lines are then parsed as one item.

    Example:
        >>> import pysrt
        >>> import codecs
        >>> file = codecs.open('movie.srt', encoding='utf-8')
        >>> for sub in pysrt.stream(file):
        ...     sub.text += "\\nHello !"
        ...     print unicode(sub)
    """
    string_buffer = []

    # Weird bug workaround.
    if hasattr(source_file, 'seek'):
        position = source_file.tell()
        # Under Python 2.5 this call returned the second line of the file
        # instead of the first character. It is probably a buffering bug
        # in the codecs module; no better fix was found than reading one
        # character and rewinding to the saved position.
        source_file.read(1)
        source_file.seek(position)

    # The extra u'\n' guarantees that the final block is flushed when the
    # input has no trailing blank line.
    for index, line in enumerate(chain(source_file, u'\n')):
        if line.strip():
            string_buffer.append(line)
        else:
            source = string_buffer
            string_buffer = []
            if source and all(source):
                try:
                    yield SubRipItem.from_lines(source)
                # The `as` form is required on Python 3 and accepted from
                # Python 2.6 on; the former `except Error, error` spelling
                # only compiled on Python 2.
                except Error as error:
                    # Keep the raw text of the failing block with the
                    # error; `index` is the line that closed the block.
                    error.args += (''.join(source), )
                    cls._handle_error(error, error_handling, index)