def __init__(self, index=0, start=None, end=None, text='', position=''):
    """
    Build an item from loosely typed values.

    index -> stored as an int when it can be converted, else kept as given.
    start, end -> passed through SubRipTime.coerce; a falsy value means 0.
    text, position -> stored as strings.
    """
    try:
        index = int(index)
    except (TypeError, ValueError):
        # Integer conversion is best effort only: keep the raw value.
        pass
    self.index = index

    self.start = SubRipTime.coerce(start if start else 0)
    self.end = SubRipTime.coerce(end if end else 0)
    self.position = str(position)
    self.text = str(text)
def write_into(self, output_file, eol=None):
    """
    write_into(output_file [, eol])

    Serialize every item of this container into `output_file`.

    `output_file` -> Any object that responds to `write()`, typically a
    file object
    `eol` -> line ending to emit; `self.eol` is used when it is falsy
    """
    line_ending = eol if eol else self.eol
    translate = line_ending != '\n'

    for entry in self:
        serialized = str(entry)
        if translate:
            serialized = serialized.replace('\n', line_ending)
        output_file.write(serialized)

        # Items are separated by a blank line. The separator belongs to the
        # file rather than to the item, but some callers hand over items
        # whose text already ends with it, so it is only added when missing.
        if serialized.endswith(2 * line_ending):
            continue
        output_file.write(line_ending)
except ImportError: from UserList import UserList from itertools import chain from copy import copy from pysrt.srtexc import Error from pysrt.srtitem import SubRipItem from pysrt.compat import str BOMS = ((codecs.BOM_UTF32_LE, 'utf_32_le'), (codecs.BOM_UTF32_BE, 'utf_32_be'), (codecs.BOM_UTF16_LE, 'utf_16_le'), (codecs.BOM_UTF16_BE, 'utf_16_be'), (codecs.BOM_UTF8, 'utf_8')) CODECS_BOMS = dict((codec, str(bom, codec)) for bom, codec in BOMS) BIGGER_BOM = max(len(bom) for bom, encoding in BOMS) class SubRipFile(UserList, object): """ SubRip file descriptor. Provide a pure Python mapping on all metadata. SubRipFile(items, eol, path, encoding) items -> list of SubRipItem. Default to []. eol -> str: end of line character. Default to linesep used in opened file if any else to os.linesep. path -> str: path where file will be saved. To open an existant file see
def test_idempotence(self):
    """Parsing a fixture and serializing it again yields the same text."""
    for source in (self.vtt, self.coordinates):
        parsed = SubRipItem.from_string(source)
        self.assertEqual(str(parsed), source)
def test_serialization(self): self.assertEqual(str(self.item), self.string)
def test_compare_from_string_and_from_path(self):
    """Items loaded from a path and from the same text serialize alike."""
    # Read the fixture inside a context manager so the handle is closed
    # deterministically rather than left for the garbage collector.
    with codecs.open(self.utf8_path, encoding='utf_8') as fixture:
        unicode_content = fixture.read()

    iterator = zip(pysrt.open(self.utf8_path),
                   pysrt.from_string(unicode_content))
    for file_item, string_item in iterator:
        self.assertEqual(str(file_item), str(string_item))
def __str__(self):
    """
    Format this time through TIME_PATTERN.

    A time whose ordinal is negative is rendered as the zero time.
    """
    if self.ordinal >= 0:
        return self.TIME_PATTERN % tuple(self)
    # Negative times are represented as zero.
    zero_time = SubRipTime.from_ordinal(0)
    return str(zero_time)
def test_compare_from_string_and_from_path(self):
    """Items loaded from a path and from the same text serialize alike."""
    # Read the fixture inside a context manager so the handle is closed
    # deterministically rather than left for the garbage collector.
    with codecs.open(self.utf8_path, encoding="utf_8") as fixture:
        unicode_content = fixture.read()

    iterator = zip(pysrt.open(self.utf8_path),
                   pysrt.from_string(unicode_content))
    for file_item, string_item in iterator:
        self.assertEqual(str(file_item), str(string_item))
class SubRipItem(ComparableMixin):
    """
    SubRipItem(index, start, end, text, position)

    index -> int: index of item in file. 0 by default.
    start, end -> SubRipTime or coercible.
    text -> unicode: text content for item.
    position -> unicode: raw srt/vtt "display coordinates" string
    """
    ITEM_PATTERN = str('%s\n%s --> %s%s\n%s\n')
    TIMESTAMP_SEPARATOR = '-->'
    # Matches a single markup tag such as <i> or </font>. Compiled once at
    # class creation instead of on every `text_without_tags` access.
    RE_TAG = re.compile(r'<[^>]*?>')

    def __init__(self, index=0, start=None, end=None, text='', position=''):
        """
        index -> stored as an int when convertible, else kept as given.
        start, end -> passed through SubRipTime.coerce; falsy means 0.
        text, position -> stored as strings.
        """
        try:
            self.index = int(index)
        except (TypeError, ValueError):
            # try to cast as int, but it's not mandatory
            self.index = index

        self.start = SubRipTime.coerce(start or 0)
        self.end = SubRipTime.coerce(end or 0)
        self.position = str(position)
        self.text = str(text)

    @property
    def duration(self):
        """Difference between `end` and `start`."""
        return self.end - self.start

    @property
    def text_without_tags(self):
        """`text` with every `<...>` tag removed."""
        return self.RE_TAG.sub('', self.text)

    @property
    def characters_per_second(self):
        """
        Count of characters (tags and newlines excluded) divided by
        `duration.ordinal / 1000.0`; 0.0 when that divisor is zero.
        """
        characters_count = len(self.text_without_tags.replace('\n', ''))
        try:
            return characters_count / (self.duration.ordinal / 1000.0)
        except ZeroDivisionError:
            return 0.0

    def __str__(self):
        """Render the item through ITEM_PATTERN."""
        # The position is only emitted (preceded by a space) when it is
        # not blank.
        position = ' %s' % self.position if self.position.strip() else ''
        return self.ITEM_PATTERN % (self.index, self.start, self.end,
                                    position, self.text)

    if is_py2:
        # On Python 2 the text rendering is exposed as __unicode__ and
        # str() is refused explicitly.
        __unicode__ = __str__

        def __str__(self):
            raise NotImplementedError('Use unicode() instead!')

    def _cmpkey(self):
        """Key made of (start, end); presumably consumed by ComparableMixin."""
        return (self.start, self.end)

    def shift(self, *args, **kwargs):
        """
        shift(hours, minutes, seconds, milliseconds, ratio)

        Add given values to start and end attributes.
        All arguments are optional and have a default value of 0.
        """
        self.start.shift(*args, **kwargs)
        self.end.shift(*args, **kwargs)

    @classmethod
    def from_string(cls, source):
        """Build an item from a single string holding all its lines."""
        return cls.from_lines(source.splitlines(True))

    @classmethod
    def from_lines(cls, lines):
        """
        Build an item from a list of lines.

        The first line is taken as the index unless it already contains
        the timestamp separator. Trailing whitespace is stripped from
        every line.

        Raises InvalidItem when fewer than two lines are given or when
        the timestamp line cannot be split.
        """
        if len(lines) < 2:
            raise InvalidItem()
        lines = [line.rstrip() for line in lines]
        index = None
        if cls.TIMESTAMP_SEPARATOR not in lines[0]:
            index = lines.pop(0)
        start, end, position = cls.split_timestamps(lines[0])
        body = '\n'.join(lines[1:])
        return cls(index, start, end, body, position)

    @classmethod
    def split_timestamps(cls, line):
        """
        Split a timestamp line into its start, end and position parts.

        Returns a generator yielding the three stripped strings, in that
        order; position is '' when nothing follows the end timestamp.

        Raises InvalidItem unless splitting on the separator gives
        exactly two parts.
        """
        timestamps = line.split(cls.TIMESTAMP_SEPARATOR)
        if len(timestamps) != 2:
            raise InvalidItem()
        start, end_and_position = timestamps
        # Whatever follows the first space after the end timestamp is the
        # raw position string.
        end_and_position = end_and_position.lstrip().split(' ', 1)
        end = end_and_position[0]
        position = end_and_position[1] if len(end_and_position) > 1 else ''
        return (s.strip() for s in (start, end, position))