Beispiel #1
0
    def __init__(self, index=0, start=None, end=None, text='', position=''):
        try:
            self.index = int(index)
        except (TypeError, ValueError):  # try to cast as int, but it's not mandatory
            self.index = index

        self.start = SubRipTime.coerce(start or 0)
        self.end = SubRipTime.coerce(end or 0)
        self.position = str(position)
        self.text = str(text)
Beispiel #2
0
    def __init__(self, index=0, start=None, end=None, text='', position=''):
        try:
            self.index = int(index)
        except (TypeError, ValueError): # try to cast as int, but it's not mandatory
            self.index = index

        self.start = SubRipTime.coerce(start or 0)
        self.end = SubRipTime.coerce(end or 0)
        self.position = str(position)
        self.text = str(text)
Beispiel #3
0
    def write_into(self, output_file, eol=None):
        """
        write_into(output_file [, eol])

        Serialize current state into `output_file`.

        `output_file` -> Any instance that respond to `write()`, typically a
        file object
        """
        output_eol = eol or self.eol

        for item in self:
            string_repr = str(item)
            if output_eol != '\n':
                string_repr = string_repr.replace('\n', output_eol)
            output_file.write(string_repr)
            # Only add trailing eol if it's not already present.
            # It was kept in the SubRipItem's text before but it really
            # belongs here. Existing applications might give us subtitles
            # which already contain a trailing eol though.
            if not string_repr.endswith(2 * output_eol):
                output_file.write(output_eol)
Beispiel #4
0
    def write_into(self, output_file, eol=None):
        """
        write_into(output_file [, eol])

        Serialize current state into `output_file`.

        `output_file` -> Any instance that respond to `write()`, typically a
        file object
        """
        output_eol = eol or self.eol

        for item in self:
            string_repr = str(item)
            if output_eol != '\n':
                string_repr = string_repr.replace('\n', output_eol)
            output_file.write(string_repr)
            # Only add trailing eol if it's not already present.
            # It was kept in the SubRipItem's text before but it really
            # belongs here. Existing applications might give us subtitles
            # which already contain a trailing eol though.
            if not string_repr.endswith(2 * output_eol):
                output_file.write(output_eol)
Beispiel #5
0
except ImportError:
    from UserList import UserList

from itertools import chain
from copy import copy

from pysrt.srtexc import Error
from pysrt.srtitem import SubRipItem
from pysrt.compat import str

BOMS = ((codecs.BOM_UTF32_LE, 'utf_32_le'),
        (codecs.BOM_UTF32_BE, 'utf_32_be'),
        (codecs.BOM_UTF16_LE, 'utf_16_le'),
        (codecs.BOM_UTF16_BE, 'utf_16_be'),
        (codecs.BOM_UTF8, 'utf_8'))
CODECS_BOMS = dict((codec, str(bom, codec)) for bom, codec in BOMS)
BIGGER_BOM = max(len(bom) for bom, encoding in BOMS)


class SubRipFile(UserList, object):
    """
    SubRip file descriptor.

    Provide a pure Python mapping on all metadata.

    SubRipFile(items, eol, path, encoding)

    items -> list of SubRipItem. Default to [].
    eol -> str: end of line character. Default to linesep used in opened file
        if any else to os.linesep.
    path -> str: path where file will be saved. To open an existant file see
Beispiel #6
0
 def test_idempotence(self):
     vtt = SubRipItem.from_string(self.vtt)
     self.assertEqual(str(vtt), self.vtt)
     item = SubRipItem.from_string(self.coordinates)
     self.assertEqual(str(item), self.coordinates)
Beispiel #7
0
 def test_serialization(self):
     self.assertEqual(str(self.item), self.string)
Beispiel #8
0
 def test_compare_from_string_and_from_path(self):
     unicode_content = codecs.open(self.utf8_path, encoding='utf_8').read()
     iterator = zip(pysrt.open(self.utf8_path),
                    pysrt.from_string(unicode_content))
     for file_item, string_item in iterator:
         self.assertEqual(str(file_item), str(string_item))
Beispiel #9
0
 def __str__(self):
     if self.ordinal < 0:
         # Represent negative times as zero
         return str(SubRipTime.from_ordinal(0))
     return self.TIME_PATTERN % tuple(self)
Beispiel #10
0
 def __str__(self):
     if self.ordinal < 0:
         # Represent negative times as zero
         return str(SubRipTime.from_ordinal(0))
     return self.TIME_PATTERN % tuple(self)
Beispiel #11
0
except ImportError:
    from UserList import UserList

from itertools import chain
from copy import copy

from pysrt.srtexc import Error
from pysrt.srtitem import SubRipItem
from pysrt.compat import str

BOMS = ((codecs.BOM_UTF32_LE, 'utf_32_le'),
        (codecs.BOM_UTF32_BE, 'utf_32_be'),
        (codecs.BOM_UTF16_LE, 'utf_16_le'),
        (codecs.BOM_UTF16_BE, 'utf_16_be'),
        (codecs.BOM_UTF8, 'utf_8'))
CODECS_BOMS = dict((codec, str(bom, codec)) for bom, codec in BOMS)
BIGGER_BOM = max(len(bom) for bom, encoding in BOMS)


class SubRipFile(UserList, object):
    """
    SubRip file descriptor.

    Provide a pure Python mapping on all metadata.

    SubRipFile(items, eol, path, encoding)

    items -> list of SubRipItem. Default to [].
    eol -> str: end of line character. Default to linesep used in opened file
        if any else to os.linesep.
    path -> str: path where file will be saved. To open an existant file see
Beispiel #12
0
 def test_compare_from_string_and_from_path(self):
     unicode_content = codecs.open(self.utf8_path, encoding="utf_8").read()
     iterator = zip(pysrt.open(self.utf8_path), pysrt.from_string(unicode_content))
     for file_item, string_item in iterator:
         self.assertEqual(str(file_item), str(string_item))
Beispiel #13
0
 def test_idempotence(self):
     vtt = SubRipItem.from_string(self.vtt)
     self.assertEqual(str(vtt), self.vtt)
     item = SubRipItem.from_string(self.coordinates)
     self.assertEqual(str(item), self.coordinates)
Beispiel #14
0
 def test_serialization(self):
     self.assertEqual(str(self.item), self.string)
Beispiel #15
0
class SubRipItem(ComparableMixin):
    """
    SubRipItem(index, start, end, text, position)

    index -> int: index of item in file. 0 by default.
    start, end -> SubRipTime or coercible.
    text -> unicode: text content for item.
    position -> unicode: raw srt/vtt "display coordinates" string
    """
    ITEM_PATTERN = str('%s\n%s --> %s%s\n%s\n')
    TIMESTAMP_SEPARATOR = '-->'

    def __init__(self, index=0, start=None, end=None, text='', position=''):
        try:
            self.index = int(index)
        except (TypeError,
                ValueError):  # try to cast as int, but it's not mandatory
            self.index = index

        self.start = SubRipTime.coerce(start or 0)
        self.end = SubRipTime.coerce(end or 0)
        self.position = str(position)
        self.text = str(text)

    @property
    def duration(self):
        return self.end - self.start

    @property
    def text_without_tags(self):
        RE_TAG = re.compile(r'<[^>]*?>')
        return RE_TAG.sub('', self.text)

    @property
    def characters_per_second(self):
        characters_count = len(self.text_without_tags.replace('\n', ''))
        try:
            return characters_count / (self.duration.ordinal / 1000.0)
        except ZeroDivisionError:
            return 0.0

    def __str__(self):
        position = ' %s' % self.position if self.position.strip() else ''
        return self.ITEM_PATTERN % (self.index, self.start, self.end, position,
                                    self.text)

    if is_py2:
        __unicode__ = __str__

        def __str__(self):
            raise NotImplementedError('Use unicode() instead!')

    def _cmpkey(self):
        return (self.start, self.end)

    def shift(self, *args, **kwargs):
        """
        shift(hours, minutes, seconds, milliseconds, ratio)

        Add given values to start and end attributes.
        All arguments are optional and have a default value of 0.
        """
        self.start.shift(*args, **kwargs)
        self.end.shift(*args, **kwargs)

    @classmethod
    def from_string(cls, source):
        return cls.from_lines(source.splitlines(True))

    @classmethod
    def from_lines(cls, lines):
        if len(lines) < 2:
            raise InvalidItem()
        lines = [l.rstrip() for l in lines]
        index = None
        if cls.TIMESTAMP_SEPARATOR not in lines[0]:
            index = lines.pop(0)
        start, end, position = cls.split_timestamps(lines[0])
        body = '\n'.join(lines[1:])
        return cls(index, start, end, body, position)

    @classmethod
    def split_timestamps(cls, line):
        timestamps = line.split(cls.TIMESTAMP_SEPARATOR)
        if len(timestamps) != 2:
            raise InvalidItem()
        start, end_and_position = timestamps
        end_and_position = end_and_position.lstrip().split(' ', 1)
        end = end_and_position[0]
        position = end_and_position[1] if len(end_and_position) > 1 else ''
        return (s.strip() for s in (start, end, position))