Beispiel #1
0
 def test_linebreaks(self):
     """TXT output for text containing ``<br />`` equals TXT_LINEBREAKS."""
     subtitles = SubtitleSet('en')
     first_text = 'line 1<br />line 2<br />line 3'
     # The markup-bearing text is appended with escape=False.
     subtitles.append_subtitle(0, 1000, first_text, escape=False)
     subtitles.append_subtitle(1000, 200, 'second sub')
     rendered = unicode(TXTGenerator(subtitles))
     self.assertEqual(rendered, TXT_LINEBREAKS)
Beispiel #2
0
class JSONParser(BaseTextParser):
    """Parser for JSON input whose entries carry ``position``, ``start``,
    ``end`` and ``text`` keys."""
    file_type = 'json'

    def __init__(self, input_string, pattern, language=None, flags=[], eager_parse=True):
        """Record the raw input, then run the base initializer.

        NOTE(review): ``flags`` is accepted but not forwarded; the base
        initializer always receives an empty list.
        """
        self.input_string = input_string
        self.pattern = pattern
        self.language = language
        base_init = super(JSONParser, self).__init__
        base_init(input_string, pattern, language=language, flags=[],
                  eager_parse=eager_parse)

    def to_internal(self):
        """Return the SubtitleSet for this input, building it on first use.

        Raises SubtitleParserError when the input is not valid JSON.
        """
        if hasattr(self, 'sub_set'):
            return self.sub_set

        self.sub_set = SubtitleSet(self.language)

        try:
            entries = json.loads(self.input_string)
        except ValueError:
            raise SubtitleParserError("Invalid JSON data provided.")

        # Entries are appended in ascending ``position`` order.
        for entry in sorted(entries, key=lambda entry: entry['position']):
            self.sub_set.append_subtitle(
                entry['start'], entry['end'], entry['text'])

        return self.sub_set
class JSONParser(BaseTextParser):
    # Reads subtitles from a JSON document; every record is expected to
    # expose the keys ``position``, ``start``, ``end`` and ``text``.
    file_type = 'json'

    def __init__(self,
                 input_string,
                 pattern,
                 language=None,
                 flags=[],
                 eager_parse=True):
        # NOTE(review): the caller's ``flags`` are not passed on; the base
        # class is always initialised with an empty flag list.
        self.input_string = input_string
        self.pattern = pattern
        self.language = language
        super(JSONParser, self).__init__(
            input_string, pattern,
            language=language, flags=[], eager_parse=eager_parse)

    def to_internal(self):
        # The SubtitleSet is built once and then reused on later calls.
        already_built = hasattr(self, 'sub_set')
        if not already_built:
            self.sub_set = SubtitleSet(self.language)

            try:
                payload = json.loads(self.input_string)
            except ValueError:
                raise SubtitleParserError("Invalid JSON data provided.")

            def by_position(record):
                return record['position']

            # Records are added in ascending ``position`` order.
            for record in sorted(payload, key=by_position):
                self.sub_set.append_subtitle(record['start'],
                                             record['end'],
                                             record['text'])

        return self.sub_set
Beispiel #4
0
    def test_linebreaks(self):
        """TXT output for a three-line subtitle equals TXT_LINEBREAKS."""
        subtitles = SubtitleSet('en')
        multiline_text = 'line 1\nline 2\nline 3'
        subtitles.append_subtitle(0, 1000, multiline_text)
        subtitles.append_subtitle(1000, 200, 'second sub')
        rendered = unicode(TXTGenerator(subtitles))
        self.assertEqual(rendered, TXT_LINEBREAKS)
Beispiel #5
0
 def test_add_subtitles_with_complete_true_but_unsynced_subs(self):
     # Corner case: complete=True is requested although the only subtitle
     # has no start or end time.  subtitles_published must not be emitted.
     unsynced = SubtitleSet(language_code='en')
     unsynced.append_subtitle(None, None, 'content')
     pipeline.add_subtitles(self.video, 'en', unsynced, complete=True)
     handler = self.subtitles_published_handler
     assert_equal(handler.call_count, 0)
Beispiel #6
0
def create_subtitle_set(number_of_subtitles=0, synced=True):
    """Build an English SubtitleSet with indices 0..number_of_subtitles.

    The range is inclusive, so ``number_of_subtitles + 1`` entries are added.
    Entry ``x`` has the text ``'hey you x'``.  When ``synced`` is true it runs
    from ``x * 1000`` to ``x * 1000 + 1000``; otherwise both times are None.
    """
    result = SubtitleSet('en')

    for index in xrange(0, number_of_subtitles + 1):
        if synced:
            begin, finish = index * 1000, index * 1000 + 1000
        else:
            begin = finish = None
        result.append_subtitle(begin, finish, 'hey you %s' % index)

    return result
Beispiel #7
0
class YoutubeParser(BaseTextParser):
    """Parser for YouTube XML captions.

    Each child of the XML root is read as one subtitle: its ``start``
    attribute (seconds) gives the start time and its optional ``dur``
    attribute (seconds) the duration.
    """

    file_type = 'youtube'

    def __init__(self, input_string, language_code):
        """Store the raw XML string and language.

        The base initializer is not called; ``_pattern`` is set to None and
        nothing is parsed until ``to_internal`` (or iteration) is requested.
        """
        self.language_code = language_code
        self._pattern = None

        self.input_string = input_string
        self.language = language_code

    def __iter__(self):
        """Iterate over the parsed subtitle set, parsing first if needed."""
        if not hasattr(self, 'sub_set'):
            self.to_internal()

        for sub in self.sub_set:
            yield sub

    def to_internal(self):
        """Return the SubtitleSet for this input, building it on first use.

        Times are converted from seconds to integer milliseconds.

        Raises:
            SubtitleParserError: wraps any error raised while parsing,
                including the case where the document has no items.
        """
        if not hasattr(self, 'sub_set'):
            try:
                self.sub_set = SubtitleSet(self.language)
                xml = etree.fromstring(self.input_string.encode('utf-8'))

                has_subs = False
                total_items = len(xml)
                for i, item in enumerate(xml):
                    start = int(float(item.get('start')) * 1000)
                    # Look up the XML attribute itself.  The previous
                    # ``hasattr(item, 'duration')`` check tested for a Python
                    # attribute on the element, so an explicit ``dur`` value
                    # was never used.
                    dur = item.get('dur')
                    if dur is not None:
                        duration = int(float(dur) * 1000)
                    elif i + 1 < total_items:
                        # youtube sometimes omits the duration attribute
                        # in this case we're displaying until the next sub
                        # starts
                        next_item = xml[i + 1]
                        duration = int(float(next_item.get('start')) * 1000) - start
                    else:
                        # hardcode the last sub duration at 3 seconds
                        duration = 3000
                    end = start + duration
                    text = item.text and unescape_html(item.text) or u''
                    self.sub_set.append_subtitle(start, end, text)
                    has_subs = True
                if not has_subs:
                    raise ValueError("No subs")
            except Exception as e:
                raise SubtitleParserError(original_error=e)

        return self.sub_set
Beispiel #8
0
    def _create_two_sub_forked_subs(self, request):
        """Save a forked two-subtitle 'es' version and return the reloaded Video."""
        session = create_two_sub_dependent_session(request)
        # now fork subtitles
        spanish = session.video.subtitle_language('es')
        editing = rpc.start_editing(
            request, session.video.video_id, 'es',
            subtitle_language_pk=spanish.pk)

        forked_subs = SubtitleSet('es')
        for begin, finish, text in ((500, 1500, 'hey'), (1600, 2500, 'you')):
            forked_subs.append_subtitle(begin, finish, text)

        rpc.finished_subtitles(
            request, editing['session_pk'], forked_subs.to_xml(), forked=True)
        return Video.objects.get(pk=session.video.pk)
Beispiel #9
0
def _add_subtitles(sub_lang, num_subs, video, translated_from=None):
    """Add a version with ``num_subs`` generated subtitles to ``video``.

    Subtitle ``i`` starts at ``i * 1000`` ms, ends 800 ms later, and has the
    text ``'hey jude i'``.  When ``translated_from`` is given, its tip version
    is passed to the pipeline as the parent.

    Returns whatever ``pipeline.add_subtitles`` returns.
    """
    subtitle_set = SubtitleSet(sub_lang.language_code)

    for i in xrange(0, num_subs):
        start_time = i * 1000
        # The end is derived from the start.  The previous ``i + 800`` put
        # the end before the start for every subtitle after the first.
        end_time = start_time + 800
        subtitle_text = 'hey jude %s' % i
        subtitle_set.append_subtitle(start_time, end_time, subtitle_text)

    parents = []

    if translated_from:
        parents.append(translated_from.get_tip())

    return pipeline.add_subtitles(video, sub_lang.language_code, subtitle_set, parents=parents)
Beispiel #10
0
 def test_log_in_then_save(self):
     """Editing starts unauthenticated; saving after login credits the user."""
     request_0 = RequestMockup(NotAuthenticatedUser())
     return_value = rpc.show_widget(request_0, VIDEO_URL, False)
     video_id = return_value['video_id']
     return_value = rpc.start_editing(
         request_0, video_id, 'en', original_language_code='en')
     session_pk = return_value['session_pk']
     sset = SubtitleSet('en')
     sset.append_subtitle(2300, 3400, 'hey')
     response = rpc.regain_lock(request_0, session_pk)
     self.assertEqual('ok', response['response'])
     # Switch the request to an authenticated user before saving.
     request_0.user = self.user_0
     rpc.finished_subtitles(request_0, session_pk, sset.to_xml())
     sversion = sub_models.SubtitleVersion.objects.order_by('-pk')[0]
     # Check the stored count.  This line used to assign 1 to the attribute,
     # which verified nothing.
     self.assertEqual(1, sversion.subtitle_count)
     self.assertEqual(request_0.user.pk, sversion.author.pk)
Beispiel #11
0
    def test_fork_translation_dependent_on_forked(self):
        """Fork a French translation that was based on forked Spanish subtitles.

        Checks that French becomes forked with the new timing and that the
        Spanish language remains forked with its original first start time.
        """
        request = RequestMockup(self.user_0)
        video = self._create_two_sub_forked_subs(request)
        # Create French with Spanish as its base language (not forked yet).
        response = rpc.start_editing(request, video.video_id, 'fr', base_language_code='es')
        session_pk = response['session_pk']
        rpc.finished_subtitles(request, session_pk, create_subtitle_set(2).to_xml())

        fr_sl = models.Video.objects.get(video_id=video.video_id).subtitle_language('fr')
        self.assertEquals(False, fr_sl.is_forked)

        # now fork french
        return_value = rpc.show_widget(request, VIDEO_URL, False)
        video_id = return_value['video_id']
        fr_sl = models.Video.objects.get(video_id=video_id).subtitle_language('fr')
        response = rpc.start_editing(request, video_id, 'fr', subtitle_language_pk=fr_sl.pk)
        session_pk = response['session_pk']

        # The editing response carries the subtitles saved above.
        subtitles = SubtitleSet('fr', response['subtitles']['subtitles'])

        self.assertEquals(3, len(subtitles))
        self.assertEquals('hey you 0', subtitles[0].text)
        self.assertEquals(0, subtitles[0].start_time)
        self.assertEquals(1000, subtitles[0].end_time)

        # update the timing on the French sub.
        updated = SubtitleSet('fr')

        updated.append_subtitle(1020, 1500, 'hey 0')
        updated.append_subtitle(2500, 3500, 'hey 1')

        rpc.finished_subtitles(request, session_pk, updated.to_xml(), forked=True)

        # Reload French and inspect its newest version.
        french_lang = models.Video.objects.get(video_id=video_id).subtitle_language('fr')
        fr_version = french_lang.get_tip()
        fr_version_subtitles = fr_version.get_subtitles()

        self.assertTrue(french_lang.is_forked)
        self.assertEquals(1020, fr_version_subtitles[0].start_time)

        # Spanish is expected to keep its own first start time (500).
        spanish_lang = models.Video.objects.get(video_id=video_id).subtitle_language('es')
        es_version = spanish_lang.get_tip()
        es_version_subtitles = es_version.get_subtitles()

        self.assertEquals(True, spanish_lang.is_forked)
        self.assertEquals(500, es_version_subtitles[0].start_time)
Beispiel #12
0
class TXTParser(BaseTextParser):
    """Parser for plain text where blank lines separate subtitles.

    The resulting subtitles have no timing (start and end are None).
    """

    file_type = 'txt'

    _linebreak_re = re.compile(r"\n\n|\r\n\r\n|\r\r")

    def __init__(self,
                 input_string,
                 language=None,
                 linebreak_re=_linebreak_re,
                 eager_parse=True):
        """Split ``input_string`` with ``linebreak_re``; parse now if eager."""
        self.language = language
        # From here on ``input_string`` holds the list of text chunks.
        chunks = linebreak_re.split(input_string)
        self.input_string = chunks

        if eager_parse:
            self.to_internal()

    def __len__(self):
        """Number of chunks produced by the split."""
        return len(self.input_string)

    def __nonzero__(self):
        """True when the split produced at least one chunk."""
        return bool(self.input_string)

    def _result_iter(self):
        """Yield one untimed ``{'start', 'end', 'text'}`` dict per chunk."""
        for chunk in self.input_string:
            yield {
                'start': None,
                'end': None,
                'text': utils.strip_tags(chunk),
            }

    def to_internal(self):
        """Return the SubtitleSet for this input, building it on first use.

        Raises SubtitleParserError("No subs") when no chunk contains any
        non-whitespace text.
        """
        if hasattr(self, 'sub_set'):
            return self.sub_set

        self.sub_set = SubtitleSet(self.language)
        found_text = False
        for entry in self._result_iter():
            # Single newlines inside a chunk become line-break markup.
            entry['text'] = entry['text'].replace("\n", '<br/>')
            if not found_text:
                found_text = bool(''.join(entry['text'].split()))
            self.sub_set.append_subtitle(
                entry['start'], entry['end'], entry['text'])
        if not found_text:
            raise SubtitleParserError("No subs")
        return self.sub_set
Beispiel #13
0
    def test_unsynced_generator(self):
        """Unsynced subtitles keep None timing through an SBV round trip."""
        original = SubtitleSet('en')
        for number in xrange(0, 5):
            original.append_subtitle(None, None, "%s" % number)
        sbv_text = unicode(SBVGenerator(original))

        internal = SBVParser(sbv_text, 'en').to_internal()

        items = list(internal.subtitle_items())
        self.assertEqual(len(internal), 5)
        for item in items:
            self.assertEqual(item[0], None)
            self.assertEqual(item[1], None)

        regenerated = SBVGenerator(internal)
        self.assertEqual(regenerated.format_time(None), u'9:59:59.990')
        expected = u'''9:59:59.990,9:59:59.990\r\n0\r\n\r\n9:59:59.990,9:59:59.990\r\n1\r\n\r\n9:59:59.990,9:59:59.990\r\n2\r\n\r\n9:59:59.990,9:59:59.990\r\n3\r\n\r\n9:59:59.990,9:59:59.990\r\n4\r\n'''
        self.assertIn(expected, unicode(regenerated))
Beispiel #14
0
    def test_unsynced_generator(self):
        """Unsynced subtitles keep None timing through a DFXP round trip."""
        original = SubtitleSet('en')
        for number in xrange(0, 5):
            original.append_subtitle(None, None, "%s" % number)
        dfxp_text = unicode(DFXPGenerator(original))

        internal = DFXPParser(dfxp_text, 'en').to_internal()

        items = list(internal.subtitle_items())
        self.assertEqual(len(internal), 5)
        for index, item in enumerate(items):
            self.assertIsNone(item[0])
            self.assertIsNone(item[1])
            self.assertEqual(item[2], str(index))

        # The underlying nodes must not carry timing attributes either.
        for node in internal.get_subtitles():
            for attribute in ('begin', 'end'):
                self.assertIsNone(get_attr(node, attribute))
Beispiel #15
0
    def test_unsynced_generator(self):
        """Unsynced subtitles keep None timing through an SBV round trip."""
        original = SubtitleSet('en')
        for number in xrange(0, 5):
            original.append_subtitle(None, None, "%s" % number)
        sbv_text = unicode(SBVGenerator(original, language='en'))

        internal = SBVParser(sbv_text, 'en').to_internal()

        items = list(internal.subtitle_items())
        self.assertEqual(len(internal), 5)
        for item in items:
            self.assertEqual(item[0], None)
            self.assertEqual(item[1], None)

        regenerated = SBVGenerator(internal)
        self.assertEqual(regenerated.format_time(None), u'9:59:59.000')
        expected = u'''9:59:59.000,9:59:59.000\r\n0\r\n\r\n9:59:59.000,9:59:59.000\r\n1\r\n\r\n9:59:59.000,9:59:59.000\r\n2\r\n\r\n9:59:59.000,9:59:59.000\r\n3\r\n\r\n9:59:59.000,9:59:59.000\r\n4\r\n'''
        self.assertIn(expected, unicode(regenerated))
Beispiel #16
0
    def test_unsynced_generator(self):
        """A DFXP round trip of unsynced subtitles yields no timing at all."""
        source_set = SubtitleSet('en')
        for n in xrange(0, 5):
            source_set.append_subtitle(None, None, "%s" % n)
        serialized = unicode(DFXPGenerator(source_set))

        reparsed = DFXPParser(serialized, 'en')
        internal = reparsed.to_internal()

        entries = list(internal.subtitle_items())
        self.assertEqual(len(internal), 5)
        for position, entry in enumerate(entries):
            self.assertIsNone(entry[0])
            self.assertIsNone(entry[1])
            self.assertEqual(entry[2], str(position))

        # Neither timing attribute may be present on the underlying nodes.
        for element in internal.get_subtitles():
            self.assertIsNone(get_attr(element, 'begin'))
            self.assertIsNone(get_attr(element, 'end'))
Beispiel #17
0
    def test_unsynced_generator(self):
        """An SBV round trip of unsynced subtitles keeps start/end as None."""
        source_set = SubtitleSet("en")
        for n in xrange(0, 5):
            source_set.append_subtitle(None, None, "%s" % n)
        serialized = unicode(SBVGenerator(source_set, language="en"))

        reparsed = SBVParser(serialized, "en")
        internal = reparsed.to_internal()

        entries = list(internal.subtitle_items())
        self.assertEqual(len(internal), 5)
        for entry in entries:
            self.assertEqual(entry[0], None)
            self.assertEqual(entry[1], None)

        second_pass = SBVGenerator(internal)
        self.assertEqual(second_pass.format_time(None), u"9:59:59.000")
        expected = u"""9:59:59.000,9:59:59.000\r\n0\r\n\r\n9:59:59.000,9:59:59.000\r\n1\r\n\r\n9:59:59.000,9:59:59.000\r\n2\r\n\r\n9:59:59.000,9:59:59.000\r\n3\r\n\r\n9:59:59.000,9:59:59.000\r\n4\r\n"""
        self.assertIn(expected, unicode(second_pass))
Beispiel #18
0
class TXTParser(BaseTextParser):
    """Parser for plain text where blank lines separate subtitles.

    The resulting subtitles have no timing (start and end are None).
    """

    file_type = 'txt'

    _linebreak_re = re.compile(r"\n\n|\r\n\r\n|\r\r")

    def __init__(self, input_string, language=None, linebreak_re=_linebreak_re, eager_parse=True):
        """Split ``input_string`` with ``linebreak_re``; parse now if eager."""
        self.language = language
        # From here on ``input_string`` holds the list of text chunks.
        chunks = linebreak_re.split(input_string)
        self.input_string = chunks

        if eager_parse:
            self.to_internal()

    def __len__(self):
        """Number of chunks produced by the split."""
        return len(self.input_string)

    def __nonzero__(self):
        """True when the split produced at least one chunk."""
        return bool(self.input_string)

    def _result_iter(self):
        """Yield one untimed ``{'start', 'end', 'text'}`` dict per chunk."""
        for chunk in self.input_string:
            yield {
                'start': None,
                'end': None,
                'text': utils.strip_tags(chunk),
            }

    def to_internal(self):
        """Return the SubtitleSet for this input, building it on first use.

        Raises SubtitleParserError("No subs") when no chunk contains any
        non-whitespace text.
        """
        if hasattr(self, 'sub_set'):
            return self.sub_set

        self.sub_set = SubtitleSet(self.language)
        found_text = False
        for entry in self._result_iter():
            # Single newlines inside a chunk become line-break markup, which
            # is appended with escape=False.
            entry['text'] = entry['text'].replace("\n", '<br/>')
            if not found_text:
                found_text = bool(''.join(entry['text'].split()))
            self.sub_set.append_subtitle(
                entry['start'], entry['end'], entry['text'], escape=False)
        if not found_text:
            raise SubtitleParserError("No subs")
        return self.sub_set
Beispiel #19
0
    def test_unsynced_generator(self):
        """Unsynced subtitles keep None timing through an SRT round trip."""
        original = SubtitleSet('en')
        for number in xrange(0, 5):
            original.append_subtitle(None, None, "%s" % number)
        srt_text = unicode(SRTGenerator(original))

        internal = SRTParser(srt_text, 'en').to_internal()

        items = list(internal.subtitle_items())
        self.assertEqual(len(internal), 5)

        for item in items:
            self.assertEqual(item.start_time, None)
            self.assertEqual(item.end_time, None)

        regenerated = SRTGenerator(internal)
        self.assertEqual(regenerated.format_time(None), u'99:59:59,999')
        expected = u'''1\r\n99:59:59,999 --> 99:59:59,999\r\n0\r\n\r\n2\r\n99:59:59,999 --> 99:59:59,999\r\n1\r\n\r\n3\r\n99:59:59,999 --> 99:59:59,999\r\n2\r\n\r\n4\r\n99:59:59,999 --> 99:59:59,999\r\n3\r\n\r\n5\r\n99:59:59,999 --> 99:59:59,999\r\n4\r\n'''
        self.assertIn(expected, unicode(regenerated))
Beispiel #20
0
class TestFormatConvertion(TestCase):
    """Posts a ten-subtitle set to the convert endpoint and re-parses it."""

    def setUp(self):
        self.subs = SubtitleSet(language_code='en')
        for index in range(10):
            begin = index * 1000
            self.subs.append_subtitle(
                from_ms=begin, to_ms=begin + 1000,
                content="%s - and *italics* and **bold** and >>." % index
            )

    def _retrieve(self, format):
        """Convert ``self.subs`` to ``format``; return (raw body, parsed items)."""
        url = reverse("widget:convert_subtitles")
        payload = {
            'subtitles': self.subs.to_xml(),
            'language_code': 'pt-br',
            'format': format,
        }
        res = self.client.post(url, payload)
        self.assertEqual(res.status_code, 200)
        data = json.loads(res.content)
        self.assertNotIn('errors', data)
        internal = babelsubs.load_from(data['result'], format).to_internal()
        parsed = list(internal.subtitle_items())
        self.assertEqual(len(parsed), 10)
        return res.content, parsed

    def test_srt(self):
        raw, parsed = self._retrieve('srt')
        expected = (1000, 2000, '1 - and *italics* and **bold** and >>.',
                    {'new_paragraph': False})
        self.assertEqual(parsed[1], expected)

    def test_ssa(self):
        raw, parsed = self._retrieve('ssa')
        expected = (1000, 2000, '1 - and *italics* and **bold** and >>.',
                    {'new_paragraph': False})
        self.assertEqual(parsed[1], expected)

    def test_dfxp(self):
        raw, parsed = self._retrieve('dfxp')
        expected = (1000, 2000, '1 - and *italics* and **bold** and >>.',
                    {'new_paragraph': False})
        self.assertEqual(parsed[1], expected)

    def test_sbv(self):
        raw, parsed = self._retrieve('sbv')
        expected = (1000, 2000, '1 - and *italics* and **bold** and >>.',
                    {'new_paragraph': False})
        self.assertEqual(parsed[1], expected)
Beispiel #21
0
    def test_change_set(self):
        """A second saved version reports a text change in (0, 1] and no time change."""
        request = RequestMockup(self.user_0)
        session = create_two_sub_session(request)
        editing = rpc.start_editing(request, session.video.video_id, 'en')
        session_pk = editing['session_pk']

        replacement = SubtitleSet('en')
        rows = (
            (0, 1000, 'hey you 3'),
            (1000, 2000, 'hey you 1'),
            (2000, 3000, 'hey you 1'),
        )
        for begin, finish, text in rows:
            replacement.append_subtitle(begin, finish, text)

        rpc.finished_subtitles(request, session_pk, replacement.to_xml())
        video = Video.objects.get(pk=session.video.pk)
        language = video.subtitle_language('en')

        self.assertEqual(2, language.subtitleversion_set.full().count())

        tip = language.get_tip()
        time_change, text_change = tip.get_changes()

        self.assertTrue(0 < text_change <= 1)
        self.assertEqual(time_change, 0)
Beispiel #22
0
    def test_dfxp_merge(self):
        en_subs = SubtitleSet('en')
        es_subs = SubtitleSet('es')
        en_subs.append_subtitle(1000, 1500, 'content')
        es_subs.append_subtitle(1000, 1500, 'spanish content')
        result = self.loader.dfxp_merge([en_subs, es_subs])

        utils.assert_long_text_equal(result, """\
<tt xmlns:tts="http://www.w3.org/ns/ttml#styling" xmlns:ttp="http://www.w3.org/ns/ttml#parameter" xmlns:ttm="http://www.w3.org/ns/ttml#metadata" xmlns="http://www.w3.org/ns/ttml" xml:lang="">
    <head>
        <metadata>
            <ttm:title></ttm:title>
            <ttm:description></ttm:description>
            <ttm:copyright/>
        </metadata>
        <styling>
            <style xml:id="test-style" tts:color="white" tts:fontSize="18px"/>
        </styling>
        <layout>
            <region xml:id="bottom" style="test-style" tts:origin="0 80%" tts:extent="100% 20%"/>
            <region xml:id="top" style="test-style" tts:origin="0 0" tts:extent="100% 20%"/>
        </layout>
    </head>
    <body region="bottom">
        <div xml:lang="en">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">content</p>
            </div>
        </div>
        <div xml:lang="es">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">spanish content</p>
            </div>
        </div>
    </body>
</tt>
""")
Beispiel #23
0
class BaseTextParser(object):
    """Base class for parsers that extract subtitles with a regular expression.

    Each match of ``pattern`` is turned into a dict via ``groupdict()``; the
    ``start``, ``end`` and ``text`` entries are appended to a SubtitleSet.
    Subclasses customise ``_get_data`` and ``get_markup``.
    """

    def __init__(self, input_string, pattern, language=None, flags=[], eager_parse=True):
        '''
        If `eager_parse` is True will parse the subtitles right way, converting to our
        internal storage format, else only if you call `to_internal` directly (or `to`).
        Any errors during parsing will be of SubtitleParserError.
        Note that a file with no valid subs will be an error.

        `flags` is an iterable of `re` flag constants; they are OR-ed together
        before the pattern is compiled.
        '''
        self.input_string = input_string
        self.pattern = pattern
        self.language = language
        # re.compile() accepts a single flags bitmask.  Unpacking the list
        # positionally raised TypeError as soon as two flags were supplied,
        # so fold them into one value instead (an empty list yields 0).
        combined_flags = 0
        for flag in flags:
            combined_flags |= flag
        self._pattern = re.compile(pattern, combined_flags)
        if eager_parse:
            self.to_internal()

    def __iter__(self):
        return self._result_iter()

    def __len__(self):
        """Number of pattern matches in the input string."""
        return len(self._pattern.findall(self.input_string))

    def __nonzero__(self):
        """True when the pattern matches the input at least once."""
        return bool(self._pattern.search(self.input_string))

    def _result_iter(self):
        """
        Should iterate over items like this:
        {
            'start': ...,
            'end': ...,
            'text': ...
        }
        start_time and end_time in seconds. If it is not defined use -1.
        """
        for item in self._matches:
            yield self._get_data(item.groupdict())

    def _get_data(self, match):
        """Hook for subclasses: convert a match's group dict; identity here."""
        return match

    def _get_matches(self):
        """Return a fresh iterator over all pattern matches in the input."""
        return self._pattern.finditer(self.input_string)

    def __unicode__(self):
        return self.to(self.file_type)

    @classmethod
    def parse(cls, input_string, language=None):
        # NOTE(review): ``language`` lands in the second positional slot,
        # which is ``pattern`` for this base class; presumably subclasses
        # take ``(input_string, language, ...)`` -- confirm.
        return cls(input_string, language)

    def to(self, type):
        """Serialize the parsed subtitles to ``type`` (first item if a list)."""
        from babelsubs import to
        if isinstance(type, list):
            type = type[0]

        return to(self.to_internal(), type, language=self.language)

    def to_internal(self):
        """Return the SubtitleSet for this input, building it on first use.

        Raises SubtitleParserError (wrapping the original error) when
        parsing fails or when the pattern produced no match.
        """
        if not hasattr(self, 'sub_set'):
            match = None
            try:
                self.sub_set = SubtitleSet(self.language)
                for match in self._matches:
                    item = self._get_data(match.groupdict())
                    # fix me: support markup
                    text = self.get_markup(item['text'])
                    self.sub_set.append_subtitle(item['start'], item['end'], text, escape=False)
                # ``match`` is still None only if the loop never ran.
                if match is None:
                    raise ValueError("No subs found")
            except Exception as e:
                raise SubtitleParserError(original_error=e)

        return self.sub_set

    def get_markup(self, text):
        """Hook for subclasses: convert format markup; identity here."""
        return text

    _matches = property(_get_matches)
Beispiel #24
0
class BaseTextParser(object):
    """Base class for parsers that extract subtitles with a regular expression.

    Each match of ``pattern`` is turned into a dict via ``groupdict()``; the
    ``start``, ``end``, ``text`` and optional ``region`` entries are appended
    to a SubtitleSet.  Subclasses customise ``_get_data`` and ``get_markup``.
    """
    # xml based formats must let encoding handling to the xml parser
    # as the encoding will be declared on the root el. All other
    # parsers should allow unicode
    NO_UNICODE = False

    def __init__(self, input_string, pattern, language=None, flags=[], eager_parse=True):
        '''
        If `eager_parse` is True will parse the subtitles right way, converting to our
        internal storage format, else only if you call `to_internal` directly (or `to`).
        Any errors during parsing will be of SubtitleParserError.
        Note that a file with no valid subs will be an error.

        `flags` is an iterable of `re` flag constants; they are OR-ed together
        before the pattern is compiled.
        '''
        self.input_string = input_string
        self.pattern = pattern
        self.language = language
        # re.compile() accepts a single flags bitmask.  Unpacking the list
        # positionally raised TypeError as soon as two flags were supplied,
        # so fold them into one value instead (an empty list yields 0).
        combined_flags = 0
        for flag in flags:
            combined_flags |= flag
        self._pattern = re.compile(pattern, combined_flags)
        if eager_parse:
            self.to_internal()

    def __iter__(self):
        return self._result_iter()

    def __len__(self):
        """Number of pattern matches in the input string."""
        return len(self._pattern.findall(self.input_string))

    def __nonzero__(self):
        """True when the pattern matches the input at least once."""
        return bool(self._pattern.search(self.input_string))

    def _result_iter(self):
        """
        Should iterate over items like this:
        {
            'start': ...,
            'end': ...,
            'text': ...
        }
        start_time and end_time in seconds. If it is not defined use -1.
        """
        for item in self._matches:
            yield self._get_data(item.groupdict())

    def _get_data(self, match):
        """Hook for subclasses: convert a match's group dict; identity here."""
        return match

    def _get_matches(self):
        """Return a fresh iterator over all pattern matches in the input.

        Unless NO_UNICODE is set, a non-unicode input is first decoded as
        UTF-8 and stored back on the instance.
        """
        if not isinstance(self.input_string, unicode) and not self.NO_UNICODE:
            self.input_string = self.input_string.decode('utf-8')
        return self._pattern.finditer(self.input_string)

    def __unicode__(self):
        return self.to(self.file_type)

    @classmethod
    def parse(cls, input_string, language=None):
        # NOTE(review): ``language`` lands in the second positional slot,
        # which is ``pattern`` for this base class; presumably subclasses
        # take ``(input_string, language, ...)`` -- confirm.
        return cls(input_string, language)

    def to(self, type):
        """Serialize the parsed subtitles to ``type`` (first item if a list)."""
        from babelsubs import to
        if isinstance(type, list):
            type = type[0]

        return to(self.to_internal(), type, language=self.language)

    def to_internal(self):
        """Return the SubtitleSet for this input, building it on first use.

        Raises SubtitleParserError (wrapping the original error) when
        parsing fails or when the pattern produced no match.
        """
        if not hasattr(self, 'sub_set'):
            match = None
            try:
                self.sub_set = SubtitleSet(self.language)
                for match in self._matches:
                    item = self._get_data(match.groupdict())
                    text = self.get_markup(item['text'])
                    self.sub_set.append_subtitle(
                        item['start'], item['end'], text,
                        region=item.get('region'), escape=False)
                # ``match`` is still None only if the loop never ran.
                if match is None:
                    raise ValueError("No subs found")
            except Exception as e:
                raise SubtitleParserError(original_error=e)

        return self.sub_set

    def get_markup(self, text):
        """Convert newlines in ``text`` to ``<br/>`` markup."""
        return text.replace("\n", '<br/>')

    _matches = property(_get_matches)
Beispiel #25
0
 def test_space_before_end_span(self):
     """A trailing space inside an italic span is kept in the WebVTT text."""
     markup = ('<span fontStyle="italic">one<br/>two </span>'
               'three<span fontStyle="italic">four.</span>')
     subtitle_set = SubtitleSet('en')
     subtitle_set.append_subtitle(0, 1000, markup, escape=False)
     converted = subtitle_set.subtitle_items(mappings=WEBVTTGenerator.MAPPINGS)
     first_text = converted[0].text
     self.assertEqual(first_text, '<i>one\ntwo </i>three<i>four.</i>')
Beispiel #26
0
 def test_regions(self):
     """A subtitle in the "top" region gets ``line:1`` in its cue header."""
     subtitle_set = SubtitleSet('en')
     subtitle_set.append_subtitle(0, 1000, "test", region="top")
     vtt = WEBVTTGenerator(subtitle_set)
     first_item = subtitle_set.subtitle_items()[0]
     header = vtt.format_cue_header(first_item)
     self.assertEqual(header, u'00:00:00.000 --> 00:00:01.000 line:1')
Beispiel #27
0
 def test_space_before_end_span(self):
     """WebVTT mapping keeps the space that precedes a closing span."""
     italic_markup = (
         '<span fontStyle="italic">one<br/>two </span>'
         'three'
         '<span fontStyle="italic">four.</span>'
     )
     sset = SubtitleSet('en')
     sset.append_subtitle(0, 1000, italic_markup, escape=False)
     mapped_items = sset.subtitle_items(mappings=WEBVTTGenerator.MAPPINGS)
     expected_text = '<i>one\ntwo </i>three<i>four.</i>'
     self.assertEqual(mapped_items[0].text, expected_text)
Beispiel #28
0
def make_subtitle_set(language_code, num_subs=4):
    """Build a SubtitleSet with ``num_subs`` back-to-back subtitles.

    Subtitle ``x`` has the text ``'Sub x'``, starts at ``x * 1000`` ms and
    ends one millisecond before the next subtitle starts.
    """
    sset = SubtitleSet(language_code)
    for x in xrange(0, num_subs):
        start = x * 1000
        # The previous end of ``x * 1000 - 1`` fell before the start of the
        # same subtitle (and was -1 for the first one).
        end = start + 999
        sset.append_subtitle(start, end, "Sub %s" % x)
    return sset
Beispiel #29
0
 def test_span_around_newline(self):
     """An italic span that wraps only a line break maps to ``<i>\\n</i>``."""
     markup = 'one<span fontStyle="italic"><br/></span>two'
     subtitle_set = SubtitleSet('en')
     subtitle_set.append_subtitle(0, 1000, markup, escape=False)
     converted = subtitle_set.subtitle_items(mappings=WEBVTTGenerator.MAPPINGS)
     first_text = converted[0].text
     self.assertEqual(first_text, 'one<i>\n</i>two')
Beispiel #30
0
 def test_regions(self):
     """The cue header for a region="top" subtitle ends with ``line:1``."""
     sset = SubtitleSet('en')
     sset.append_subtitle(0, 1000, "test", region="top")
     webvtt = WEBVTTGenerator(sset)
     cue_header = webvtt.format_cue_header(sset.subtitle_items()[0])
     expected_header = u'00:00:00.000 --> 00:00:01.000 line:1'
     self.assertEqual(cue_header, expected_header)
Beispiel #31
0
 def test_span_around_newline(self):
     """WebVTT mapping turns a span around ``<br/>`` into ``<i>`` around a newline."""
     span_markup = 'one<span fontStyle="italic"><br/></span>two'
     sset = SubtitleSet('en')
     sset.append_subtitle(0, 1000, span_markup, escape=False)
     mapped_items = sset.subtitle_items(mappings=WEBVTTGenerator.MAPPINGS)
     expected_text = 'one<i>\n</i>two'
     self.assertEqual(mapped_items[0].text, expected_text)
Beispiel #32
0
class DFXPMergeTest(TestCase):
    """Checks DFXPGenerator.merge_subtitles on 'en', 'es' and 'fr' sets."""

    def setUp(self):
        # English: a single subtitle.
        self.en_subs = SubtitleSet('en')
        self.en_subs.append_subtitle(1000, 1500, 'content')

        # Spanish: two subtitles, the second one opening a new paragraph.
        self.es_subs = SubtitleSet('es')
        self.es_subs.append_subtitle(1000, 1500, 'spanish content')
        self.es_subs.append_subtitle(2000, 2500, 'spanish content 2',
                                     new_paragraph=True)

        # French: a single subtitle.
        self.fr_subs = SubtitleSet('fr')
        self.fr_subs.append_subtitle(1000, 1500, 'french content')

    def test_dfxp_merge(self):
        """Without an initial document the default head and body region are used."""
        subtitle_sets = [self.en_subs, self.es_subs, self.fr_subs]
        merged = DFXPGenerator.merge_subtitles(subtitle_sets)

        expected = """\
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:tts="http://www.w3.org/ns/ttml#styling" xml:lang="">
    <head>
        <metadata xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
            <ttm:title/>
            <ttm:description/>
            <ttm:copyright/>
        </metadata>
        <styling xmlns:tts="http://www.w3.org/ns/ttml#styling">
            <style xml:id="amara-style" tts:color="white" tts:fontFamily="proportionalSansSerif" tts:fontSize="18px" tts:textAlign="center"/>
        </styling>
        <layout xmlns:tts="http://www.w3.org/ns/ttml#styling">
            <region xml:id="amara-subtitle-area" style="amara-style" tts:extent="560px 62px" tts:padding="5px 3px" tts:backgroundColor="black" tts:displayAlign="after"/>
        </layout>
    </head>
    <body region="amara-subtitle-area">
        <div xml:lang="en">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">content</p>
            </div>
        </div>
        <div xml:lang="es">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">spanish content</p>
            </div>
            <div>
                <p begin="00:00:02.000" end="00:00:02.500">spanish content 2</p>
            </div>
        </div>
        <div xml:lang="fr">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">french content</p>
            </div>
        </div>
    </body>
</tt>
"""
        utils.assert_long_text_equal(merged, expected)

    def test_merge_with_header(self):
        """A supplied initial document keeps its own head in the merged output."""
        header_source = """\
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:tts="http://www.w3.org/ns/ttml#styling">
    <head>
        <styling>
            <style xml:id="style" tts:color="foo" tts:fontSize="bar" />
        </styling>

        <layout>
            <region xml:id="region" style="style" tts:extent="foo" tts:origin="bar" />
        </layout>
    </head>
    <body />
</tt>"""
        initial_ttml = etree.fromstring(header_source)

        subtitle_sets = [self.en_subs, self.es_subs, self.fr_subs]
        merged = DFXPGenerator.merge_subtitles(
            subtitle_sets, initial_ttml=initial_ttml)

        expected = """\
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:tts="http://www.w3.org/ns/ttml#styling" xml:lang="">
    <head>
        <styling>
            <style xml:id="style" tts:color="foo" tts:fontSize="bar"/>
        </styling>
        <layout>
            <region xml:id="region" style="style" tts:extent="foo" tts:origin="bar"/>
        </layout>
    </head>
    <body>
        <div xml:lang="en">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">content</p>
            </div>
        </div>
        <div xml:lang="es">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">spanish content</p>
            </div>
            <div>
                <p begin="00:00:02.000" end="00:00:02.500">spanish content 2</p>
            </div>
        </div>
        <div xml:lang="fr">
            <div>
                <p begin="00:00:01.000" end="00:00:01.500">french content</p>
            </div>
        </div>
    </body>
</tt>
"""
        utils.assert_long_text_equal(merged, expected)
Beispiel #33
0
 def _subs_to_sset(self, subs):
     """Build a SubtitleSet in ``self.language``'s code from argument tuples.

     Each element of ``subs`` is unpacked as the positional arguments of
     ``append_subtitle``.
     """
     result = SubtitleSet(self.language.language_code)
     for subtitle_args in subs:
         result.append_subtitle(*subtitle_args)
     return result