Beispiel #1
0
 def test_properly_converts_timing(self):
     # Reads the alternative-timing-format sample and checks the start/end
     # values of the first two 'en-US' captions.
     parsed = DFXPReader().read(DFXP_WITH_ALTERNATIVE_TIMING_FORMATS)
     english = parsed.get_captions('en-US')

     first_caption = english[0]
     self.assertEqual(first_caption.start, 1900000)
     self.assertEqual(first_caption.end, 3050000)

     second_caption = english[1]
     self.assertEqual(second_caption.start, 4000000)
     self.assertEqual(second_caption.end, 5200000)
Beispiel #2
0
    def test_properly_converts_frametiming(self,
                                           sample_dfxp_with_frame_timing):
        # The first 'en-US' caption of the frame-timing fixture must carry
        # the expected end and start values.
        parsed = DFXPReader().read(sample_dfxp_with_frame_timing)
        first_caption = parsed.get_captions('en-US')[0]

        assert first_caption.end == 12233333
        assert first_caption.start == 9666666
Beispiel #3
0
 def setUpClass(cls):
     # Parse every shared sample once and keep the results on ``cls``.
     # Samples are passed through .decode(u'utf-8') first, except
     # SAMPLE_DFXP_UNICODE, which is handed to the reader as is.
     # NOTE(review): no @classmethod decorator is visible in this excerpt;
     # presumably it sits on the line above - confirm.
     plain_sample = SAMPLE_DFXP.decode(u'utf-8')
     cls.captions = DFXPReader().read(plain_sample)

     utf8_sample = SAMPLE_DFXP_UTF8.decode(u'utf-8')
     cls.captions_utf8 = DFXPReader().read(utf8_sample)

     cls.captions_unicode = DFXPReader().read(SAMPLE_DFXP_UNICODE)

     bare_sample = SAMPLE_DFXP_WITHOUT_REGION_AND_STYLE.decode(u'utf-8')
     cls.captions_without_style_and_region = DFXPReader().read(bare_sample)
Beispiel #4
0
    def test_merge_concurrent_captions(self, dfxp_with_concurrent_captions):
        # Captions are fetched from the unmerged set before merging, then
        # from the merged set; the counts must be 5 and 3 respectively.
        unmerged_set = DFXPReader().read(dfxp_with_concurrent_captions)
        captions_before = unmerged_set.get_captions('en-US')
        merged_set = merge_concurrent_captions(unmerged_set)
        captions_after = merged_set.get_captions('en-US')

        assert len(captions_before) == 5
        assert len(captions_after) == 3
    def test_merge_concurrent_captions(self):
        # The sample holds 5 'en-US' captions before merging ...
        unmerged_set = DFXPReader().read(DFXP_WITH_CONCURRENT_CAPTIONS)
        count_before = len(unmerged_set.get_captions('en-US'))
        self.assertEqual(count_before, 5)

        # ... and 3 once concurrent captions have been merged.
        merged_set = merge_concurrent_captions(unmerged_set)
        count_after = len(merged_set.get_captions('en-US'))
        self.assertEqual(count_after, 3)
Beispiel #6
0
    def test_proper_xml_entity_escaping(
            self, sample_dfxp_with_escaped_apostrophe):
        # The reader must yield the unescaped text of the first node, and
        # the writer must emit it with '<' and '&' escaped again.
        parsed = DFXPReader().read(sample_dfxp_with_escaped_apostrophe)
        first_caption = parsed.get_captions('en-US')[0]
        text = first_caption.nodes[0].content

        assert text == "<< \"Andy's Caf\xe9 & Restaurant\" this way"
        written = DFXPWriter().write(parsed)
        assert "&lt;&lt; \"Andy's Café &amp; Restaurant\" this way" in written
Beispiel #7
0
 def test_properly_converts_timing(self):
     # Start/end values of the first two 'en-US' captions read from the
     # alternative-timing-format sample.
     reader = DFXPReader()
     parsed = reader.read(DFXP_WITH_ALTERNATIVE_TIMING_FORMATS)
     english = parsed.get_captions('en-US')

     opening = english[0]
     self.assertEqual(opening.start, 1900000)
     self.assertEqual(opening.end, 3050000)

     following = english[1]
     self.assertEqual(following.start, 4000000)
     self.assertEqual(following.end, 5200000)
Beispiel #8
0
    def test_merge_concurrent_captions(self):
        # Count the 'en-US' captions before and after merging.
        reader = DFXPReader()
        original_set = reader.read(DFXP_WITH_CONCURRENT_CAPTIONS)
        original_captions = original_set.get_captions('en-US')
        self.assertEqual(len(original_captions), 5)

        merged_set = merge_concurrent_captions(original_set)
        merged_captions = merged_set.get_captions('en-US')
        self.assertEqual(len(merged_captions), 3)
Beispiel #9
0
 def test_individual_timings_of_captions_with_matching_timespec_are_kept(
         self):  # noqa
     # All three 'en-US' captions must report the same (start, end) pair.
     parsed = DFXPReader().read(
         SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING)
     shared_timing = (9209000, 12312000)
     expected_timings = [shared_timing, shared_timing, shared_timing]

     actual_timings = []
     for caption in parsed.get_captions('en-US'):
         actual_timings.append((caption.start, caption.end))

     self.assertEqual(expected_timings, actual_timings)
 def test_proper_xml_entity_escaping(self):
     # The reader yields unescaped node text; the writer escapes '<' and
     # '&' again in its output.
     parsed = DFXPReader().read(DFXP_WITH_ESCAPED_APOSTROPHE)
     first_caption = parsed.get_captions(u'en-US')[0]
     text = first_caption.nodes[0].content
     self.assertEqual(
         text, u"<< \"Andy's Caf\xe9 & Restaurant\" this way")

     written = DFXPWriter().write(parsed)
     self.assertIn(
         u"&lt;&lt; \"Andy's Café &amp; Restaurant\" this way", written)
Beispiel #11
0
 def test_individual_timings_of_captions_with_matching_timespec_are_kept(self):  # noqa
     # Every one of the three 'en-US' captions keeps its own, identical
     # (start, end) pair.
     reader = DFXPReader()
     parsed = reader.read(SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING)

     collected = []
     for caption in parsed.get_captions('en-US'):
         collected.append((caption.start, caption.end))

     wanted = [(9209000, 12312000) for _ in range(3)]
     self.assertEqual(wanted, collected)
Beispiel #12
0
 def test_properly_converts_timing_with_frames_with_30fps_default(self):
     # Checks start/end of the first three 'en-US' captions read from
     # SAMPLE_DFXP_FRAME_TIMING_DEFAULT_30FPS.
     parsed = DFXPReader().read(SAMPLE_DFXP_FRAME_TIMING_DEFAULT_30FPS)
     english = parsed.get_captions('en-US')

     first = english[0]
     self.assertEqual(first.start, 8100000)
     self.assertEqual(first.end, 12190000)

     second = english[1]
     self.assertEqual(second.start, 258066666)
     self.assertEqual(second.end, 8168190000)

     third = english[2]
     self.assertEqual(third.start, 43208966666)
     self.assertEqual(third.end, 50412190000)
Beispiel #13
0
 def test_properly_converts_timing_with_frames_with_25fps(self):
     # Checks start/end of the first three 'en-US' captions read from
     # SAMPLE_DFXP_FRAME_TIMING_25FPS.
     parsed = DFXPReader().read(SAMPLE_DFXP_FRAME_TIMING_25FPS)
     english = parsed.get_captions('en-US')

     first = english[0]
     self.assertEqual(first.start, 8120000)
     self.assertEqual(first.end, 12190000)

     second = english[1]
     self.assertEqual(second.start, 258080000)
     self.assertEqual(second.end, 8168190000)

     third = english[2]
     self.assertEqual(third.start, 43208960000)
     self.assertEqual(third.end, 50412190000)
Beispiel #14
0
    def test_individual_timings_of_captions_with_matching_timespec_are_kept(
            self, sample_dfxp_multiple_captions_with_the_same_timing):
        # All three 'en-US' captions must report the same (start, end) pair.
        parsed = DFXPReader().read(
            sample_dfxp_multiple_captions_with_the_same_timing)
        shared_timing = (9209000, 12312000)
        expected_timings = [shared_timing, shared_timing, shared_timing]

        actual_timings = []
        for caption in parsed.get_captions('en-US'):
            actual_timings.append((caption.start, caption.end))

        assert expected_timings == actual_timings
Beispiel #15
0
    def test_properly_converts_timing(
            self, sample_dfxp_with_alternative_timing_formats):
        # Start/end values of the first two 'en-US' captions read from the
        # alternative-timing-format fixture.
        parsed = DFXPReader().read(
            sample_dfxp_with_alternative_timing_formats)
        english = parsed.get_captions('en-US')

        first_caption = english[0]
        assert first_caption.start == 1900000
        assert first_caption.end == 3050000

        second_caption = english[1]
        assert second_caption.start == 4000000
        assert second_caption.end == 5200000
 def test_proper_xml_entity_escaping(self):
     # Reading must unescape the first node's text; writing must escape
     # '<' and '&' again.
     reader = DFXPReader()
     parsed = reader.read(DFXP_WITH_ESCAPED_APOSTROPHE)
     first_node = parsed.get_captions(u'en-US')[0].nodes[0]
     self.assertEqual(first_node.content,
                      u"<< \"Andy's Caf\xe9 & Restaurant\" this way")

     serialized = DFXPWriter().write(parsed)
     self.assertIn(u"&lt;&lt; \"Andy's Café &amp; Restaurant\" this way",
                   serialized)
Beispiel #17
0
 def test_properly_converts_timing_with_frames_with_29_97fps_multiplier(
         self):
     # Checks start/end of the first three 'en-US' captions read from
     # SAMPLE_DFXP_FRAME_TIMING_MULTIPLIER_29_97FPS.
     parsed = DFXPReader().read(
         SAMPLE_DFXP_FRAME_TIMING_MULTIPLIER_29_97FPS)
     english = parsed.get_captions('en-US')

     first = english[0]
     self.assertEqual(first.start, 8100100)
     self.assertEqual(first.end, 12190000)

     second = english[1]
     self.assertEqual(second.start, 258066733)
     self.assertEqual(second.end, 8168190000)

     third = english[2]
     self.assertEqual(third.start, 43208967633)
     self.assertEqual(third.end, 50412190000)
Beispiel #18
0
    def test_individual_layouts_of_captions_with_matching_timespec_are_kept(self):  # noqa
        # Each of the three 'en-US' captions must keep its own serialized
        # layout; only the first tuple element differs between them.
        parsed = DFXPReader().read(
            SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING)
        alignment = (u'center', u'bottom')
        expected_layouts = [
            (((10, u'%'), (10, u'%')), None, None, alignment),
            (((40, u'%'), (40, u'%')), None, None, alignment),
            (((10, u'%'), (70, u'%')), None, None, alignment),
        ]

        actual_layouts = []
        for caption in parsed.get_captions('en-US'):
            actual_layouts.append(caption.layout_info.serialized())

        self.assertEqual(expected_layouts, actual_layouts)
Beispiel #19
0
    def test_individual_texts_of_captions_with_matching_timespec_are_kept(self):  # noqa
        # The content of each caption's first node must be preserved, in
        # order.
        parsed = DFXPReader().read(
            SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING)

        expected_texts = [
            u'Some text here',
            u'Some text there',
            u'Caption texts are everywhere!',
        ]

        actual_texts = []
        for caption in parsed.get_captions("en-US"):
            actual_texts.append(caption.nodes[0].content)

        self.assertEqual(expected_texts, actual_texts)
Beispiel #20
0
    def test_individual_layouts_of_captions_with_matching_timespec_are_kept(self):  # noqa
        # Each of the three 'en-US' captions must keep its own serialized
        # layout; only the first tuple element differs between them.
        parsed = DFXPReader().read(
            SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING)
        percent = UnitEnum.PERCENT
        alignment = (HorizontalAlignmentEnum.CENTER,
                     VerticalAlignmentEnum.BOTTOM)
        expected_layouts = [
            (((10, percent), (10, percent)), None, None, alignment),
            (((40, percent), (40, percent)), None, None, alignment),
            (((10, percent), (70, percent)), None, None, alignment),
        ]

        actual_layouts = []
        for caption in parsed.get_captions('en-US'):
            actual_layouts.append(caption.layout_info.serialized())

        self.assertEqual(expected_layouts, actual_layouts)
Beispiel #21
0
    def test_individual_texts_of_captions_with_matching_timespec_are_kept(self):  # noqa
        # The content of each caption's first node must be preserved, in
        # order.
        reader = DFXPReader()
        parsed = reader.read(
            SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING)

        wanted = [
            'Some text here',
            'Some text there',
            'Caption texts are everywhere!',
        ]

        found = []
        for caption in parsed.get_captions("en-US"):
            found.append(caption.nodes[0].content)

        self.assertEqual(wanted, found)
Beispiel #22
0
    def test_offset_time(self):
        reader = DFXPReader()

        # (expected value, offset-time expression) pairs, checked in order.
        expectations = (
            (1, "0.001ms"),
            (2000, "2ms"),
            (1000000, "1s"),
            (1234567, "1.234567s"),
            (180000000, "3m"),
            (14400000000, "4h"),
        )
        for expected, expression in expectations:
            assert expected == reader._translate_time(expression)

        # Tick values are not supported
        with pytest.raises(InvalidInputError):
            reader._translate_time("2.3t")
Beispiel #23
0
    def test_individual_layouts_of_captions_with_matching_timespec_are_kept(self):  # noqa
        # The serialized layout of every 'en-US' caption is compared with
        # the expected list; the alignment part is shared by all three.
        reader = DFXPReader()
        parsed = reader.read(
            SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING)
        pct = UnitEnum.PERCENT
        center_bottom = (HorizontalAlignmentEnum.CENTER,
                         VerticalAlignmentEnum.BOTTOM)
        wanted = [
            (((10, pct), (10, pct)), None, None, center_bottom),
            (((40, pct), (40, pct)), None, None, center_bottom),
            (((10, pct), (70, pct)), None, None, center_bottom),
        ]

        found = []
        for caption in parsed.get_captions('en-US'):
            found.append(caption.layout_info.serialized())

        self.assertEqual(wanted, found)
Beispiel #24
0
    def test_individual_layouts_of_captions_with_matching_timespec_are_kept(self):  # noqa
        # The serialized layout of every 'en-US' caption is compared with
        # the expected list; the alignment part is shared by all three.
        reader = DFXPReader()
        parsed = reader.read(
            SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING)
        center_bottom = (u'center', u'bottom')
        wanted = [
            (((10, u'%'), (10, u'%')), None, None, center_bottom),
            (((40, u'%'), (40, u'%')), None, None, center_bottom),
            (((10, u'%'), (70, u'%')), None, None, center_bottom),
        ]

        found = []
        for caption in parsed.get_captions('en-US'):
            found.append(caption.layout_info.serialized())

        self.assertEqual(wanted, found)
Beispiel #25
0
    def test_individual_texts_of_captions_with_matching_timespec_are_kept(
            self, sample_dfxp_multiple_captions_with_the_same_timing):
        # The content of each caption's first node must be preserved, in
        # order.
        parsed = DFXPReader().read(
            sample_dfxp_multiple_captions_with_the_same_timing)

        expected_texts = [
            'Some text here',
            'Some text there',
            'Caption texts are everywhere!',
        ]

        actual_texts = []
        for caption in parsed.get_captions("en-US"):
            actual_texts.append(caption.nodes[0].content)

        assert expected_texts == actual_texts
 def test_dfxp_to_webvtt_preserves_proper_alignment(self):
     # This failed at one point when the CaptionSet had node breaks with
     # different positioning. It was fixed both at the DFXPReader AND the
     # WebVTTWriter.
     caption_set = DFXPReader().read(DFXP_STYLE_REGION_ALIGN_CONFLICT)
     results = WebVTTWriter().write(caption_set)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(WEBVTT_FROM_DFXP_WITH_CONFLICTING_ALIGN, results)
Beispiel #27
0
    def test_break_node_positioning_is_ignored(
            self, webvtt_from_dfxp_with_conflicting_align,
            dfxp_style_region_align_conflict):
        # The WebVTT written from the conflicting-align DFXP fixture must
        # equal the expected WebVTT fixture.
        parsed = DFXPReader().read(dfxp_style_region_align_conflict)
        written = WebVTTWriter().write(parsed)

        assert webvtt_from_dfxp_with_conflicting_align == written
 def test_dfxp_with_positioning_to_webvtt_conversion(self):
     # Convert the positioned DFXP sample to WebVTT for the given video
     # dimensions and compare it with the expected WebVTT sample.
     caption_set = DFXPReader().read(SAMPLE_DFXP_WITH_POSITIONING)
     results = WebVTTWriter(video_width=VIDEO_WIDTH,
                            video_height=VIDEO_HEIGHT).write(caption_set)
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)), which only says "False is not true".
     self.assertIsInstance(results, str)
     self.assertWebVTTEquals(
         SAMPLE_WEBVTT_FROM_DFXP_WITH_POSITIONING_AND_STYLE, results)
    def test_default_region_p_tags(self):
        # Every <p> element written for SAMPLE_DFXP must carry the default
        # region id in its 'region' attribute.
        caption_set = DFXPReader().read(SAMPLE_DFXP)
        result = DFXPWriter().write(caption_set)

        soup = BeautifulSoup(result, u'xml')
        for p in soup.find_all(u'p'):
            # assertEqual replaces the deprecated assertEquals alias
            # (removed from unittest in Python 3.12).
            self.assertEqual(p.attrs.get(u'region'), DFXP_DEFAULT_REGION_ID)
    def test_default_styling_p_tags(self):
        # Every <p> element written for SAMPLE_DFXP must have its 'style'
        # attribute set to 'p'.
        caption_set = DFXPReader().read(SAMPLE_DFXP)
        result = DFXPWriter().write(caption_set)

        soup = BeautifulSoup(result, u'xml')
        for p in soup.find_all(u'p'):
            # assertEqual replaces the deprecated assertEquals alias
            # (removed from unittest in Python 3.12).
            self.assertEqual(p.attrs.get(u'style'), 'p')
    def test_legacy_convert(self):
        # Read the legacy input with read_invalid_positioning enabled and
        # compare the LegacyDFXPWriter output with the expected sample.
        reader = DFXPReader(read_invalid_positioning=True)
        parsed = reader.read(SAMPLE_DFXP_FOR_LEGACY_WRITER_INPUT)

        written = LegacyDFXPWriter().write(parsed)

        self.assertEqual(written, SAMPLE_DFXP_FOR_LEGACY_WRITER_OUTPUT)
 def load_subtitles(self, video_id, langs=('ru',)):
     """Download missing YouTube subtitles as TTML and convert them to WebVTT.

     For every language in ``langs`` for which ``subs_exists`` reports no
     subtitles yet, the automatically generated subtitles are requested
     from youtube_dl in TTML format. If the expected ``.ttml`` file is
     present afterwards, it is converted and written as a ``.vtt`` file
     in the same directory.

     :param video_id: YouTube video id; used for the watch URL and for
         the subtitle file names.
     :param langs: iterable of subtitle language codes to process.
     """
     for lang in langs:
         if subs_exists(video_id, lang):
             continue
         print('Loading {} subtitles for {}'.format(lang, video_id))
         video_dir = get_dir(video_id)
         opts = {
             'writeautomaticsub': True,
             # Request only the language handled in this iteration; asking
             # for every language on each pass repeated the same downloads.
             'subtitleslangs': [lang],
             'subtitlesformat': 'ttml',
             'nooverwrites': True,
             'skip_download': True,
             'outtmpl': join(video_dir, video_id + '.ttml')
         }
         with youtube_dl.YoutubeDL(opts) as ytdl:
             ytdl.download(['https://www.youtube.com/watch?v={}'.format(video_id)])
         # WebVTT captions from YouTube contain duplicate phrases with
         # overlapping time segments, which is inconvenient. That is why
         # the subtitles are first downloaded in TTML format and only
         # then converted to WebVTT.
         subs_path_ttml = join(video_dir, video_id + '.' + lang + '.ttml')
         subs_path_vtt = join(video_dir, video_id + '.' + lang + '.vtt')
         if exists(subs_path_ttml):
             print('converting subtitles')
             with open(subs_path_ttml, encoding='utf-8') as f:
                 subs = DFXPReader().read(f.read())
             with open(subs_path_vtt, 'w', encoding='utf-8') as f:
                 f.write(WebVTTWriter().write(subs))
 def test_is_relativized(self):
     # Absolute positioning settings (e.g. px) are converted to percentages
     decoded_sample = SAMPLE_DFXP_WITH_POSITIONING.decode('utf-8')
     parsed = DFXPReader().read(decoded_sample)
     writer = DFXPWriter(video_width=VIDEO_WIDTH,
                         video_height=VIDEO_HEIGHT)
     written = writer.write(parsed)
     self.assertEqual(written, SAMPLE_DFXP_WITH_RELATIVIZED_POSITIONING)
Beispiel #34
0
    def test_dfxp_to_webvtt_conversion(self, sample_webvtt_from_dfxp,
                                       sample_dfxp):
        # The DFXP fixture written as WebVTT must be a str and match the
        # expected WebVTT fixture.
        parsed = DFXPReader().read(sample_dfxp)
        written = WebVTTWriter().write(parsed)

        assert isinstance(written, str)
        self.assert_webvtt_equals(sample_webvtt_from_dfxp, written)
Beispiel #35
0
    def test_dfxp_to_webvtt_adds_explicit_size(
            self, sample_webvtt_output_long_cue, sample_dfxp_long_cue):
        # The long-cue DFXP fixture written as WebVTT must be a str and
        # equal the expected long-cue WebVTT fixture.
        parsed = DFXPReader().read(sample_dfxp_long_cue)
        written = WebVTTWriter().write(parsed)

        assert isinstance(written, str)
        assert sample_webvtt_output_long_cue == written
 def test_fit_to_screen(self):
     # Check whether the caption width and height are explicitly set and
     # recalculate them if necessary. This prevents long captions from
     # being cut off the screen.
     parsed = DFXPReader().read(SAMPLE_DFXP_LONG_CUE)
     written = DFXPWriter().write(parsed)
     self.assertEqual(written, SAMPLE_DFXP_LONG_CUE_FIT_TO_SCREEN)
Beispiel #37
0
 def test_caption_error_for_invalid_positioning_values(
         self, sample_dfxp_invalid_positioning_value_template):
     # The template is filled with origin="px 5px"; reading the result
     # must raise CaptionReadSyntaxError.
     template = sample_dfxp_invalid_positioning_value_template
     broken_dfxp = template.format(origin="px 5px")
     with pytest.raises(CaptionReadSyntaxError):
         DFXPReader().read(broken_dfxp)
Beispiel #38
0
 def test_offset_time(self):
     reader = DFXPReader()
     # (expected value, offset-time expression) pairs, checked in order.
     expectations = (
         (1, "0.001ms"),
         (2000, "2ms"),
         (1000000, "1s"),
         (1234567, "1.234567s"),
         (180000000, "3m"),
         (14400000000, "4h"),
     )
     for expected, expression in expectations:
         self.assertEqual(expected, reader._translate_time(expression))
     # Tick values are not supported
     with self.assertRaises(InvalidInputError):
         reader._translate_time("2.3t")
Beispiel #39
0
 def test_caption_length(self):
     # SAMPLE_DFXP must yield 7 'en-US' captions.
     captions = DFXPReader().read(SAMPLE_DFXP)
     # assertEqual replaces the deprecated assertEquals alias (removed
     # from unittest in Python 3.12).
     self.assertEqual(7, len(captions.get_captions(u"en-US")))
Beispiel #40
0
 def test_empty_cue(self):
     # The second 'en-US' caption of the empty-cue sample must compare
     # equal to an empty list.
     caption_set = DFXPReader().read(
         SAMPLE_DFXP_EMPTY_CUE)
     caps = caption_set.get_captions('en-US')
     # assertEqual replaces the deprecated assertEquals alias (removed
     # from unittest in Python 3.12).
     self.assertEqual(caps[1], [])
Beispiel #41
0
 def test_caption_length(self):
     # The decoded SAMPLE_DFXP must yield 8 'en-US' captions.
     captions = DFXPReader().read(SAMPLE_DFXP.decode(u'utf-8'))
     # assertEqual replaces the deprecated assertEquals alias (removed
     # from unittest in Python 3.12).
     self.assertEqual(8, len(captions.get_captions(u"en-US")))
Beispiel #42
0
    def test_proper_pcc_format(self):
        # The read result must expose exactly the keys "captions" and
        # "styles", with 7 entries under captions -> "en-US".
        captions = DFXPReader().read(SAMPLE_DFXP)

        # assertEqual replaces the deprecated assertEquals alias (removed
        # from unittest in Python 3.12); a set literal replaces set([...]).
        self.assertEqual({"captions", "styles"}, set(captions.keys()))
        self.assertEqual(7, len(captions["captions"]["en-US"]))
Beispiel #43
0
    def test_proper_timestamps(self):
        # The third 'en-US' caption of the decoded SAMPLE_DFXP must have
        # the expected start and end values.
        captions = DFXPReader().read(SAMPLE_DFXP.decode(u'utf-8'))
        paragraph = captions.get_captions(u"en-US")[2]

        # assertEqual replaces the deprecated assertEquals alias (removed
        # from unittest in Python 3.12).
        self.assertEqual(17000000, paragraph.start)
        self.assertEqual(18752000, paragraph.end)
Beispiel #44
0
 def test_invalid_markup_is_properly_handled(self):
     # Reading the syntax-error sample must still yield 2 'en-US' captions.
     captions = DFXPReader().read(SAMPLE_DFXP_SYNTAX_ERROR.decode(u'utf-8'))
     # assertEqual replaces the deprecated assertEquals alias (removed
     # from unittest in Python 3.12).
     self.assertEqual(2, len(captions.get_captions(u"en-US")))