Example #1
0
    def test_default_positioning_when_no_positioning_is_specified(self):
        # Reads SAMPLE_NO_POSITIONING_AT_ALL_SCC and checks that both captions,
        # and the single node inside each of them, serialize to one and the
        # same layout tuple.
        caption_set = SCCReader().read(SAMPLE_NO_POSITIONING_AT_ALL_SCC)

        actual_caption_layouts = []
        for caption in caption_set.get_captions('en-US'):
            actual_caption_layouts.append(caption.layout_info.serialized())

        # The serialized layout that every caption and node must match; the
        # two middle slots are expected to be None.
        fallback_layout = (
            ((0.0, UnitEnum.PERCENT), (86.66666666666667, UnitEnum.PERCENT)),
            None,
            None,
            (HorizontalAlignmentEnum.LEFT, VerticalAlignmentEnum.TOP),
        )
        expected_caption_layouts = [fallback_layout, fallback_layout]

        # One {caption index: [node layouts]} mapping per caption.
        actual_node_layout_infos = []
        for idx, caption in enumerate(caption_set.get_captions('en-US')):
            node_layouts = []
            for node in caption.nodes:
                node_layouts.append(node.layout_info.serialized())
            actual_node_layout_infos.append({idx: node_layouts})

        expected_node_layout_infos = [
            {0: [fallback_layout]},
            {1: [fallback_layout]},
        ]

        self.assertEqual(expected_node_layout_infos, actual_node_layout_infos)
        self.assertEqual(expected_caption_layouts, actual_caption_layouts)
Example #2
0
    def test_default_positioning_when_no_positioning_is_specified(self):
        # Reads SAMPLE_NO_POSITIONING_AT_ALL_SCC and checks that both captions,
        # and the single node inside each of them, serialize to one and the
        # same layout tuple.
        caption_set = SCCReader().read(SAMPLE_NO_POSITIONING_AT_ALL_SCC)

        actual_caption_layouts = []
        for caption in caption_set.get_captions(u'en-US'):
            actual_caption_layouts.append(caption.layout_info.serialized())

        # The serialized layout that every caption and node must match; the
        # two middle slots are expected to be None.
        fallback_layout = (
            ((0.0, u'%'), (86.66666666666667, u'%')),
            None,
            None,
            (u'left', u'top'),
        )
        expected_caption_layouts = [fallback_layout, fallback_layout]

        # One {caption index: [node layouts]} mapping per caption.
        actual_node_layout_infos = []
        for idx, caption in enumerate(caption_set.get_captions('en-US')):
            node_layouts = []
            for node in caption.nodes:
                node_layouts.append(node.layout_info.serialized())
            actual_node_layout_infos.append({idx: node_layouts})

        expected_node_layout_infos = [
            {0: [fallback_layout]},
            {1: [fallback_layout]},
        ]

        self.assertEqual(expected_node_layout_infos, actual_node_layout_infos)
        self.assertEqual(expected_caption_layouts, actual_caption_layouts)
Example #3
0
    def test_default_positioning_when_no_positioning_is_specified(self):
        # Every caption read from SAMPLE_NO_POSITIONING_AT_ALL_SCC, and each
        # node it contains, must serialize to the same layout tuple.
        caption_set = SCCReader().read(SAMPLE_NO_POSITIONING_AT_ALL_SCC)

        actual_caption_layouts = []
        for caption in caption_set.get_captions(u'en-US'):
            actual_caption_layouts.append(caption.layout_info.serialized())

        # Shared expectation: a percent-based pair, two None slots and a
        # left/top pair.
        percent_pair = ((0.0, u'%'), (86.66666666666667, u'%'))
        alignment_pair = (u'left', u'top')
        fallback_layout = (percent_pair, None, None, alignment_pair)

        expected_caption_layouts = [fallback_layout, fallback_layout]

        # One {caption index: [node layouts]} mapping per caption.
        actual_node_layout_infos = []
        for idx, caption in enumerate(caption_set.get_captions('en-US')):
            serialized_nodes = [
                node.layout_info.serialized() for node in caption.nodes
            ]
            actual_node_layout_infos.append({idx: serialized_nodes})

        expected_node_layout_infos = [
            {0: [fallback_layout]},
            {1: [fallback_layout]},
        ]

        self.assertEqual(expected_node_layout_infos, actual_node_layout_infos)
        self.assertEqual(expected_caption_layouts, actual_caption_layouts)
Example #4
0
    def test_default_positioning_when_no_positioning_is_specified(self):
        # Every caption read from SAMPLE_NO_POSITIONING_AT_ALL_SCC, and each
        # node it contains, must serialize to the same layout tuple.
        caption_set = SCCReader().read(SAMPLE_NO_POSITIONING_AT_ALL_SCC)

        actual_caption_layouts = []
        for caption in caption_set.get_captions('en-US'):
            actual_caption_layouts.append(caption.layout_info.serialized())

        # Shared expectation: a percent-based pair, two None slots and a
        # left/top alignment pair.
        percent_pair = (
            (0.0, UnitEnum.PERCENT),
            (86.66666666666667, UnitEnum.PERCENT),
        )
        alignment_pair = (
            HorizontalAlignmentEnum.LEFT,
            VerticalAlignmentEnum.TOP,
        )
        fallback_layout = (percent_pair, None, None, alignment_pair)

        expected_caption_layouts = [fallback_layout, fallback_layout]

        # One {caption index: [node layouts]} mapping per caption.
        actual_node_layout_infos = []
        for idx, caption in enumerate(caption_set.get_captions('en-US')):
            serialized_nodes = [
                node.layout_info.serialized() for node in caption.nodes
            ]
            actual_node_layout_infos.append({idx: serialized_nodes})

        expected_node_layout_infos = [
            {0: [fallback_layout]},
            {1: [fallback_layout]},
        ]

        self.assertEqual(expected_node_layout_infos, actual_node_layout_infos)
        self.assertEqual(expected_caption_layouts, actual_caption_layouts)
Example #5
0
    def test_last_caption_zero_end_time_is_corrected(self):
        # The final caption of the sample must end exactly 4 * 1000 * 1000
        # time units after it starts (presumably microseconds -- confirm).
        sample = SAMPLE_SCC_NO_EXPLICIT_END_TO_LAST_CAPTION
        caption_set = SCCReader().read(sample)

        final_caption = caption_set.get_captions('en-US')[-1]
        expected_end = final_caption.start + 4 * 1000 * 1000
        self.assertEqual(final_caption.end, expected_end)
Example #6
0
    def test_italics_are_properly_read(self):
        def italics_switch(style_node):
            """Return the ``start`` flag of a style node.

            The assertions below treat a true flag as italics being switched
            on and a false flag as italics being switched off.

            Style nodes should be deprecated in favor of another model, so
            this helper is expected to go away.

            :type style_node: pycaption.CaptionNode
            :rtype: bool
            :raises ValueError: if the node is not a style node
            """
            is_style_node = style_node.type_ == style_node.STYLE
            if is_style_node:
                return style_node.start

            raise ValueError(u"This should be a style node.")

        first_caption = SCCReader().read(
            SAMPLE_SCC_WITH_ITALICS).get_captions(u'en-US')[0]
        nodes = first_caption.nodes

        # The text node must sit between a node that switches italics on and
        # a node that switches them off.
        # If Style nodes are replaced, the way these 3 assertions are made
        # will most likely change.
        self.assertEqual(italics_switch(nodes[0]), True)
        self.assertEqual(italics_switch(nodes[2]), False)
        self.assertEqual(nodes[1].content, u'abababab')
Example #7
0
    def test_scc_positioning_is_read(self):
        # Compares the serialized origin of every caption in the sample with
        # a fixed list of percent-based coordinate pairs.
        sample_text = six.text_type(SAMPLE_SCC_MULTIPLE_POSITIONING)
        captions = SCCReader().read(sample_text)

        # SCC generates only origin, and we always expect it.
        # Each entry holds the two numeric values of one expected origin.
        origin_values = [
            (0.0, 80.0),
            (37.5, 0.0),
            (75.0, 20.0),
            (12.5, 46.666666666666664),
            (12.5, 93.33333333333333),
            (37.5, 53.333333333333336),
            (75.0, 13.333333333333334),
            (12.5, 33.333333333333336),
            (12.5, 86.66666666666667),
            (75.0, 6.666666666666667),
            (37.5, 40.0),
            (12.5, 73.33333333333333),
        ]
        expected_positioning = [
            ((first, UnitEnum.PERCENT), (second, UnitEnum.PERCENT))
            for first, second in origin_values
        ]

        actual_positioning = []
        for caption_ in captions.get_captions('en-US'):
            actual_positioning.append(caption_.layout_info.origin.serialized())

        self.assertEqual(expected_positioning, actual_positioning)
Example #8
0
    def test_last_caption_zero_end_time_is_corrected(self):
        # The final caption of the sample must end exactly 4 * 1000 * 1000
        # time units after it starts (presumably microseconds -- confirm).
        reader = SCCReader()
        caption_set = reader.read(SAMPLE_SCC_NO_EXPLICIT_END_TO_LAST_CAPTION)  # noqa

        all_captions = caption_set.get_captions('en-US')
        final_caption = all_captions[-1]
        self.assertEqual(final_caption.end,
                         final_caption.start + 4 * 1000 * 1000)
Example #9
0
    def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru2):
        # Freezes the (start, end) timings read from the roll-up sample.
        # In the expected data each caption ends exactly where the next one
        # starts, so the pairs are derived from consecutive boundaries.
        caption_set = SCCReader().read(sample_scc_roll_up_ru2)
        captions = caption_set.get_captions('en-US')

        boundaries = [
            766666.6666666667,
            2800000.0,
            4600000.0,
            6166666.666666667,
            9733333.333333332,
            11266666.666666668,
            12266666.666666668,
            13266666.666666668,
            14266666.666666668,
            17066666.666666668,
            18666666.666666668,
            20233333.333333336,
            21833333.333333332,
            34933333.33333333,
            36433333.33333333,
            44300000.0,
            44866666.666666664,
        ]
        expected_timings = list(zip(boundaries, boundaries[1:]))

        actual_timings = []
        for c_ in captions:
            actual_timings.append((c_.start, c_.end))

        assert expected_timings == actual_timings
Example #10
0
    def test_freeze_rollup_captions_contents(self, sample_scc_roll_up_ru2):
        # There were no tests for ROLL-UP captions, but the library processed
        # Roll-Up captions. This freezes the content of the first node of
        # every caption so that refactoring cannot change it unnoticed.
        caption_set = SCCReader().read(sample_scc_roll_up_ru2)

        actual_texts = []
        for cap_ in caption_set.get_captions('en-US'):
            first_node = cap_.nodes[0]
            actual_texts.append(first_node.content)

        expected_texts = [
            '>>> HI.',
            "I'M KEVIN CUNNING AND AT",
            "INVESTOR'S BANK WE BELIEVE IN",
            'HELPING THE LOCAL NEIGHBORHOODS',
            'AND IMPROVING THE LIVES OF ALL',
            'WE SERVE.',
            '®°½½',
            'ABû',
            'ÁÁÉÓ¡',
            "WHERE YOU'RE STANDING NOW,",
            "LOOKING OUT THERE, THAT'S AL",
            'THE CROWD.',
            '>> IT WAS GOOD TO BE IN TH',
            "And restore Iowa's land, water",
            'And wildlife.',
            '>> Bike Iowa, your source for',
        ]

        assert expected_texts == actual_texts
Example #11
0
 def test_freeze_rollup_captions_contents(self):
     # There were no tests for ROLL-UP captions, but the library processed
     # Roll-Up captions. This freezes the content of the first node of every
     # caption so that refactoring cannot change it unnoticed.
     caption_set = SCCReader().read(SAMPLE_SCC_ROLL_UP_RU2)

     actual_texts = []
     for cap_ in caption_set.get_captions(u'en-US'):
         first_node = cap_.nodes[0]
         actual_texts.append(first_node.content)

     expected_texts = [
         u'>>> HI',
         u"I'M KEVIN CUNNING AND AT",
         # Notice the missing 'N' at the end. This is because the input is
         # not OK: it should only use 4 byte "words", filling in with '80'
         # where only 2 bytes are meaningful.
         u"INVESTOR'S BANK WE BELIEVE I",
         u'HELPING THE LOCAL NEIGHBORHOOD',
         u'AND IMPROVING THE LIVES OF ALL',
         u'WE SERVE',
         # Special chars. The last one should be printed 2 times.
         # XXX this is a bug.
         u'®°½',
         # Special/extended chars delete the last 0-4 chars.
         # XXX - this is a bug.
         u'ABû',
         u'ÁÉÓ¡',
         u"WHERE YOU'RE STANDING NOW,",
         u"LOOKING OUT THERE, THAT'S AL",
         u'THE CROWD.',
         u'>> IT WAS GOOD TO BE IN TH',
         u"And restore Iowa's land, water",
         u'And wildlife.',
         u'>> Bike Iowa, your source for',
     ]
     self.assertEqual(expected_texts, actual_texts)
Example #12
0
    def test_italics_are_properly_read(self):
        def italics_switch(style_node):
            """Return the ``start`` flag of a style node.

            The assertions below treat a true flag as italics being switched
            on and a false flag as italics being switched off.

            Style nodes should be deprecated in favor of another model, so
            this helper is expected to go away.

            :type style_node: pycaption.CaptionNode
            :rtype: bool
            :raises ValueError: if the node is not a style node
            """
            if style_node.type_ == style_node.STYLE:
                return style_node.start

            raise ValueError(u"This should be a style node.")

        caption_set = SCCReader().read(SAMPLE_SCC_WITH_ITALICS)
        first_caption = caption_set.get_captions(u'en-US')[0]
        nodes = first_caption.nodes

        # The text node must sit between a node that switches italics on and
        # a node that switches them off.
        # If Style nodes are replaced, the way these 3 assertions are made
        # will most likely change.
        self.assertEqual(italics_switch(nodes[0]), True)
        self.assertEqual(italics_switch(nodes[2]), False)
        self.assertEqual(nodes[1].content, u'abababab')
Example #13
0
 def test_freeze_rollup_captions_contents(self):
     # There were no tests for ROLL-UP captions, but the library processed
     # Roll-Up captions. This freezes the content of the first node of every
     # caption so that refactoring cannot change it unnoticed.
     reader = SCCReader()
     captions = reader.read(SAMPLE_SCC_ROLL_UP_RU2).get_captions(u'en-US')

     actual_texts = []
     for cap_ in captions:
         actual_texts.append(cap_.nodes[0].content)

     expected_texts = [
         u'>>> HI',
         u"I'M KEVIN CUNNING AND AT",
         # Notice the missing 'N' at the end. This is because the input is
         # not OK: it should only use 4 byte "words", filling in with '80'
         # where only 2 bytes are meaningful.
         u"INVESTOR'S BANK WE BELIEVE I",
         u'HELPING THE LOCAL NEIGHBORHOOD',
         u'AND IMPROVING THE LIVES OF ALL',
         u'WE SERVE',
         # Special chars. The last one should be printed 2 times.
         # XXX this is a bug.
         u'®°½',
         # Special/extended chars delete the last 0-4 chars.
         # XXX - this is a bug.
         u'ABû',
         u'ÁÉÓ¡',
         u"WHERE YOU'RE STANDING NOW,",
         u"LOOKING OUT THERE, THAT'S AL",
         u'THE CROWD.',
         u'>> IT WAS GOOD TO BE IN TH',
         u"And restore Iowa's land, water",
         u'And wildlife.',
         u'>> Bike Iowa, your source for',
     ]
     self.assertEqual(expected_texts, actual_texts)
Example #14
0
    def test_eoc_first_command(self, sample_scc_eoc_first_command):
        # TODO First caption should be ignored because it doesn't start with
        #  a pop/roll/paint on command
        reader = SCCReader()
        caption_set = reader.read(sample_scc_eoc_first_command)

        # The reader currently yields two captions for this sample; once the
        # TODO above is addressed this count is expected to change.
        en_captions = caption_set.get_captions('en-US')
        num_captions = len(en_captions)

        assert num_captions == 2
Example #15
0
    def test_last_caption_zero_end_time_is_corrected(
            self, sample_scc_no_explicit_end_to_last_caption):
        # The final caption of the sample must end exactly 4 * 1000 * 1000
        # time units after it starts (presumably microseconds -- confirm).
        reader = SCCReader()
        caption_set = reader.read(sample_scc_no_explicit_end_to_last_caption)

        all_captions = caption_set.get_captions('en-US')
        final_caption = all_captions[-1]

        assert final_caption.end == final_caption.start + 4 * 1000 * 1000
Example #16
0
    def test_proper_timestamps(self, sample_scc_pop_on):
        # The caption at index 2 must start within TOLERANCE_MICROSECONDS of
        # 17000000 and end within TOLERANCE_MICROSECONDS of 18752000.
        caption_set = SCCReader().read(sample_scc_pop_on)
        third_caption = caption_set.get_captions("en-US")[2]

        start_drift, end_drift = (
            abs(third_caption.start - 17000000),
            abs(third_caption.end - 18752000),
        )

        assert start_drift < TOLERANCE_MICROSECONDS
        assert end_drift < TOLERANCE_MICROSECONDS
Example #17
0
    def test_proper_timestamps(self):
        # The caption at index 2 must start within TOLERANCE_MICROSECONDS of
        # 17000000 and end within TOLERANCE_MICROSECONDS of 18752000.
        captions = SCCReader().read(SAMPLE_SCC.decode(u'utf-8'))
        paragraph = captions.get_captions(u"en-US")[2]

        delta_start = abs(paragraph.start - 17000000)
        delta_end = abs(paragraph.end - 18752000)

        # assertLess reports both operands on failure, whereas
        # assertTrue(a < b) only reports "False is not true".
        self.assertLess(delta_start, TOLERANCE_MICROSECONDS)
        self.assertLess(delta_end, TOLERANCE_MICROSECONDS)
Example #18
0
    def test_proper_timestamps(self):
        # The caption at index 2 must start within TOLERANCE_MICROSECONDS of
        # 17000000 and end within TOLERANCE_MICROSECONDS of 18752000.
        captions = SCCReader().read(SAMPLE_SCC_POP_ON)
        paragraph = captions.get_captions(u"en-US")[2]

        delta_start = abs(paragraph.start - 17000000)
        delta_end = abs(paragraph.end - 18752000)

        # assertLess reports both operands on failure, whereas
        # assertTrue(a < b) only reports "False is not true".
        self.assertLess(delta_start, TOLERANCE_MICROSECONDS)
        self.assertLess(delta_end, TOLERANCE_MICROSECONDS)
def make_srt(url, uid):
    """Download an SCC file and store its captions as ``fox-data/<uid>.srt``.

    The body fetched from ``url`` is parsed with ``SCCReader``; the ``str()``
    form of every 'en-US' caption is written, one per line, to
    ``<uid>.srt`` in the current directory, and that file is then moved
    into ``fox-data/``.

    NOTE(review): the output is the ``str()`` of each caption object, not
    the result of an SRT writer -- confirm this is the intended format.

    :param url: address of the SCC caption file to download
    :param uid: identifier used (via ``str()``) to name the output file
    """
    import os

    response = requests.get(url)
    caption_set = SCCReader().read(response.text)

    # Build the text in one pass instead of repeated string concatenation.
    text = "".join(
        str(caption) + "\n" for caption in caption_set.get_captions('en-US')
    )

    file_name = str(uid) + ".srt"
    # The context manager closes the handle even if the write fails.
    with open(file_name, "w", encoding='utf-8') as subs_file:
        subs_file.write(text)

    # os.rename does not create directories, so "fox-data/" must already
    # exist.
    os.rename(file_name, "fox-data/" + file_name)
Example #20
0
def make_srt(url):
    """Download an SCC file and store its captions in ``sample.srt``.

    The body fetched from ``url`` is parsed with ``SCCReader``; the ``str()``
    form of every 'en-US' caption is written, one per line, to
    ``sample.srt`` in the current directory, after which "Done" is printed.

    Example input:
    https://static-media.fox.com/dcg/cc/2-guns/XMX1593_24029253693.scc

    NOTE(review): the output is the ``str()`` of each caption object, not
    the result of an SRT writer -- confirm this is the intended format.

    :param url: address of the SCC caption file to download
    """
    response = requests.get(url)
    caption_set = SCCReader().read(response.text)

    # Build the text in one pass instead of repeated string concatenation.
    text = "".join(
        str(caption) + "\n" for caption in caption_set.get_captions('en-US')
    )

    # The context manager closes the handle even if the write fails.
    with open('sample' + ".srt", "w", encoding='utf-8') as subs_file:
        subs_file.write(text)
    print("Done")
Example #21
0
    def test_timing_is_properly_set_on_split_captions(self):
        # All three captions read from this sample are expected to share the
        # same formatted start and end times.
        sample = SAMPLE_SCC_PRODUCES_CAPTIONS_WITH_START_AND_END_TIME_THE_SAME
        caption_set = SCCReader().read(sample)

        shared_timing = (u'00:01:35.666', u'00:01:40.866')
        expected_timings = [shared_timing] * 3

        actual_timings = []
        for c_ in caption_set.get_captions('en-US'):
            actual_timings.append((c_.format_start(), c_.format_end()))

        self.assertEqual(expected_timings, actual_timings)
Example #22
0
    def test_correct_last_bad_timing(self):
        # This fix was implemented with a hack. The commands for the Pop-on
        # captions will have to be reviewed, but until then this is good enough
        #
        # Checks the raw (start, end) values of both captions in the sample.
        reader = SCCReader()
        caption_set = reader.read(SAMPLE_SCC_PRODUCES_BAD_LAST_END_TIME)

        first_timing = (1408266666.6666667, 1469700000.0)
        second_timing = (3208266666.666667, 3269700000.0)
        expected_timings = [first_timing, second_timing]

        actual_timings = []
        for c_ in caption_set.get_captions(u'en-US'):
            actual_timings.append((c_.start, c_.end))
        self.assertEqual(expected_timings, actual_timings)
Example #23
0
    def test_timing_is_properly_set_on_split_captions(self):
        # All three captions read from this sample are expected to share the
        # same formatted start and end times.
        reader = SCCReader()
        caption_set = reader.read(
            SAMPLE_SCC_PRODUCES_CAPTIONS_WITH_START_AND_END_TIME_THE_SAME)

        expected_start = u'00:01:35.666'
        expected_end = u'00:01:40.866'
        expected_timings = [(expected_start, expected_end)] * 3

        actual_timings = []
        for c_ in caption_set.get_captions('en-US'):
            formatted = (c_.format_start(), c_.format_end())
            actual_timings.append(formatted)

        self.assertEqual(expected_timings, actual_timings)
Example #24
0
    def test_correct_last_bad_timing(self):
        # This fix was implemented with a hack. The commands for the Pop-on
        # captions will have to be reviewed, but until then this is good enough
        #
        # Checks the raw (start, end) values of both captions in the sample.
        caption_set = SCCReader().read(SAMPLE_SCC_PRODUCES_BAD_LAST_END_TIME)
        captions = caption_set.get_captions(u'en-US')

        expected_timings = [
            (1408266666.6666667, 1469700000.0),
            (3208266666.666667, 3269700000.0),
        ]

        actual_timings = []
        for c_ in captions:
            actual_timings.append((c_.start, c_.end))
        self.assertEqual(expected_timings, actual_timings)
Example #25
0
    def test_default_positioning_when_no_positioning_is_specified(
            self, sample_no_positioning_at_all_scc):
        # Every caption read from the sample, and each node it contains, must
        # serialize to the same layout tuple.
        caption_set = SCCReader().read(sample_no_positioning_at_all_scc)

        actual_caption_layouts = []
        for caption in caption_set.get_captions('en-US'):
            actual_caption_layouts.append(caption.layout_info.serialized())

        # Shared expectation: a percent-based pair, two None slots and a
        # left/top alignment pair.
        fallback_layout = (
            ((0.0, UnitEnum.PERCENT), (86.66666666666667, UnitEnum.PERCENT)),
            None,
            None,
            (HorizontalAlignmentEnum.LEFT, VerticalAlignmentEnum.TOP),
        )
        expected_caption_layouts = [fallback_layout, fallback_layout]

        # One {caption index: [node layouts]} mapping per caption.
        actual_node_layout_infos = []
        for idx, caption in enumerate(caption_set.get_captions('en-US')):
            node_layouts = []
            for node in caption.nodes:
                node_layouts.append(node.layout_info.serialized())
            actual_node_layout_infos.append({idx: node_layouts})

        expected_node_layout_infos = [
            {0: [fallback_layout]},
            {1: [fallback_layout]},
        ]

        assert expected_node_layout_infos == actual_node_layout_infos
        assert expected_caption_layouts == actual_caption_layouts
Example #26
0
    def test_freeze_colon_spec_time(self):
        # Coverage doesn't mean we test that functionality, so assert that
        # all the timing specs that previously had coverage, will actually
        # remain unchanged.
        caption_set = SCCReader().read(SAMPLE_SCC_POP_ON)

        # Frozen (start, end) values, one pair per caption.
        expected_timings = [
            (9776433.333333332, 12312300.0),
            (14781433.33333333, 16883533.333333332),
            (16950266.666666664, 18618600.000000004),
            (18685333.333333332, 20754066.666666664),
            (20820800.0, 26626600.0),
            (26693333.333333332, 32098733.333333332),
            (32165466.66666666, 36202833.33333332),
        ]

        actual_timings = []
        for c_ in caption_set.get_captions(u'en-US'):
            actual_timings.append((c_.start, c_.end))
        self.assertEqual(expected_timings, actual_timings)
Example #27
0
    def test_correct_last_bad_timing(self,
                                     sample_scc_produces_bad_last_end_time):
        # This fix was implemented with a hack. The commands for the Pop-on
        # captions will have to be reviewed, but until then this is good enough
        #
        # Checks the raw (start, end) values of both captions in the sample.
        reader = SCCReader()
        caption_set = reader.read(sample_scc_produces_bad_last_end_time)

        first_timing = (1408266666.6666667, 1469700000.0)
        second_timing = (3208266666.666667, 3269700000.0)
        expected_timings = [first_timing, second_timing]

        actual_timings = []
        for c_ in caption_set.get_captions('en-US'):
            actual_timings.append((c_.start, c_.end))

        assert expected_timings == actual_timings
Example #28
0
    def test_freeze_colon_spec_time(self):
        # Coverage doesn't mean we test that functionality, so assert that
        # all the timing specs that previously had coverage, will actually
        # remain unchanged.
        reader = SCCReader()
        captions = reader.read(SAMPLE_SCC_POP_ON).get_captions(u'en-US')

        # Frozen (start, end) values, one pair per caption.
        expected_timings = [
            (9776433.333333332, 12312300.0),
            (14781433.33333333, 16883533.333333332),
            (16950266.666666664, 18618600.000000004),
            (18685333.333333332, 20754066.666666664),
            (20820800.0, 26626600.0),
            (26693333.333333332, 32098733.333333332),
            (32165466.66666666, 36202833.33333332),
        ]

        actual_timings = []
        for c_ in captions:
            timing = (c_.start, c_.end)
            actual_timings.append(timing)
        self.assertEqual(expected_timings, actual_timings)
Example #29
0
    def test_timing_is_properly_set_on_split_captions(
            self, sample_scc_produces_captions_with_start_and_end_time_the_same
    ):
        # All three captions read from this sample are expected to share the
        # same formatted start and end times.
        sample = sample_scc_produces_captions_with_start_and_end_time_the_same
        caption_set = SCCReader().read(sample)

        shared_timing = ('00:01:35.666', '00:01:40.866')
        expected_timings = [shared_timing] * 3

        actual_timings = []
        for c_ in caption_set.get_captions('en-US'):
            actual_timings.append((c_.format_start(), c_.format_end()))

        assert expected_timings == actual_timings
Example #30
0
    def test_ignore_repeated_tab_offset(self, sample_scc_repeated_tab_offset):
        # Gathers the content of every TEXT node, caption by caption, and
        # compares the result with the expected lines of text.
        caption_set = SCCReader().read(sample_scc_repeated_tab_offset)

        actual_lines = []
        for cap_ in caption_set.get_captions('en-US'):
            for node in cap_.nodes:
                if node.type_ == CaptionNode.TEXT:
                    actual_lines.append(node.content)

        expected_lines = [
            '[Radio reporter]',
            'The I-10 Santa Monica Freeway',
            'westbound is jammed,',
            'due to a three-car accident',
            'blocking lanes 1 and 2',
        ]

        assert expected_lines == actual_lines
Example #31
0
    def test_freeze_semicolon_spec_time(self):
        # Freezes the (start, end) timings read from the roll-up sample.
        # In the expected data each caption ends exactly where the next one
        # starts, so the pairs are derived from consecutive boundaries.
        caption_set = SCCReader().read(SAMPLE_SCC_ROLL_UP_RU2)
        captions = caption_set.get_captions(u'en-US')

        boundaries = [
            766666.6666666667,
            2800000.0,
            4600000.0,
            6166666.666666667,
            9733333.333333332,
            11266666.666666668,
            12266666.666666668,
            13266666.666666668,
            14266666.666666668,
            17066666.666666668,
            18666666.666666668,
            20233333.333333336,
            21833333.333333332,
            34933333.33333333,
            36433333.33333333,
            44300000.0,
            44866666.666666664,
        ]
        expected_timings = list(zip(boundaries, boundaries[1:]))

        actual_timings = []
        for c_ in captions:
            actual_timings.append((c_.start, c_.end))
        self.assertEqual(expected_timings, actual_timings)
Example #32
0
    def test_freeze_semicolon_spec_time(self):
        # Freezes the raw (start, end) values of every caption read from the
        # roll-up sample.
        reader = SCCReader()
        captions = reader.read(SAMPLE_SCC_ROLL_UP_RU2).get_captions(u'en-US')

        expected_timings = [
            (766666.6666666667, 2800000.0),
            (2800000.0, 4600000.0),
            (4600000.0, 6166666.666666667),
            (6166666.666666667, 9733333.333333332),
            (9733333.333333332, 11266666.666666668),
            (11266666.666666668, 12266666.666666668),
            (12266666.666666668, 13266666.666666668),
            (13266666.666666668, 14266666.666666668),
            (14266666.666666668, 17066666.666666668),
            (17066666.666666668, 18666666.666666668),
            (18666666.666666668, 20233333.333333336),
            (20233333.333333336, 21833333.333333332),
            (21833333.333333332, 34933333.33333333),
            (34933333.33333333, 36433333.33333333),
            (36433333.33333333, 44300000.0),
            (44300000.0, 44866666.666666664),
        ]

        actual_timings = []
        for c_ in captions:
            timing = (c_.start, c_.end)
            actual_timings.append(timing)
        self.assertEqual(expected_timings, actual_timings)
Example #33
0
    def test_scc_positioning_is_read(self, sample_scc_multiple_positioning):
        # Compares the serialized origin of every caption in the sample with
        # a fixed list of percent-based coordinate pairs.
        captions = SCCReader().read(sample_scc_multiple_positioning)

        # SCC generates only origin, and we always expect it.
        # Each entry holds the two numeric values of one expected origin.
        origin_values = [
            (0.0, 80.0),
            (37.5, 0.0),
            (75.0, 20.0),
            (12.5, 46.666666666666664),
            (12.5, 93.33333333333333),
            (37.5, 53.333333333333336),
            (75.0, 13.333333333333334),
            (12.5, 33.333333333333336),
            (12.5, 86.66666666666667),
            (75.0, 6.666666666666667),
            (37.5, 40.0),
            (12.5, 73.33333333333333),
        ]
        expected_positioning = [
            ((first, UnitEnum.PERCENT), (second, UnitEnum.PERCENT))
            for first, second in origin_values
        ]

        actual_positioning = []
        for caption_ in captions.get_captions('en-US'):
            actual_positioning.append(caption_.layout_info.origin.serialized())

        assert expected_positioning == actual_positioning
Example #34
0
    def test_scc_positioning_is_read(self):
        # Compares the serialized origin of every caption in the sample with
        # a fixed list of percent-based coordinate pairs.
        # NOTE(review): `unicode` is a Python 2 builtin, so this snippet only
        # runs where that name is available.
        sample_text = unicode(SAMPLE_SCC_MULTIPLE_POSITIONING)
        captions = SCCReader().read(sample_text)

        # SCC generates only origin, and we always expect it.
        # Each entry holds the two numeric values of one expected origin.
        origin_values = [
            (0.0, 80.0),
            (37.5, 0.0),
            (75.0, 20.0),
            (12.5, 46.666666666666664),
            (12.5, 93.33333333333333),
            (37.5, 53.333333333333336),
            (75.0, 13.333333333333334),
            (12.5, 33.333333333333336),
            (12.5, 86.66666666666667),
            (75.0, 6.666666666666667),
            (37.5, 40.0),
            (12.5, 73.33333333333333),
        ]
        expected_positioning = [
            ((first, u'%'), (second, u'%'))
            for first, second in origin_values
        ]

        actual_positioning = []
        for caption_ in captions.get_captions(u'en-US'):
            actual_positioning.append(caption_.layout_info.origin.serialized())

        self.assertEqual(expected_positioning, actual_positioning)
Example #35
0
    def test_caption_length(self):
        # The pop-on sample is expected to yield exactly 7 'en-US' captions.
        captions = SCCReader().read(SAMPLE_SCC_POP_ON)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(7, len(captions.get_captions(u"en-US")))
Example #36
0
    def test_caption_length(self):
        # The decoded sample is expected to yield exactly 7 'en-US' captions.
        captions = SCCReader().read(SAMPLE_SCC.decode(u'utf-8'))

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(7, len(captions.get_captions(u"en-US")))
Example #37
0
    def test_removed_extended_characters_ascii_duplicate(
            self, sample_scc_with_extended_characters):
        # The first node of the first caption must read 'MÄRTHA:'.
        reader = SCCReader()
        caption_set = reader.read(sample_scc_with_extended_characters)

        first_caption = caption_set.get_captions('en-US')[0]
        first_node = first_caption.nodes[0]

        assert first_node.content == 'MÄRTHA:'
Example #38
0
    def test_eoc_first_command(self):
        # Reading SAMPLE_SCC_EOC_FIRST_COMMAND must produce a single caption.
        reader = SCCReader()
        caption_set = reader.read(SAMPLE_SCC_EOC_FIRST_COMMAND)

        # just one caption, first EOC disappears
        en_captions = caption_set.get_captions('en-US')
        num_captions = len(en_captions)
        self.assertEqual(num_captions, 1)
Example #39
0
    def test_eoc_first_command(self):
        # Reading SAMPLE_SCC_EOC_FIRST_COMMAND must produce a single caption.
        captions = SCCReader().read(
            SAMPLE_SCC_EOC_FIRST_COMMAND).get_captions('en-US')

        # just one caption, first EOC disappears
        num_captions = len(captions)
        self.assertEqual(num_captions, 1)
Example #40
0
    def test_caption_length(self):
        # The pop-on sample is expected to yield exactly 7 'en-US' captions.
        captions = SCCReader().read(SAMPLE_SCC_POP_ON)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(7, len(captions.get_captions(u"en-US")))
Example #41
0
    def test_caption_length(self, sample_scc_pop_on):
        # The pop-on sample is expected to yield exactly 7 'en-US' captions.
        caption_set = SCCReader().read(sample_scc_pop_on)
        caption_count = len(caption_set.get_captions("en-US"))

        assert 7 == caption_count
Example #42
0
    def test_caption_length(self):
        # The decoded sample is expected to yield exactly 7 'en-US' captions.
        captions = SCCReader().read(SAMPLE_SCC.decode(u'utf-8'))

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(7, len(captions.get_captions(u"en-US")))