def test_properly_converts_timing(self):
    # Read the fixture with alternative timing formats and compare the
    # start/end of the first two 'en-US' captions with the expected values.
    parsed = DFXPReader().read(DFXP_WITH_ALTERNATIVE_TIMING_FORMATS)
    captions = parsed.get_captions('en-US')
    expected_by_index = (
        (0, 1900000, 3050000),
        (1, 4000000, 5200000),
    )
    for index, expected_start, expected_end in expected_by_index:
        self.assertEqual(captions[index].start, expected_start)
        self.assertEqual(captions[index].end, expected_end)
def test_proper_xml_entity_escaping(self):
    # The first node of the first 'en-US' caption must hold the expected
    # text after reading, and the same text must appear in what the
    # DFXPWriter produces from that caption set.
    parsed = DFXPReader().read(DFXP_WITH_ESCAPED_APOSTROPHE)
    first_caption = parsed.get_captions('en-US')[0]
    self.assertEqual(
        first_caption.nodes[0].content,
        "<< \"Andy's Caf\xe9 & Restaurant\" this way")
    written = DFXPWriter().write(parsed)
    self.assertIn("<< \"Andy's Café & Restaurant\" this way", written)
def test_individual_timings_of_captions_with_matching_timespec_are_kept(
        self):  # noqa
    # All three captions in the fixture are expected to report the same
    # (start, end) pair, each as its own caption.
    parsed = DFXPReader().read(
        SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING)
    same_timing = (9209000, 12312000)
    wanted = [same_timing, same_timing, same_timing]
    observed = []
    for caption in parsed.get_captions('en-US'):
        observed.append((caption.start, caption.end))
    self.assertEqual(wanted, observed)
def test_merge_concurrent_captions(self):
    # The fixture yields 5 'en-US' captions as read, and 3 once
    # merge_concurrent_captions has been applied.
    parsed = DFXPReader().read(DFXP_WITH_CONCURRENT_CAPTIONS)
    self.assertEqual(len(parsed.get_captions('en-US')), 5)

    merged = merge_concurrent_captions(parsed)
    self.assertEqual(len(merged.get_captions('en-US')), 3)
def test_individual_texts_of_captions_with_matching_timespec_are_kept(
        self):  # noqa
    # Each caption sharing the same timing must keep its own text; the
    # text is taken from the first node of every 'en-US' caption.
    parsed = DFXPReader().read(
        SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING)
    wanted = [
        'Some text here',
        'Some text there',
        'Caption texts are everywhere!',
    ]
    observed = []
    for caption in parsed.get_captions("en-US"):
        observed.append(caption.nodes[0].content)
    self.assertEqual(wanted, observed)
def test_default_region_p_tags(self):
    # Every <p> element written from SAMPLE_DFXP must reference the
    # default region id.
    caption_set = DFXPReader().read(SAMPLE_DFXP)
    result = DFXPWriter().write(caption_set)
    paragraphs = BeautifulSoup(result, 'lxml').find_all('p')
    # Guard against a vacuous pass: with no <p> elements the loop below
    # would not assert anything.
    self.assertTrue(paragraphs)
    for paragraph in paragraphs:
        self.assertEqual(paragraph.attrs.get('region'),
                         DFXP_DEFAULT_REGION_ID)
def test_fit_to_screen(self):
    # Check if caption width and height are explicitly set and recalculate
    # them if necessary. This prevents long captions from being cut out of
    # the screen.
    long_cue_set = DFXPReader().read(SAMPLE_DFXP_LONG_CUE)
    written = DFXPWriter().write(long_cue_set)
    self.assertEqual(written, SAMPLE_DFXP_LONG_CUE_FIT_TO_SCREEN)
def test_dfxp_with_positioning_to_webvtt_conversion(self):
    # Convert the positioned DFXP fixture to WebVTT, giving the writer the
    # video dimensions, and compare with the expected WebVTT document.
    caption_set = DFXPReader().read(SAMPLE_DFXP_WITH_POSITIONING)
    results = WebVTTWriter(video_width=VIDEO_WIDTH,
                           video_height=VIDEO_HEIGHT).write(caption_set)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(results, text_type)
    self.assertWebVTTEquals(
        SAMPLE_WEBVTT_FROM_DFXP_WITH_POSITIONING_AND_STYLE, results)
def test_dfxp_to_webvtt_preserves_proper_alignment(self):
    # Regression test: this used to fail when the CaptionSet contained
    # node breaks with different positioning. The fix was made in both the
    # DFXPReader and the WebVTTWriter.
    parsed = DFXPReader().read(DFXP_STYLE_REGION_ALIGN_CONFLICT)
    webvtt = WebVTTWriter().write(parsed)
    self.assertEqual(WEBVTT_FROM_DFXP_WITH_CONFLICTING_ALIGN, webvtt)
def test_legacy_convert(self):
    # Read with read_invalid_positioning enabled, write with the
    # LegacyDFXPWriter and compare against the expected output fixture.
    reader = DFXPReader(read_invalid_positioning=True)
    parsed = reader.read(SAMPLE_DFXP_FOR_LEGACY_WRITER_INPUT)
    written = LegacyDFXPWriter().write(parsed)
    self.assertEqual(written, SAMPLE_DFXP_FOR_LEGACY_WRITER_OUTPUT)
def test_default_styling_p_tags(self):
    # Every <p> element written from SAMPLE_DFXP must carry style 'p'.
    caption_set = DFXPReader().read(SAMPLE_DFXP)
    result = DFXPWriter().write(caption_set)
    paragraphs = BeautifulSoup(result, 'lxml').find_all('p')
    # Guard against a vacuous pass: with no <p> elements the loop below
    # would not assert anything.
    self.assertTrue(paragraphs)
    for paragraph in paragraphs:
        self.assertEqual(paragraph.attrs.get('style'), 'p')
def test_offset_time(self):
    # Table of (expected result, offset-time string) pairs fed to
    # DFXPReader._translate_time, checked in order.
    reader = DFXPReader()
    cases = (
        (1, "0.001ms"),
        (2000, "2ms"),
        (1000000, "1s"),
        (1234567, "1.234567s"),
        (180000000, "3m"),
        (14400000000, "4h"),
    )
    for expected, offset in cases:
        self.assertEqual(expected, reader._translate_time(offset))
    # Tick values are not supported
    with self.assertRaises(InvalidInputError):
        reader._translate_time("2.3t")
def test_individual_layouts_of_captions_with_matching_timespec_are_kept(
        self):  # noqa
    # Each caption sharing the same timing must keep its own serialized
    # layout. The expected layouts differ only in their first element (a
    # pair of percentage values); the alignment is the same for all three.
    parsed = DFXPReader().read(
        SAMPLE_DFXP_MULTIPLE_CAPTIONS_WITH_THE_SAME_TIMING)
    alignment = (HorizontalAlignmentEnum.CENTER,
                 VerticalAlignmentEnum.BOTTOM)
    percent_pairs = [(10, 10), (40, 40), (10, 70)]
    wanted = [
        (((first, UnitEnum.PERCENT), (second, UnitEnum.PERCENT)),
         None, None, alignment)
        for first, second in percent_pairs
    ]
    observed = []
    for caption in parsed.get_captions('en-US'):
        observed.append(caption.layout_info.serialized())
    self.assertEqual(wanted, observed)
def test_dfxp_to_sami_with_margins(self):
    # The SAMI output written with the video dimensions must contain each
    # of the four margin declarations below.
    parsed = DFXPReader().read(SAMPLE_DFXP_FROM_SAMI_WITH_MARGINS)
    writer = SAMIWriter(video_width=VIDEO_WIDTH,
                        video_height=VIDEO_HEIGHT)
    sami = writer.write(parsed)
    for declaration in ("margin-right: 6.04%;",
                        "margin-bottom: 0%;",
                        "margin-top: 0%;",
                        "margin-left: 6.04%;"):
        self.assertIn(declaration, sami)
def test_dont_create_style_tags_with_no_id(self):
    # A <style> tag may lack the 'xml:id' attribute. Such a style used to
    # be copied to the output with 'xml:id' declared but left without a
    # value. A style like that cannot be referenced, and <style> elements
    # that are children of <region> tags should not be referenced either,
    # so these styles are left out of the output file.
    parsed = DFXPReader().read(
        SAMPLE_DFXP_STYLE_TAG_WITH_NO_XML_ID_INPUT)
    written = DFXPWriter().write(parsed)
    self.assertEqual(written, SAMPLE_DFXP_STYLE_TAG_WITH_NO_XML_ID_OUTPUT)
def test_default_styling_tag(self):
    # Writing a document that has no region and no style must produce a
    # <style> element with the default id whose attributes equal the
    # recreated default style.
    parsed = DFXPReader().read(SAMPLE_DFXP_WITHOUT_REGION_AND_STYLE)
    written = DFXPWriter().write(parsed)

    wanted_attrs = _recreate_style(DFXP_DEFAULT_STYLE, None)
    wanted_attrs['xml:id'] = DFXP_DEFAULT_STYLE_ID

    document = BeautifulSoup(written, 'lxml-xml')
    style_tag = document.find('style', {'xml:id': DFXP_DEFAULT_STYLE_ID})
    self.assertTrue(style_tag)
    self.assertEqual(style_tag.attrs, wanted_attrs)
def test_default_region_tag(self):
    # The written document must contain a <region> element with the
    # default id, and its attributes must equal the default region layout
    # converted to attributes.
    parsed = DFXPReader().read(SAMPLE_DFXP)
    written = DFXPWriter().write(parsed)
    document = BeautifulSoup(written, 'lxml-xml')
    region_tag = document.find(
        'region', {'xml:id': DFXP_DEFAULT_REGION_ID})

    wanted_attrs = _convert_layout_to_attributes(DFXP_DEFAULT_REGION)
    wanted_attrs['xml:id'] = DFXP_DEFAULT_REGION_ID

    self.assertTrue(region_tag)
    self.assertEqual(region_tag.attrs['xml:id'], DFXP_DEFAULT_REGION_ID)
    self.assertEqual(region_tag.attrs, wanted_attrs)
def test_incorrectly_specified_positioning_is_explicitly_accepted(self):
    # The arguments used here illustrate how we will try to read
    # and write incorrectly specified positioning information.
    # By incorrect, I mean the specs say that those attributes should be
    # ignored, not that the attributes themselves are outside of the specs
    caption_set = DFXPReader(read_invalid_positioning=True).read(
        SAMPLE_DFXP_INVALID_BUT_SUPPORTED_POSITIONING_INPUT)
    result = DFXPWriter(relativize=False,
                        fit_to_screen=False,
                        write_inline_positioning=True).write(caption_set)
    # The Python 2 and Python 3 branches performed the exact same
    # comparison, so a single assertion covers both.
    self.assertDFXPEquals(
        result, SAMPLE_DFXP_INVALID_BUT_SUPPORTED_POSITIONING_OUTPUT)
def test_dfxp_to_sami_conversion(self):
    # Convert SAMPLE_DFXP to SAMI; the output must be text and match the
    # expected SAMI document.
    caption_set = DFXPReader().read(SAMPLE_DFXP)
    results = SAMIWriter().write(caption_set)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(results, text_type)
    self.assertSAMIEquals(SAMPLE_SAMI, results)
def test_caption_error_for_invalid_positioning_values(self):
    # Fill the template with the origin "px 5px" and expect the reader to
    # raise CaptionReadSyntaxError.
    template = SAMPLE_DFXP_INVALID_POSITIONING_VALUE_TEMPLATE
    broken_document = template.format(origin="px 5px")
    reader = DFXPReader()
    with self.assertRaises(CaptionReadSyntaxError):
        reader.read(broken_document)
def test_caption_error_for_invalid_or_unsupported_positioning_units(self):
    # Fill the template with the origin "6foo 7bar" and expect the reader
    # to raise CaptionReadSyntaxError.
    template = SAMPLE_DFXP_INVALID_POSITIONING_VALUE_TEMPLATE
    broken_document = template.format(origin="6foo 7bar")
    reader = DFXPReader()
    with self.assertRaises(CaptionReadSyntaxError):
        reader.read(broken_document)
def test_dfxp_to_dfxp_conversion(self):
    # Read SAMPLE_DFXP and write it back as DFXP; the output must be text
    # and match the expected DFXP document.
    caption_set = DFXPReader().read(SAMPLE_DFXP)
    results = DFXPWriter().write(caption_set)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(results, text_type)
    self.assertDFXPEquals(SAMPLE_DFXP_OUTPUT, results)
def test_dfxp_reader_only_supports_unicode_input(self):
    # Handing a bytes object to the reader must raise InvalidInputError.
    # The reader is built inside the callable so that its construction is
    # covered by the assertion as well.
    self.assertRaises(InvalidInputError,
                      lambda: DFXPReader().read(b''))
def test_dfxp_to_webvtt_adds_explicit_size(self):
    # Convert the long-cue DFXP fixture to WebVTT; the output must be text
    # and equal the expected long-cue WebVTT document.
    caption_set = DFXPReader().read(SAMPLE_DFXP_LONG_CUE)
    results = WebVTTWriter().write(caption_set)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(results, text_type)
    self.assertEqual(SAMPLE_WEBVTT_OUTPUT_LONG_CUE, results)
def test_is_relativized(self):
    # Absolute positioning settings (e.g. px) are converted to percentages
    # when the writer is given the video dimensions.
    parsed = DFXPReader().read(SAMPLE_DFXP_WITH_POSITIONING)
    writer = DFXPWriter(video_width=VIDEO_WIDTH,
                        video_height=VIDEO_HEIGHT)
    written = writer.write(parsed)
    self.assertEqual(written, SAMPLE_DFXP_WITH_RELATIVIZED_POSITIONING)
def test_dfxp_with_inherited_style_to_webvtt_conversion(self):
    # Convert the DFXP fixture with inherited style to WebVTT; the output
    # must be text and match the expected styled WebVTT document.
    caption_set = DFXPReader().read(SAMPLE_DFXP_WITH_INHERITED_STYLE)
    results = WebVTTWriter().write(caption_set)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(results, text_type)
    self.assertWebVTTEquals(SAMPLE_WEBVTT_FROM_DFXP_WITH_STYLE, results)
def test_dfxp_to_webvtt_conversion(self):
    # Convert SAMPLE_DFXP to WebVTT; the output must be text and match the
    # expected WebVTT document.
    caption_set = DFXPReader().read(SAMPLE_DFXP)
    results = WebVTTWriter().write(caption_set)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(results, text_type)
    self.assertWebVTTEquals(SAMPLE_WEBVTT_FROM_DFXP, results)
def test_invalid_markup_is_properly_handled(self):
    # Reading the syntax-error fixture must still yield two 'en-US'
    # captions.
    parsed = DFXPReader().read(SAMPLE_DFXP_SYNTAX_ERROR)
    english_captions = parsed.get_captions("en-US")
    self.assertEqual(2, len(english_captions))
def test_correct_region_attributes_are_recreated(self):
    # Write the multiple-regions fixture with relativize and fit_to_screen
    # both disabled, then compare with the expected output.
    parsed = DFXPReader().read(SAMPLE_DFXP_MULTIPLE_REGIONS_INPUT)
    writer = DFXPWriter(relativize=False, fit_to_screen=False)
    written = writer.write(parsed)
    self.assertDFXPEquals(written, SAMPLE_DFXP_MULTIPLE_REGIONS_OUTPUT)
def test_empty_file(self):
    # Reading the empty fixture must raise CaptionReadNoCaptions.
    reader = DFXPReader()
    with self.assertRaises(CaptionReadNoCaptions):
        reader.read(SAMPLE_DFXP_EMPTY)