def test_sami_to_dfxp_xml_output(self):
    """Check that DFXP written from SAMI input declares the TTML namespaces.

    Reads ``SAMPLE_SAMI_SYNTAX_ERROR``, writes it with ``DFXPWriter`` and
    looks for the default and ``tts`` namespace declarations in the output.
    """
    captions = SAMIReader().read(SAMPLE_SAMI_SYNTAX_ERROR)
    writer = DFXPWriter(relativize=False, fit_to_screen=False)
    results = writer.write(captions)
    # The dedicated assertions report the offending value on failure instead
    # of a bare "False is not true".
    self.assertIsInstance(results, unicode)
    self.assertIn(u'xmlns="http://www.w3.org/ns/ttml"', results)
    self.assertIn(u'xmlns:tts="http://www.w3.org/ns/ttml#styling"', results)
def test_sami_to_dfxp_with_margin_for_language(self):
    """Compare DFXP written from ``SAMPLE_SAMI_LANG_MARGIN`` with the
    expected ``SAMPLE_DFXP_FROM_SAMI_WITH_LANG_MARGINS`` document.
    """
    parsed = SAMIReader().read(SAMPLE_SAMI_LANG_MARGIN)
    writer = DFXPWriter(relativize=False, fit_to_screen=False)
    dfxp = writer.write(parsed)
    self.assertDFXPEquals(SAMPLE_DFXP_FROM_SAMI_WITH_LANG_MARGINS, dfxp)
def test_sami_to_dfxp_ignores_multiple_span_aligns(self):
    """Compare DFXP written from ``SAMPLE_SAMI_WITH_MULTIPLE_SPAN_ALIGNS``
    with the expected ``SAMPLE_DFXP_FROM_SAMI_WITH_BAD_SPAN_ALIGN`` document.
    """
    parsed = SAMIReader().read(SAMPLE_SAMI_WITH_MULTIPLE_SPAN_ALIGNS)
    writer = DFXPWriter(relativize=False, fit_to_screen=False)
    dfxp = writer.write(parsed)
    self.assertDFXPEquals(SAMPLE_DFXP_FROM_SAMI_WITH_BAD_SPAN_ALIGN, dfxp)
def test_lang_option(self, sample_webvtt_multi_lang_en,
                     sample_webvtt_multi_lang_de,
                     sample_sami_with_multi_lang):
    """Write one multi-language caption set as WebVTT for each language code
    and compare each output with its expected fixture.
    """
    parsed = SAMIReader().read(sample_sami_with_multi_lang)

    german = WebVTTWriter().write(parsed, 'de-DE')
    assert sample_webvtt_multi_lang_de == german

    english = WebVTTWriter().write(parsed, 'en-US')
    assert sample_webvtt_multi_lang_en == english
def test_sami_to_dfxp_conversion(self):
    """Convert ``SAMPLE_SAMI`` to DFXP and compare it with
    ``DFXP_FROM_SAMI_WITH_POSITIONING``.
    """
    caption_set = SAMIReader().read(SAMPLE_SAMI)
    writer = DFXPWriter(relativize=False, fit_to_screen=False)
    results = writer.write(caption_set)
    # assertIsInstance reports the actual type when the check fails.
    self.assertIsInstance(results, unicode)
    self.assertDFXPEquals(DFXP_FROM_SAMI_WITH_POSITIONING, results)
def test_sami_with_css_inline_style_to_webvtt_conversion(
        self, sample_webvtt_from_sami_with_style,
        sample_sami_with_css_inline_style):
    """Convert the inline-style SAMI fixture to WebVTT for a 640x360 video
    and compare the text with the expected WebVTT fixture.
    """
    parsed = SAMIReader().read(sample_sami_with_css_inline_style)
    writer = WebVTTWriter(video_width=640, video_height=360)
    webvtt = writer.write(parsed)

    assert isinstance(webvtt, str)
    self.assert_webvtt_equals(sample_webvtt_from_sami_with_style, webvtt)
def test_sami_to_dfxp_ignores_multiple_span_aligns(
        self, sample_dfxp_from_sami_with_bad_span_align,
        sample_sami_with_multiple_span_aligns):
    """Compare DFXP written from the multiple-span-align SAMI fixture with
    the expected DFXP fixture.
    """
    parsed = SAMIReader().read(sample_sami_with_multiple_span_aligns)
    writer = DFXPWriter(relativize=False, fit_to_screen=False)
    dfxp = writer.write(parsed)
    self.assert_dfxp_equals(sample_dfxp_from_sami_with_bad_span_align, dfxp)
def test_sami_with_p_and_span_align(self, sample_sami_with_p_and_span_align):
    """An align specified inline on <p> is NOT overridden by a <span> align.
    """
    parsed = SAMIReader().read(sample_sami_with_p_and_span_align)
    first_caption = parsed.get_captions('en-US')[0]
    horizontal = first_caption.layout_info.alignment.horizontal
    assert horizontal == HorizontalAlignmentEnum.RIGHT
def test_sami_to_dfxp_with_margin_for_language(
        self, sample_dfxp_from_sami_with_lang_margins,
        sample_sami_lang_margin):
    """Compare DFXP written from the language-margin SAMI fixture with the
    expected DFXP fixture.
    """
    parsed = SAMIReader().read(sample_sami_lang_margin)
    writer = DFXPWriter(relativize=False, fit_to_screen=False)
    dfxp = writer.write(parsed)
    self.assert_dfxp_equals(sample_dfxp_from_sami_with_lang_margins, dfxp)
def test_is_relativized(self, sample_sami_partial_margins_relativized,
                        sample_sami_partial_margins):
    """Re-write the partial-margins SAMI fixture with explicit video
    dimensions and compare it with the relativized fixture.
    """
    # Absolute positioning settings (e.g. px) are converted to percentages
    parsed = SAMIReader().read(sample_sami_partial_margins)
    writer = SAMIWriter(video_width=VIDEO_WIDTH, video_height=VIDEO_HEIGHT)
    relativized = writer.write(parsed)
    self.assert_sami_equals(relativized,
                            sample_sami_partial_margins_relativized)
def test_sami_to_dfxp_conversion(self,
                                 sample_dfxp_from_sami_with_positioning,
                                 sample_sami):
    """Convert the SAMI fixture to DFXP and compare it with the expected
    positioned DFXP fixture.
    """
    parsed = SAMIReader().read(sample_sami)
    writer = DFXPWriter(relativize=False, fit_to_screen=False)
    dfxp = writer.write(parsed)

    assert isinstance(dfxp, str)
    self.assert_dfxp_equals(sample_dfxp_from_sami_with_positioning, dfxp)
def sami2srt(sami_file_path: str, srt_file_path: Optional[str] = None) -> None:
    """Convert SAMI subtitles to SubRip subtitles.

    The SAMI file is read with the encoding reported by
    ``Utils.detect_encoding`` and the SubRip output is written back with the
    same encoding.

    Arguments:
        sami_file_path {string} -- The path to the SAMI file.
        srt_file_path {string} -- The path to the SubRip file. When omitted,
            the SAMI path with its final extension replaced by ".srt" is
            used.
    """
    import os

    converter = CaptionConverter()
    encoding = Utils.detect_encoding(sami_file_path)
    with open(sami_file_path, "r", encoding=encoding) as sami_file:
        converter.read(sami_file.read(), SAMIReader())

    if srt_file_path is None:
        # Swap only the trailing extension. A plain str.replace(".smi", ...)
        # would also rewrite ".smi" inside directory names and, for inputs
        # not ending in ".smi" (e.g. ".SMI" or ".sami"), would leave the path
        # unchanged so the output would overwrite the source file.
        root, _ = os.path.splitext(sami_file_path)
        srt_file_path = root + ".srt"

    with open(srt_file_path, "wb") as srt_file:
        srt_file.write(converter.write(SRTWriter()).encode(encoding))
    Utils.remove_trailing_newlines(srt_file_path, encoding)
def get_raw_lines():
    """Yield one joined text string per caption from the files under ``subs``.

    Only entries of ``HERE/subs`` whose names consist solely of digits are
    read. Each file is parsed with ``SAMIReader`` and must expose exactly one
    language (the single-element unpacking raises ``ValueError`` otherwise).
    """
    subdir = os.path.join(HERE, 'subs')
    for sfn in os.listdir(subdir):
        # Skip anything whose name is not purely numeric.
        if not re.match(r'^\d+$', sfn):
            continue
        reader = SAMIReader()
        with open(os.path.join(subdir, sfn)) as sf:
            tree = reader.read(sf.read())
        lang, = tree.get_languages()
        for caption in tree.get_captions(lang):
            # Join the stripped content of the nodes selected by
            # _nodes_that_are_ayoade with single spaces.
            sub = ' '.join(
                (n.content.strip() for n in _nodes_that_are_ayoade(caption)))
            yield sub
        # NOTE(review): the original indentation was lost; this assumes the
        # empty string is emitted once per file as a separator -- confirm.
        yield ''
def test_empty_file(self):
    """Reading ``SAMPLE_SAMI_EMPTY`` must raise ``CaptionReadNoCaptions``."""
    read = SAMIReader().read
    with self.assertRaises(CaptionReadNoCaptions):
        read(SAMPLE_SAMI_EMPTY)
def test_6digit_color_code_from_3digit_input(self):
    """A "#fed" colour in the input is read back as "#ffeedd" on the "p"
    style.
    """
    shorthand_sami = SAMPLE_SAMI.replace("#ffeedd", "#fed")
    captions = SAMIReader().read(shorthand_sami)
    p_style = captions.get_style("p")
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual("#ffeedd", p_style[u'color'])
def test_proper_timestamps(self):
    """Check the start and end values of the third "en-US" caption read
    from ``SAMPLE_SAMI``.
    """
    captions = SAMIReader().read(SAMPLE_SAMI)
    paragraph = captions.get_captions("en-US")[2]
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(17000000, paragraph.start)
    self.assertEqual(18752000, paragraph.end)
def test_caption_length(self):
    """``SAMPLE_SAMI`` must yield seven "en-US" captions."""
    captions = SAMIReader().read(SAMPLE_SAMI)
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(7, len(captions.get_captions("en-US")))
def test_sami_with_p_align(self):
    """The first "en-US" caption of ``SAMPLE_SAMI_WITH_P_ALIGN`` must be
    horizontally right-aligned.
    """
    caption_set = SAMIReader().read(SAMPLE_SAMI_WITH_P_ALIGN)
    caption = caption_set.get_captions('en-US')[0]
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(caption.layout_info.alignment.horizontal,
                     HorizontalAlignmentEnum.RIGHT)
def test_sami_to_sami_conversion(self):
    """Re-write ``SAMPLE_SAMI_NO_LANG`` as SAMI and compare it with
    ``SAMPLE_SAMI_WITH_LANG``, also checking for an "en-US" lang entry.
    """
    caption_set = SAMIReader().read(SAMPLE_SAMI_NO_LANG)
    results = SAMIWriter().write(caption_set)
    # The dedicated assertions report the offending value on failure instead
    # of a bare "False is not true".
    self.assertIsInstance(results, unicode)
    self.assertSAMIEquals(SAMPLE_SAMI_WITH_LANG, results)
    self.assertIn(u"lang: en-US;", results)
def test_invalid_markup_is_properly_handled(self):
    """``SAMPLE_SAMI_SYNTAX_ERROR`` must still yield two "en-US" captions."""
    captions = SAMIReader().read(SAMPLE_SAMI_SYNTAX_ERROR)
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(2, len(captions.get_captions("en-US")))
def test_partial_margins(self):
    """Check the padding attribute produced for
    ``SAMPLE_SAMI_PARTIAL_MARGINS``.
    """
    caption_set = SAMIReader().read(SAMPLE_SAMI_PARTIAL_MARGINS)
    # Ensure that undefined margins are converted to explicitly nil padding
    # (i.e. "0%")
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(caption_set.layout_info.padding.to_xml_attribute(),
                     u'0% 29pt 0% 29pt')
def test_is_relativized(self):
    """Re-write ``SAMPLE_SAMI_PARTIAL_MARGINS`` with explicit video
    dimensions and compare it with the relativized sample.
    """
    # Absolute positioning settings (e.g. px) are converted to percentages
    parsed = SAMIReader().read(SAMPLE_SAMI_PARTIAL_MARGINS)
    writer = SAMIWriter(video_width=VIDEO_WIDTH, video_height=VIDEO_HEIGHT)
    relativized = writer.write(parsed)
    self.assertEqual(relativized, SAMPLE_SAMI_PARTIAL_MARGINS_RELATIVIZED)
def test_sami_with_css_id_style_to_webvtt_conversion(self):
    """Convert ``SAMPLE_SAMI_WITH_CSS_ID_STYLE`` to WebVTT for a 640x360
    video and compare it with ``SAMPLE_WEBVTT_FROM_SAMI_WITH_ID_STYLE``.
    """
    caption_set = SAMIReader().read(SAMPLE_SAMI_WITH_CSS_ID_STYLE)
    writer = WebVTTWriter(video_width=640, video_height=360)
    results = writer.write(caption_set)
    # assertIsInstance reports the actual type when the check fails.
    self.assertIsInstance(results, unicode)
    self.assertWebVTTEquals(SAMPLE_WEBVTT_FROM_SAMI_WITH_ID_STYLE, results)
def test_sami_to_dfxp_with_span(self):
    """Compare DFXP written from ``SAMPLE_SAMI_WITH_SPAN`` with
    ``SAMPLE_DFXP_FROM_SAMI_WITH_SPAN``.
    """
    parsed = SAMIReader().read(SAMPLE_SAMI_WITH_SPAN)
    writer = DFXPWriter(relativize=False, fit_to_screen=False)
    dfxp = writer.write(parsed)
    self.assertDFXPEquals(SAMPLE_DFXP_FROM_SAMI_WITH_SPAN, dfxp)
def test_sami_to_sami_conversion(self):
    """Re-write ``SAMPLE_SAMI`` as SAMI and compare the output with the
    input sample.
    """
    caption_set = SAMIReader().read(SAMPLE_SAMI)
    writer = SAMIWriter(relativize=False, fit_to_screen=False)
    results = writer.write(caption_set)
    # assertIsInstance reports the actual type when the check fails.
    self.assertIsInstance(results, unicode)
    self.assertSAMIEquals(SAMPLE_SAMI, results)
def build_sami_reader():
    """Return a ``SubtitleReader`` wrapping a newly created ``SAMIReader``."""
    sami_reader = SAMIReader()
    return SubtitleReader(sami_reader)
def test_sami_to_srt_conversion(self):
    """Convert ``SAMPLE_SAMI`` to SRT and compare it with ``SAMPLE_SRT``."""
    caption_set = SAMIReader().read(SAMPLE_SAMI)
    results = SRTWriter().write(caption_set)
    # assertIsInstance reports the actual type when the check fails.
    self.assertIsInstance(results, unicode)
    self.assertSRTEquals(SAMPLE_SRT, results)
def test_sami_reader_only_supports_unicode_input(self):
    """Byte-string input must be rejected with ``InvalidInputError``."""
    reader = SAMIReader()
    with self.assertRaises(InvalidInputError):
        # An explicit bytes literal stays a byte string on every Python
        # version and under ``unicode_literals``; a bare '' is only a byte
        # string on Python 2.
        reader.read(b'')
def test_double_br(self):
    """Write the decoded ``SAMPLE_SAMI_DOUBLE_BR`` with ``self.writer`` and
    compare it with the decoded ``SAMPLE_WEBVTT_DOUBLE_BR``.
    """
    sami_text = SAMPLE_SAMI_DOUBLE_BR.decode(u'utf-8')
    parsed = SAMIReader().read(sami_text)
    expected = SAMPLE_WEBVTT_DOUBLE_BR.decode(u'utf-8')
    self.assertEqual(expected, self.writer.write(parsed))
def test_detection(self):
    """``SAMIReader.detect`` must return a truthy value for ``SAMPLE_SAMI``."""
    detected = SAMIReader().detect(SAMPLE_SAMI)
    self.assertTrue(detected)