def test_invalid_format(self):
    # Converting the 'not.txt' fixture must raise InvalidSubtitleFormatError.
    english = languages.getlang_by_name('English')
    source_path = os.path.join(test_files_dir, 'not.txt')
    converter = build_subtitle_converter_from_file(source_path)

    self.assertRaises(
        InvalidSubtitleFormatError, converter.convert, english.code)
def test_invalid_format__empty(self):
    # Converting the 'empty.ttml' fixture must raise
    # InvalidSubtitleFormatError.
    english = languages.getlang_by_name('English')
    empty_path = os.path.join(test_files_dir, 'empty.ttml')
    converter = build_subtitle_converter_from_file(empty_path)

    # NOTE(review): `msg` is only the text unittest reports when nothing is
    # raised; it is not compared against the raised error's message. If the
    # message itself should be verified, assertRaisesRegex is the tool --
    # confirm the intent.
    expect_failure = self.assertRaises(
        InvalidSubtitleFormatError, msg='Caption file is empty')
    with expect_failure:
        converter.convert(english.code)
def test_invalid_language(self):
    # Requesting Spanish from the 'encapsulated.sami' fixture must raise
    # InvalidSubtitleLanguageError.
    spanish = languages.getlang_by_name('Spanish')
    sami_path = os.path.join(test_files_dir, 'encapsulated.sami')
    converter = build_subtitle_converter_from_file(sami_path)

    self.assertRaises(
        InvalidSubtitleLanguageError, converter.convert, spanish.code)
def test_not_expected_type(self):
    # The 'basic.srt' fixture is loaded with `in_format` forced to SCC;
    # converting it must raise InvalidSubtitleFormatError.
    forced_format = file_formats.SCC
    arabic = languages.getlang_by_name('Arabic')
    srt_path = os.path.join(test_files_dir, 'basic.srt')
    converter = build_subtitle_converter_from_file(
        srt_path, in_format=forced_format)

    with self.assertRaises(InvalidSubtitleFormatError):
        converter.convert(arabic.code)
def test_srt_conversion(self):
    # Writes the 'basic.srt' fixture out for Arabic (after replacing its
    # unknown language) and compares the result's hash with 'basic.vtt'.
    reference_vtt = os.path.join(test_files_dir, 'basic.vtt')
    arabic = languages.getlang_by_name('Arabic')
    srt_path = os.path.join(test_files_dir, 'basic.srt')

    converter = build_subtitle_converter_from_file(srt_path)
    converter.replace_unknown_language(arabic.code)

    with tempfile.NamedTemporaryFile() as output:
        output_path = output.name
        converter.write(output_path, arabic.code)
        self.assertFileHashesEqual(reference_vtt, output_path)
def test_replace_unknown_language(self):
    arabic = languages.getlang_by_name('Arabic')
    srt_path = os.path.join(test_files_dir, 'basic.srt')
    converter = build_subtitle_converter_from_file(srt_path)

    # Before replacement the converter reports the unknown language code.
    self.assertTrue(converter.has_language(LANGUAGE_CODE_UNKNOWN))

    converter.replace_unknown_language(arabic.code)

    # Afterwards the Arabic code is present and the unknown code is gone.
    self.assertTrue(converter.has_language(arabic.code))
    self.assertFalse(converter.has_language(LANGUAGE_CODE_UNKNOWN))
def test_valid_language(self):
    # The 'encapsulated.sami' fixture must report English, and writing it
    # out for English must hash-match the 'encapsulated.vtt' fixture.
    reference_vtt = os.path.join(test_files_dir, 'encapsulated.vtt')
    english = languages.getlang_by_name('English')
    sami_path = os.path.join(test_files_dir, 'encapsulated.sami')

    converter = build_subtitle_converter_from_file(sami_path)
    self.assertTrue(converter.has_language(english.code))

    with tempfile.NamedTemporaryFile() as output:
        output_path = output.name
        converter.write(output_path, english.code)
        self.assertFileHashesEqual(reference_vtt, output_path)
def download_and_transform_file(self, path):
    """
    Fetch the subtitles file at `path`, write it out as `.vtt`, and copy
    the result to storage.

    Args:
        path (URL or local path): location of the source subtitles file

    Returns:
        filename of the final .vtt file; when the cache entry is usable
        (and `config.UPDATE` is not set) the cached filename is returned
        without downloading.

    Raises:
        InvalidSubtitleLanguageError: if no language code in the file
            corresponds to `self.language`.
    """
    cache_key = "DOWNLOAD:{}".format(path)
    cached_name = get_cache_filename(cache_key)

    # Reuse the cached result unless an update is forced or it is outdated.
    if not (config.UPDATE or cache_is_outdated(path, cached_name)):
        return cached_name

    config.LOGGER.info("\tDownloading {}".format(path))

    with tempfile.NamedTemporaryFile() as source_tmp, \
            tempfile.NamedTemporaryFile() as vtt_tmp:
        write_and_get_hash(path, source_tmp)
        source_tmp.seek(0)

        converter = build_subtitle_converter_from_file(
            source_tmp.name, self.subtitlesformat)

        # A file whose single language is the unknown code is assumed to be
        # in the language this object was given.
        only_unknown = (
            len(converter.get_language_codes()) == 1
            and converter.has_language(LANGUAGE_CODE_UNKNOWN)
        )
        if only_unknown:
            converter.replace_unknown_language(self.language)

        target_code = self.language

        # The requested code is absent: look for a code in the file that
        # resolves (via alpha-2 lookup) to the requested language.
        if not converter.has_language(self.language):
            found = False
            for candidate in converter.get_language_codes():
                resolved = languages.getlang_by_alpha2(candidate)
                if resolved and resolved.code == self.language:
                    target_code = candidate
                    found = True
                    break

            if not found:
                raise InvalidSubtitleLanguageError(
                    "Missing language '{}' in subtitle file".format(
                        self.language))

        converter.write(vtt_tmp.name, target_code)

        # The stored filename is derived from the hash of the written file.
        vtt_tmp.seek(0)
        output_hash = get_hash(vtt_tmp.name)
        filename = '{0}.{ext}'.format(output_hash, ext=file_formats.VTT)

        vtt_tmp.seek(0)
        copy_file_to_storage(filename, vtt_tmp)
        FILECACHE.set(cache_key, filename.encode("utf-8"))

    return filename