def translate_subtitles_file(video_path, source_srt_file, to_lang, forced, hi): language_code_convert_dict = { 'he': 'iw', 'zt': 'zh-cn', 'zh': 'zh-tw', } to_lang = alpha3_from_alpha2(to_lang) lang_obj = CustomLanguage.from_value(to_lang, "alpha3") if not lang_obj: lang_obj = Language(to_lang) if forced: lang_obj = Language.rebuild(lang_obj, forced=True) if hi: lang_obj = Language.rebuild(lang_obj, hi=True) logging.debug('BAZARR is translating in {0} this subtitles {1}'.format( lang_obj, source_srt_file)) max_characters = 5000 dest_srt_file = get_subtitle_path( video_path, language=lang_obj if isinstance(lang_obj, Language) else lang_obj.subzero_language(), extension='.srt', forced_tag=forced, hi_tag=hi) subs = pysubs2.load(source_srt_file, encoding='utf-8') lines_list = [x.plaintext for x in subs] joined_lines_str = '\n\n\n'.join(lines_list) logging.debug( 'BAZARR splitting subtitles into {} characters blocks'.format( max_characters)) lines_block_list = [] translated_lines_list = [] while len(joined_lines_str): partial_lines_str = joined_lines_str[:max_characters] if len(joined_lines_str) > max_characters: new_partial_lines_str = partial_lines_str.rsplit('\n\n\n', 1)[0] else: new_partial_lines_str = partial_lines_str lines_block_list.append(new_partial_lines_str) joined_lines_str = joined_lines_str.replace(new_partial_lines_str, '') logging.debug('BAZARR is sending {} blocks to Google Translate'.format( len(lines_block_list))) for block_str in lines_block_list: empty_first_line = False if block_str.startswith('\n\n\n'): # This happens when the first line of text in a subtitles file is an empty string empty_first_line = True try: translated_partial_srt_text = GoogleTranslator( source='auto', target=language_code_convert_dict.get( lang_obj.alpha2, lang_obj.alpha2)).translate(text=block_str) except Exception: logging.exception( f'BAZARR Unable to translate subtitles {source_srt_file}') return False else: if empty_first_line: # GoogleTranslate remove new lines at the beginning of the string, so we add it back. translated_partial_srt_text = '\n\n\n' + translated_partial_srt_text translated_partial_srt_list = translated_partial_srt_text.split( '\n\n\n') translated_lines_list += translated_partial_srt_list logging.debug( 'BAZARR saving translated subtitles to {}'.format(dest_srt_file)) for i, line in enumerate(subs): try: line.plaintext = translated_lines_list[i] except IndexError: logging.error( f'BAZARR is unable to translate malformed subtitles: {source_srt_file}' ) return False subs.save(dest_srt_file) return dest_srt_file
def translate_subtitles_file(video_path, source_srt_file, to_lang, forced, hi): language_code_convert_dict = { 'he': 'iw', 'zt': 'zh-cn', 'zh': 'zh-tw', } to_lang = alpha3_from_alpha2(to_lang) lang_obj = CustomLanguage.from_value(to_lang, "alpha3") if not lang_obj: lang_obj = Language(to_lang) if forced: lang_obj = Language.rebuild(lang_obj, forced=True) if hi: lang_obj = Language.rebuild(lang_obj, hi=True) logging.debug('BAZARR is translating in {0} this subtitles {1}'.format( lang_obj, source_srt_file)) max_characters = 5000 dest_srt_file = get_subtitle_path( video_path, language=lang_obj if isinstance(lang_obj, Language) else lang_obj.subzero_language(), extension='.srt', forced_tag=forced, hi_tag=hi) subs = pysubs2.load(source_srt_file, encoding='utf-8') lines_list = [x.plaintext for x in subs] joined_lines_str = '\n\n\n'.join(lines_list) logging.debug( 'BAZARR splitting subtitles into {} characters blocks'.format( max_characters)) lines_block_list = [] translated_lines_list = [] while len(joined_lines_str): partial_lines_str = joined_lines_str[:max_characters] if len(joined_lines_str) > max_characters: new_partial_lines_str = partial_lines_str.rsplit('\n\n\n', 1)[0] else: new_partial_lines_str = partial_lines_str lines_block_list.append(new_partial_lines_str) joined_lines_str = joined_lines_str.replace(new_partial_lines_str, '') logging.debug('BAZARR is sending {} blocks to Google Translate'.format( len(lines_block_list))) for block_str in lines_block_list: try: translated_partial_srt_text = GoogleTranslator( source='auto', target=language_code_convert_dict.get( lang_obj.alpha2, lang_obj.alpha2)).translate(text=block_str) except: return False else: translated_partial_srt_list = translated_partial_srt_text.split( '\n\n\n') translated_lines_list += translated_partial_srt_list logging.debug( 'BAZARR saving translated subtitles to {}'.format(dest_srt_file)) for i, line in enumerate(subs): line.plaintext = translated_lines_list[i] subs.save(dest_srt_file) return dest_srt_file