def prepare_russian_sentences(self):
    """Load the Russian sentences file and wrap each row in a RussianSentence.

    Rows are read with FileManager.get_file_rows from
    ``self.russian_sentences_filename``.

    Returns:
        A list with one RussianSentence per row, in file order.
    """
    return [
        RussianSentence(line)
        for line in FileManager.get_file_rows(self.russian_sentences_filename)
    ]
def prepare_english_sentences(self):
    """Load the English sentences file and wrap each row in an EnglishSentence.

    Rows are read with FileManager.get_file_rows from
    ``self.english_sentences_filename``.

    Returns:
        A list with one EnglishSentence per row, in file order.
    """
    return [
        EnglishSentence(line)
        for line in FileManager.get_file_rows(self.english_sentences_filename)
    ]
def get_translated_lines(input_file: str):
    """Return the rows of ``input_file`` that are not a bare newline.

    Args:
        input_file: Path handed to FileManager.get_file_rows.

    Returns:
        A list of every row that differs from ``'\\n'``, in file order.
    """
    return [
        line
        for line in FileManager.get_file_rows(input_file)
        if line != '\n'
    ]
def get_timelines(input_file: str) -> list:
    """Return only the rows of ``input_file`` that are timelines.

    Args:
        input_file: Path handed to FileManager.get_file_rows.

    Returns:
        A list of the rows for which Subtitle.is_timeline is truthy,
        in file order.
    """
    return [
        line
        for line in FileManager.get_file_rows(input_file)
        if Subtitle.is_timeline(line)
    ]
def prepare_timelines_string_length(input_file: str, output_file: str):
    """Split over-long subtitles and write the resulting list to ``output_file``.

    The rows of ``input_file`` are turned into subtitle objects with
    HandleFunctions.get_subtitles. Every subtitle whose
    ``is_large_subtitle()`` is truthy is replaced by the pieces returned
    from its ``split()``; all others are kept unchanged.

    For each resulting subtitle three strings are passed to
    HandleFunctions.save_str_to_file: the stripped timeline, the stripped
    subtitle text, and an empty separator line.

    Args:
        input_file: Path handed to FileManager.get_file_rows.
        output_file: Path handed to HandleFunctions.save_str_to_file.
    """
    file_rows = FileManager.get_file_rows(input_file)
    subtitles = HandleFunctions.get_subtitles(file_rows)

    result = []
    for subtitle in subtitles:
        # No index bound is checked here: a guard of the form
        # ``i - 1 < len(subtitles)`` holds for every index produced by
        # enumerate, so only the size test decides whether to split.
        if subtitle.is_large_subtitle():
            # extend() grows the list in place instead of copying the whole
            # accumulator on every split.
            result.extend(subtitle.split())
        else:
            result.append(subtitle)

    for subtitle in result:
        HandleFunctions.save_str_to_file(
            output_file, subtitle.timeline.to_string().strip() + '\n')
        HandleFunctions.save_str_to_file(
            output_file, subtitle.subtitle_string.to_string().strip() + '\n')
        HandleFunctions.save_str_to_file(output_file, '\n')
def fix(self):
    """Copy the input captions to the output file, rewriting timeline rows.

    Rows are read from ``self.input_filename``. A timeline row that has a
    row three positions further on is replaced by
    ``<its first comma field>,<that later row's first comma field>``
    followed by a newline. Every other row is written unchanged.
    All output goes through HandleFunctions.save_str_to_file to
    ``self.output_filename``.
    """
    rows = FileManager.get_file_rows(self.input_filename)
    total = len(rows)

    for position, row in enumerate(rows):
        lookahead = position + 3
        if Subtitle.is_timeline(row) and lookahead < total:
            # NOTE(review): presumably the row three lines below is the next
            # caption's timeline - this is not checked; confirm against the
            # input format.
            start = row.split(',')[0]
            next_start = rows[lookahead].split(',')[0]
            line = start + ',' + next_start + '\n'
        else:
            line = row
        HandleFunctions.save_str_to_file(self.output_filename, line)
def start(self):
    """Fill each caption line with a proportional share of Russian text.

    Rows are read from ``self.captions_filename`` and each is wrapped in a
    CaptionLine. Rows for which TextHelper.is_new_line is truthy are kept
    as they are. For every other row, each substring's length is expressed
    as an integer percentage of the current English sentence's full length;
    that percentage is recorded on the English sentence and used to cut a
    part from the current Russian sentence, which is added to the caption
    line. The collected caption lines are passed to
    ``self.save_list_to_file``.
    """
    processed = []

    # For each caption line, determine the sentence number and the
    # character count.
    for raw_row in FileManager.get_file_rows(self.captions_filename):
        line = CaptionLine(raw_row)
        if TextHelper.is_new_line(raw_row):
            processed.append(line)
            continue

        if line.substrings_cnt == 1:
            english = self.get_first_not_handled_english_sentence()
            share = int(line.get_full_len() / english.get_full_len() * 100)
            english.add_percent(share)
            russian = self.get_first_not_empty_russian_sentence()
            line.add_formatted_text(
                russian.get_part_of_string_by_percent(share))

        # Deliberately a separate check and not an ``elif``: the count is
        # read again after the single-substring branch has run.
        if line.substrings_cnt > 1:
            for position in range(line.substrings_cnt):
                english = self.get_first_not_handled_english_sentence()
                piece_length = len(line.substrings[position])
                share = int(piece_length / english.get_full_len() * 100)
                english.add_percent(share)
                russian = self.get_first_not_empty_russian_sentence()
                line.add_formatted_text(
                    russian.get_part_of_string_by_percent(share))

        processed.append(line)

    self.save_list_to_file(processed)
def remove_timelines(input_file: str, output_file: str):
    """Copy ``input_file`` to ``output_file``, leaving out timeline rows.

    Args:
        input_file: Path handed to FileManager.get_file_rows.
        output_file: Path handed to HandleFunctions.save_str_to_file for
            every row that Subtitle.is_timeline does not accept.
    """
    for line in FileManager.get_file_rows(input_file):
        if Subtitle.is_timeline(line):
            continue
        HandleFunctions.save_str_to_file(output_file, line)
output_file = os.getcwd() + '/files/res.sbv' # print("Enter translated subs file...") subs_file = os.getcwd() + '/files/captions_rus_result.sbv' hF.concat_translate_with_timelines(input_file, output_file, subs_file) if operation == 3: input_file = 'files/captions_concatted_rus.sbv' output_file = 'files/result.sbv' hF.prepare_timelines_string_length(input_file, output_file) if operation == 4: input_file = 'files/captions_without_timelines.sbv' punctuation_file = 'files/punctuation.txt' output_file = os.getcwd() + '/files/captions_without_timelines_improved.sbv' transcribe_text_improver = TranscribeTextImprover(FileManager.get_file_rows(input_file), FileManager.get_file_rows(punctuation_file), output_file) transcribe_text_improver.start() if operation == 5: input_file = os.getcwd() + '/files/captions_without_timelines.sbv' output_file = os.getcwd() + '/files/captions_for_google_translate.sbv' preparator = ToGoogleTranslateCaptionsPreparator(input_file, output_file) preparator.start() if operation == 6: eng_file = os.getcwd() + '/files/captions_for_google_translate.sbv' rus_file = os.getcwd() + '/files/captions_russian_raw.sbv' captions_file = os.getcwd() + '/files/captions_without_timelines.sbv' output_file = os.getcwd() + '/files/captions_rus_result.sbv'