def prepare_russian_sentences(self):
        """Wrap every row of the Russian sentences file in a ``RussianSentence``.

        The rows are read with ``FileManager.get_file_rows`` from
        ``self.russian_sentences_filename``.

        Returns:
            list: One ``RussianSentence`` per row, in file order.
        """
        source_rows = FileManager.get_file_rows(self.russian_sentences_filename)
        return [RussianSentence(source_row) for source_row in source_rows]
    def prepare_english_sentences(self):
        """Wrap every row of the English sentences file in an ``EnglishSentence``.

        The rows are read with ``FileManager.get_file_rows`` from
        ``self.english_sentences_filename``.

        Returns:
            list: One ``EnglishSentence`` per row, in file order.
        """
        source_rows = FileManager.get_file_rows(self.english_sentences_filename)
        return [EnglishSentence(source_row) for source_row in source_rows]
    def get_translated_lines(input_file: str):
        """Return the rows of ``input_file`` that are not a lone newline.

        Args:
            input_file: Path handed to ``FileManager.get_file_rows``.

        Returns:
            list: Every row that differs from a single newline character,
            in file order.
        """
        return [
            line
            for line in FileManager.get_file_rows(input_file)
            if line != '\n'
        ]
    def get_timelines(input_file: str) -> list:
        """Return the rows of ``input_file`` that ``Subtitle`` treats as timelines.

        Args:
            input_file: Path handed to ``FileManager.get_file_rows``.

        Returns:
            list: Rows for which ``Subtitle.is_timeline`` is truthy, in
            file order.
        """
        rows = FileManager.get_file_rows(input_file)
        return [row for row in rows if Subtitle.is_timeline(row)]
    def prepare_timelines_string_length(input_file: str, output_file: str):
        """Split over-long subtitles of ``input_file`` and write all to ``output_file``.

        The rows of ``input_file`` are parsed with
        ``HandleFunctions.get_subtitles``. Each subtitle whose
        ``is_large_subtitle()`` is true is replaced by the pieces returned
        from its ``split()``; every other subtitle is kept unchanged. Each
        resulting subtitle is then emitted as three rows: the stripped
        timeline, the stripped subtitle text, and an empty line.

        Args:
            input_file: Path of the subtitle file to read.
            output_file: Path passed to ``HandleFunctions.save_str_to_file``
                for every emitted row.
        """
        file_rows = FileManager.get_file_rows(input_file)
        subtitles = HandleFunctions.get_subtitles(file_rows)

        result = []
        # Every large subtitle is split, the last one included.
        for subtitle in subtitles:
            if subtitle.is_large_subtitle():
                # extend() appends in place; rebuilding the list with ``+``
                # would copy everything collected so far on each split.
                result.extend(subtitle.split())
            else:
                result.append(subtitle)

        for subtitle in result:
            HandleFunctions.save_str_to_file(
                output_file, subtitle.timeline.to_string().strip() + '\n')
            HandleFunctions.save_str_to_file(
                output_file, subtitle.subtitle_string.to_string().strip() + '\n')
            # Blank separator row between consecutive subtitles.
            HandleFunctions.save_str_to_file(output_file, '\n')
Beispiel #6
0
    def fix(self):
        """Rewrite the caption file, re-joining timelines with a later row.

        Rows are read from ``self.input_filename`` and each one is passed to
        ``HandleFunctions.save_str_to_file`` for ``self.output_filename``.
        A row recognised by ``Subtitle.is_timeline`` that has a row three
        positions after it is replaced by the text before its first comma,
        a comma, the text before the first comma of that later row, and a
        newline. All other rows are written unchanged.

        NOTE(review): the row at ``index + 3`` is presumably the next
        timeline; the code does not check this -- confirm against the
        input format.
        """
        rows = FileManager.get_file_rows(self.input_filename)

        for index, row in enumerate(rows):
            output_row = row
            if Subtitle.is_timeline(row) and index + 3 < len(rows):
                own_start = row.split(',')[0]
                later_start = rows[index + 3].split(',')[0]
                output_row = own_start + ',' + later_start + '\n'

            HandleFunctions.save_str_to_file(self.output_filename, output_row)
    def start(self):
        """Attach translated text to every caption line and save the result.

        Each row of ``self.captions_filename`` becomes a ``CaptionLine``.
        Rows for which ``TextHelper.is_new_line`` is true are kept as they
        are. For the others, the length of the caption (or of each of its
        substrings) is expressed as an integer percentage of the current
        English sentence's full length; that percentage is recorded on the
        English sentence via ``add_percent`` and used to take the matching
        part of the current Russian sentence, which is added to the caption
        line. The collected lines are handed to ``self.save_list_to_file``.
        """
        processed_lines = []

        # For each caption line, determine the sentence it belongs to and
        # the share of that sentence it covers, measured in characters.
        for raw_row in FileManager.get_file_rows(self.captions_filename):
            line = CaptionLine(raw_row)

            if not TextHelper.is_new_line(raw_row):
                if line.substrings_cnt == 1:
                    english = self.get_first_not_handled_english_sentence()
                    share = int(
                        line.get_full_len() / english.get_full_len() * 100)
                    english.add_percent(share)
                    russian = self.get_first_not_empty_russian_sentence()
                    line.add_formatted_text(
                        russian.get_part_of_string_by_percent(share))

                # Kept as a separate check (not ``elif``) so the count is
                # re-read after the single-substring branch has run.
                if line.substrings_cnt > 1:
                    for idx in range(line.substrings_cnt):
                        english = self.get_first_not_handled_english_sentence()
                        fragment_len = len(line.substrings[idx])
                        share = int(
                            fragment_len / english.get_full_len() * 100)
                        english.add_percent(share)
                        russian = self.get_first_not_empty_russian_sentence()
                        line.add_formatted_text(
                            russian.get_part_of_string_by_percent(share))

            processed_lines.append(line)

        self.save_list_to_file(processed_lines)
 def remove_timelines(input_file: str, output_file: str):
     """Write the non-timeline rows of ``input_file`` to ``output_file``.

     Args:
         input_file: Path handed to ``FileManager.get_file_rows``.
         output_file: Path passed to ``HandleFunctions.save_str_to_file``
             for each row that ``Subtitle.is_timeline`` rejects.
     """
     for row in FileManager.get_file_rows(input_file):
         if Subtitle.is_timeline(row):
             continue
         HandleFunctions.save_str_to_file(output_file, row)
        output_file = os.getcwd() + '/files/res.sbv'
        # print("Enter translated subs file...")
        subs_file = os.getcwd() + '/files/captions_rus_result.sbv'
        hF.concat_translate_with_timelines(input_file, output_file, subs_file)

    if operation == 3:
        input_file = 'files/captions_concatted_rus.sbv'
        output_file = 'files/result.sbv'
        hF.prepare_timelines_string_length(input_file, output_file)

    if operation == 4:
        input_file = 'files/captions_without_timelines.sbv'
        punctuation_file = 'files/punctuation.txt'
        output_file = os.getcwd() + '/files/captions_without_timelines_improved.sbv'

        transcribe_text_improver = TranscribeTextImprover(FileManager.get_file_rows(input_file), FileManager.get_file_rows(punctuation_file), output_file)
        transcribe_text_improver.start()

    if operation == 5:
        input_file = os.getcwd() + '/files/captions_without_timelines.sbv'
        output_file = os.getcwd() + '/files/captions_for_google_translate.sbv'

        preparator = ToGoogleTranslateCaptionsPreparator(input_file, output_file)
        preparator.start()

    if operation == 6:
        eng_file = os.getcwd() + '/files/captions_for_google_translate.sbv'
        rus_file = os.getcwd() + '/files/captions_russian_raw.sbv'
        captions_file = os.getcwd() + '/files/captions_without_timelines.sbv'
        output_file = os.getcwd() + '/files/captions_rus_result.sbv'