Example #1
def main():
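    # Pipeline: extract the audio track, detect speech regions, convert each
    # region to FLAC, then transcribe the snippets with the Google Speech API.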
    filename = "/home/tc/DATA/subtest/test1.wav"

    audio_filename, audio_rate = extract_audio(filename)

    regions = find_speech_regions(audio_filename)

    pool = multiprocessing.Pool(10)

    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=args.src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)

    transcripts = []

    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files",
                        Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition",
                        Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            return 1

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(args.format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = args.output

    if not dest:
        base, ext = os.path.splitext(args.source_path)
        dest = "{base}.{format}".format(base=base,format=args.format)

    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))

    print("Subtitles file created at {}".format(dest))

    os.remove(audio_filename)

    return 0
Example #2
def emit_subtitles(dest, source_path, lang, format, regions, transcripts):
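    # Pair each speech region with its transcript, drop regions with no
    # recognized text, then format and write the subtitle file.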
    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(format)
    formatted_subtitles = formatter(timed_subtitles)

    if not dest:
        base, ext = os.path.splitext(source_path)
        dest = "{base}.{format}".format(base=base, format=format)

    base, ext = os.path.splitext(dest)
    dest = "{base}.subs.{lang}{ext}".format(base=base, lang=lang, ext=ext)

    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))

    return dest
Example #3
        def main():
            parser = argparse.ArgumentParser()
            parser.add_argument(
                'source_path',
                help="Path to the video or audio file to subtitle",
                nargs='?')
            parser.add_argument(
                '-C',
                '--concurrency',
                help="Number of concurrent API requests to make",
                type=int,
                default=10)
            parser.add_argument(
                '-o',
                '--output',
                help="Output path for subtitles (by default, subtitles are saved in the same directory and name as the source path)"
            )
            parser.add_argument('-F',
                                '--format',
                                help="Destination subtitle format",
                                default="srt")
            parser.add_argument('-S',
                                '--src-language',
                                help="Language spoken in source file",
                                default="en")
            parser.add_argument('-D',
                                '--dst-language',
                                help="Desired language for the subtitles",
                                default="en")
            parser.add_argument(
                '-K',
                '--api-key',
                help=
                "The Google Translate API key to be used. (Required for subtitle translation)"
            )
            parser.add_argument('--list-formats',
                                help="List all available subtitle formats",
                                action='store_true')
            parser.add_argument(
                '--list-languages',
                help="List all available source/destination languages",
                action='store_true')

            args = parser.parse_args()
            print(args)

            if os.name == "posix":
                args.source_path = str(self.filename)
            else:
                args.source_path = str(self.filename).replace("/", "\\")
                print("Printing source path >>>", args.source_path)
            print(args)

            path = args.source_path[:-3]
            srt_path = path + "srt"

            if args.list_formats:
                print("List of formats:")
                for subtitle_format in FORMATTERS.keys():
                    print("{format}".format(format=subtitle_format))
                return 0

            if args.list_languages:
                print("List of all languages:")
                for code, language in sorted(LANGUAGE_CODES.items()):
                    print("{code}\t{language}".format(code=code,
                                                      language=language))
                return 0

            if args.format not in FORMATTERS.keys():
                print(
                    "Subtitle format not supported. Run with --list-formats to see all supported formats."
                )
                return 1

            if args.src_language not in LANGUAGE_CODES.keys():
                print(
                    "Source language not supported. Run with --list-languages to see all supported languages."
                )
                return 1

            if args.dst_language not in LANGUAGE_CODES.keys():
                print(
                    "Destination language not supported. Run with --list-languages to see all supported languages."
                )
                return 1

            if not args.source_path:
                print("Error: You need to specify a source path.")
                return 1

            audio_filename, audio_rate = extract_audio(args.source_path)

            regions = find_speech_regions(audio_filename)
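            # Convert the detected regions to FLAC and transcribe them using a
            # pool of concurrent workers.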
            pool = ProcessingPool(args.concurrency)
            converter = FLACConverter(source_path=audio_filename)
            recognizer = SpeechRecognizer(language=args.src_language,
                                          rate=audio_rate,
                                          api_key=GOOGLE_SPEECH_API_KEY)

            transcripts = []
            if regions:
                try:
                    widgets = [
                        "Converting speech regions to FLAC files: ",
                        Percentage(), ' ',
                        Bar(), ' ',
                        ETA()
                    ]
                    pbar = ProgressBar(widgets=widgets,
                                       maxval=len(regions)).start()
                    extracted_regions = []
                    for i, extracted_region in enumerate(
                            pool.imap(converter, regions)):
                        extracted_regions.append(extracted_region)
                        pbar.update(i)
                        self.progress1.setValue(i)
                    pbar.finish()

                    widgets = [
                        "Performing speech recognition: ",
                        Percentage(), ' ',
                        Bar(), ' ',
                        ETA()
                    ]
                    pbar = ProgressBar(widgets=widgets,
                                       maxval=len(regions)).start()

                    for i, transcript in enumerate(
                            pool.imap(recognizer, extracted_regions)):
                        transcripts.append(transcript)
                        pbar.update(i)
                        self.progress2.setValue(i)
                    pbar.finish()
                    QMessageBox.about(self, "Subtitles created",
                                      "Created at " + srt_path)
                    if not is_same_language(args.src_language,
                                            args.dst_language):
                        if args.api_key:
                            google_translate_api_key = args.api_key
                            translator = Translator(args.dst_language,
                                                    google_translate_api_key,
                                                    dst=args.dst_language,
                                                    src=args.src_language)
                            prompt = "Translating from {0} to {1}: ".format(
                                args.src_language, args.dst_language)
                            widgets = [
                                prompt,
                                Percentage(), ' ',
                                Bar(), ' ',
                                ETA()
                            ]
                            pbar = ProgressBar(widgets=widgets,
                                               maxval=len(regions)).start()
                            translated_transcripts = []
                            for i, transcript in enumerate(
                                    pool.imap(translator, transcripts)):
                                translated_transcripts.append(transcript)
                                pbar.update(i)
                                self.progress2.setValue(i)
                            pbar.finish()
                            transcripts = translated_transcripts
                        else:
                            print("Error: Subtitle translation requires specified Google Translate API key. "
                                  "See --help for further information.")
                            return 1

                except KeyboardInterrupt:
                    pbar.finish()
                    pool.terminate()
                    pool.join()
                    print("Cancelling transcription")
                    return 1

            timed_subtitles = [(r, t) for r, t in zip(regions, transcripts)
                               if t]
            formatter = FORMATTERS.get(args.format)
            formatted_subtitles = formatter(timed_subtitles)

            dest = args.output

            if not dest:
                base, ext = os.path.splitext(args.source_path)
                dest = "{base}.{format}".format(base=base, format=args.format)

            with open(dest, 'wb') as f:
                f.write(formatted_subtitles.encode("utf-8"))

            print("Subtitles file created at {}".format(dest))

            os.remove(audio_filename)

            return 0
Example #4
    def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
            source_path,
            src_language,
            listener_progress,
            output=None,
            concurrency=DEFAULT_CONCURRENCY,
            subtitle_file_format=DEFAULT_SUBTITLE_FORMAT):
        """
        Given an input audio/video file, generate subtitles in the specified language and format.
        """
        # Windows does not support forkserver, only spawn.
        if os.name != "nt" and "Darwin" in os.uname():
            # Necessary for running on macOS: the start method can be set only
            # once, otherwise the call crashes. From Python 3.8 onwards the
            # default on macOS is spawn rather than fork.
            if 'spawn' != multiprocessing.get_start_method(allow_none=True):
                multiprocessing.set_start_method('spawn')
        Ctr_Autosub.cancel = False
        Ctr_Autosub.step = 0
        audio_filename, audio_rate = extract_audio(source_path)

        regions = find_speech_regions(audio_filename)

        converter = FLACConverter(source_path=audio_filename)
        recognizer = SpeechRecognizer(language=src_language,
                                      rate=audio_rate,
                                      api_key=GOOGLE_SPEECH_API_KEY)
        transcripts = []
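        # Two steps follow, each on its own worker pool; progress is reported
        # through listener_progress and the run can be aborted via Ctr_Autosub.cancel.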
        if regions:
            try:
                if Ctr_Autosub.cancel:
                    return -1

                str_task_1 = "Step 1 of 2: Converting speech regions to FLAC files "
                len_regions = len(regions)
                extracted_regions = []
                Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
                for i, extracted_region in enumerate(
                        Ctr_Autosub.pool.imap(converter, regions)):
                    Ctr_Autosub.step = 1
                    extracted_regions.append(extracted_region)
                    progress_percent = MyUtil.percentage(i, len_regions)
                    Ctr_Autosub.output_progress(listener_progress, str_task_1,
                                                progress_percent)
                if Ctr_Autosub.cancel:
                    return -1
                else:
                    Ctr_Autosub.pool.close()
                    Ctr_Autosub.pool.join()

                str_task_2 = "Step 2 of 2: Performing speech recognition "
                Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
                for i, transcript in enumerate(
                        Ctr_Autosub.pool.imap(recognizer, extracted_regions)):
                    Ctr_Autosub.step = 2
                    transcripts.append(transcript)
                    progress_percent = MyUtil.percentage(i, len_regions)
                    Ctr_Autosub.output_progress(listener_progress, str_task_2,
                                                progress_percent)

                if Ctr_Autosub.cancel:
                    return -1
                else:
                    Ctr_Autosub.pool.close()
                    Ctr_Autosub.pool.join()

            except KeyboardInterrupt:
                Ctr_Autosub.pbar.finish()
                Ctr_Autosub.pool.terminate()
                Ctr_Autosub.pool.join()
                raise

        timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
        formatter = FORMATTERS.get(subtitle_file_format)
        formatted_subtitles = formatter(timed_subtitles)

        dest = output

        if not dest:
            base = os.path.splitext(source_path)[0]
            dest = "{base}.{format}".format(base=base,
                                            format=subtitle_file_format)

        with open(dest, 'wb') as output_file:
            output_file.write(formatted_subtitles.encode("utf-8"))

        os.remove(audio_filename)

        if Ctr_Autosub.cancel:
            return -1
        else:
            Ctr_Autosub.pool.close()
            Ctr_Autosub.pool.join()

        return dest
Example #5
def generate_subtitles(
    source_path,
    output=None,
    concurrency=DEFAULT_CONCURRENCY,
    src_language=DEFAULT_SRC_LANGUAGE,
    dst_language_list=DEFAULT_DST_LANGUAGE,
    subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
    api_key=None,
):
    audio_filename, audio_rate = extract_audio(source_path)

    regions = find_speech_regions(audio_filename)

    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language,
                                  rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)

    dest_list = []
    if regions:
        try:
            widgets = [
                "Converting speech regions to FLAC files: ",
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter,
                                                           regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = [
                "Performing speech recognition: ",
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

            transcripts = []
            for i, transcript in enumerate(
                    pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

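            # Produce one subtitle file per requested destination language.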
            for dst_language in dst_language_list:
                translated_transcripts = []
                if not is_same_language(src_language, dst_language):
                    if api_key:
                        google_translate_api_key = api_key
                        translator = Translator(dst_language,
                                                google_translate_api_key,
                                                dst=dst_language,
                                                src=src_language)
                        prompt = "Translating from {0} to {1}: ".format(
                            src_language, dst_language)
                        widgets = [
                            prompt,
                            Percentage(), ' ',
                            Bar(), ' ',
                            ETA()
                        ]
                        pbar = ProgressBar(widgets=widgets,
                                           maxval=len(regions)).start()
                        for i, transcript in enumerate(
                                pool.imap(translator, transcripts)):
                            translated_transcripts.append(transcript)
                            pbar.update(i)
                        pbar.finish()
                    else:
                        print(
                            "Error: Subtitle translation requires specified Google Translate API key. "
                            "See --help for further information.")
                        return 1

                if len(translated_transcripts) > 0:
                    timed_subtitles = [
                        (r, t) for r, t in zip(regions, translated_transcripts)
                        if t
                    ]
                else:
                    timed_subtitles = [(r, t)
                                       for r, t in zip(regions, transcripts)
                                       if t]

                formatter = FORMATTERS.get(subtitle_file_format)
                formatted_subtitles = formatter(timed_subtitles)

                dest = output
                if not dest:
                    base, ext = os.path.splitext(source_path)
                    dest = "{base}_{lang}.{format}".format(
                        base=base,
                        lang=dst_language,
                        format=subtitle_file_format)

                dest_list.append(dest)
                with open(dest, 'wb') as f:
                    f.write(formatted_subtitles.encode("utf-8"))

        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    os.remove(audio_filename)

    return dest_list
Example #6
def generate_subtitles(source_path,
                       output=None,
                       concurrency=DEFAULT_CONCURRENCY,
                       src_language=DEFAULT_SRC_LANGUAGE,
                       dst_language=DEFAULT_DST_LANGUAGE,
                       subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
                       api_key=None,
                       min_sample_length=DEFAULT_MIN_LENGTH,
                       max_sample_length=DEFAULT_MAX_LENGTH,
                       silent_percentile=DEFAULT_PERCENTILE,
                       silent_frame_cut=DEFAULT_FRAME_CUT,
                       interval=DEFAULT_INTERVAL):
    """
    Given an input audio/video file, generate subtitles in the specified language and format.
    """
    audio_filename, audio_rate = extract_audio(source_path)

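    # This variant exposes tuning parameters for region detection (silence
    # percentile, region size bounds, frame cut, percentile interval).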
    regions = find_speech_regions(audio_filename,
                                  silent_percentile=silent_percentile,
                                  min_region_size=min_sample_length,
                                  max_region_size=max_sample_length,
                                  silent_frame_cut=silent_frame_cut,
                                  percentile_interval=interval)

    print("Found %i regions with potential speech." % len(regions))

    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language,
                                  rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)

    transcripts = []
    if regions:
        try:
            widgets = [
                "Converting speech regions to FLAC files: ",
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter,
                                                           regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = [
                "Performing speech recognition: ",
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

            for i, transcript in enumerate(
                    pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if api_key:
                    google_translate_api_key = api_key
                    translator = Translator(dst_language,
                                            google_translate_api_key,
                                            dst=dst_language,
                                            src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(
                        src_language, dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets,
                                       maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(
                            pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print(
                        "Error: Subtitle translation requires specified Google Translate API key. "
                        "See --help for further information.")
                    return 1

        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    print("Found %i segments with voice." % len(timed_subtitles))

    dest = output

    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)

    return dest
Example #7
def generate_subtitles(
    audio_filename,
    audio_rate,
    output=None,
    concurrency=DEFAULT_CONCURRENCY,
    src_language=DEFAULT_SRC_LANGUAGE,
    dst_language=DEFAULT_DST_LANGUAGE,
    subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
    api_key=None,
):
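    # This variant receives already-extracted audio (filename and sample rate),
    # so there is no extract_audio step and the audio file is not removed here.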
    regions = find_speech_regions(audio_filename)

    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language,
                                  rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)

    transcripts = []
    if regions:
        try:
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter,
                                                           regions)):
                extracted_regions.append(extracted_region)

            for i, transcript in enumerate(
                    pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)

            if not is_same_language(src_language, dst_language):
                if api_key:
                    google_translate_api_key = api_key
                    translator = Translator(dst_language,
                                            google_translate_api_key,
                                            dst=dst_language,
                                            src=src_language)
                    translated_transcripts = []
                    for i, transcript in enumerate(
                            pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                    transcripts = translated_transcripts
                else:
                    print(
                        "Error: Subtitle translation requires specified Google Translate API key. "
                        "See --help for further information.")
                    return 1

        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output

    if not dest:
        base, ext = os.path.splitext(audio_filename)
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))

    return dest
Example #8
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
    source_path,
    output=None,
    concurrency=DEFAULT_CONCURRENCY,
    src_language=DEFAULT_SRC_LANGUAGE,
    dst_language=DEFAULT_DST_LANGUAGE,
    subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
    api_key=None,
):
    """
    Given an input audio/video file, generate subtitles in the specified language and format.
    """
    audio_filename, audio_rate = extract_audio(source_path)

    regions = find_speech_regions(audio_filename)

    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language,
                                  rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)

    transcripts = []
    if regions:
        try:
            widgets = [
                "Converting speech regions to FLAC files: ",
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter,
                                                           regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = [
                "Performing speech recognition: ",
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

            for i, transcript in enumerate(
                    pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

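            # Compare only the base language codes (e.g. "en" from "en-US") to
            # decide whether translation is needed.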
            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if api_key:
                    translator = TranslatorWithApikey(dst_language,
                                                      api_key,
                                                      dst=dst_language,
                                                      src=src_language)
                    print("Using free translation API...")
                else:
                    translator = TranslatorWithoutApikey(
                        src_language, dst_language)
                    print("Using specific translation API...")

                prompt = "Translating from {0} to {1}: ".format(
                    src_language, dst_language)
                widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                pbar = ProgressBar(widgets=widgets,
                                   maxval=len(regions)).start()
                translated_transcripts = []
                for i, transcript in enumerate(
                        pool.imap(translator, transcripts)):
                    translated_transcripts.append(transcript)
                    pbar.update(i)
                pbar.finish()
                transcripts = translated_transcripts

        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output

    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)

    return dest
Example #9
def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
    ):
    """
    Given an input audio/video file, generate subtitles in the specified language and format.
    """

    if os.name != "nt" and "Darwin" in os.uname():
        # The default Unix fork start method does not work on macOS;
        # use forkserver instead.
        if 'forkserver' != multiprocessing.get_start_method(allow_none=True):
            multiprocessing.set_start_method('forkserver')

    audio_filename, audio_rate = extract_audio(source_path)

    regions = find_speech_regions(audio_filename)

    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)

    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
                       ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if api_key:
                    google_translate_api_key = api_key
                    translator = Translator(dst_language, google_translate_api_key,
                                            dst=dst_language,
                                            src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print(
                        "Error: Subtitle translation requires specified Google Translate API key. "
                        "See --help for further information."
                    )
                    return 1

        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output

    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)

    return dest
Example #10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('source_path',
                        help="Path to the video or audio file to subtitle",
                        nargs='?')
    parser.add_argument('-C',
                        '--concurrency',
                        help="Number of concurrent API requests to make",
                        type=int,
                        default=10)
    parser.add_argument(
        '-o',
        '--output',
        help="Output path for subtitles (by default, subtitles are saved in \
                    the same directory and name as the source path)")
    parser.add_argument('-F',
                        '--format',
                        help="Destination subtitle format",
                        default="srt")
    parser.add_argument('-S',
                        '--src-language',
                        help="Language spoken in source file",
                        default="en")
    parser.add_argument('-D',
                        '--dst-language',
                        help="Desired language for the subtitles",
                        default="en")
    parser.add_argument(
        '-K',
        '--api-key',
        help=
        "The Google Translate API key to be used. (Required for subtitle translation)"
    )
    parser.add_argument('--list-formats',
                        help="List all available subtitle formats",
                        action='store_true')
    parser.add_argument('--list-languages',
                        help="List all available source/destination languages",
                        action='store_true')

    args = parser.parse_args()

    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS.keys():
            print("{format}".format(format=subtitle_format))
        return 0

    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0

    if args.format not in FORMATTERS.keys():
        print(
            "Subtitle format not supported. Run with --list-formats to see all supported formats."
        )
        return 1

    if args.src_language not in LANGUAGE_CODES.keys():
        print(
            "Source language not supported. Run with --list-languages to see all supported languages."
        )
        return 1

    if args.dst_language not in LANGUAGE_CODES.keys():
        print(
            "Destination language not supported. Run with --list-languages to see all supported languages."
        )
        return 1

    #if not args.source_path:
    #    print("Error: You need to specify a source path.")
    #    return 1

    config = get_config("apikey.conf")
    args.api_key = config['apikey']
    args.src_language = config['source_lang']
    args.dst_language = config['dest_lang']
    transpath = config['path']
    entries = os.listdir(transpath)

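    # Batch mode: walk every file in the configured directory and subtitle
    # each supported media file in turn.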
    for fp in entries:
        # Split the extension from the path and normalise it to lowercase.
        ext = os.path.splitext(fp)[-1].lower()
        rpath = os.path.normpath(os.path.join(transpath, fp))
        #rpath = os.path.abspath(fp)
        print(rpath)

        rightf = ext.endswith(('.mp4', '.mp3', '.m4a', '.wav', '.mov', '.3gp',
                               '.avi', '.ogg', '.webm', '.mkv', '.flv'))
        if rightf:
            #command = ["python.exe Scripts/autosub", "-S", "en", "-D", "zh-TW", "-K", "AIzaSyCEPufSi1M0SD-dcmfAnDZNw6tUwHcsLzM", rpath]
            #print(command)
            #subprocess.check_output(command, stdin=open(os.devnull))
            #else:
            #    print fp, "is an unknown file format.

            audio_filename, audio_rate = extract_audio(rpath)

            regions = find_speech_regions(audio_filename)

            pool = multiprocessing.Pool(args.concurrency)
            converter = FLACConverter(source_path=audio_filename)
            recognizer = SpeechRecognizer(language=args.src_language,
                                          rate=audio_rate,
                                          api_key=GOOGLE_SPEECH_API_KEY)

            transcripts = []
            if regions:
                try:
                    widgets = [
                        "Converting speech regions to FLAC files: ",
                        Percentage(), ' ',
                        Bar(), ' ',
                        ETA()
                    ]
                    pbar = ProgressBar(widgets=widgets,
                                       maxval=len(regions)).start()
                    extracted_regions = []
                    for i, extracted_region in enumerate(
                            pool.imap(converter, regions)):
                        extracted_regions.append(extracted_region)
                        pbar.update(i)
                    pbar.finish()

                    widgets = [
                        "Performing speech recognition: ",
                        Percentage(), ' ',
                        Bar(), ' ',
                        ETA()
                    ]
                    pbar = ProgressBar(widgets=widgets,
                                       maxval=len(regions)).start()

                    for i, transcript in enumerate(
                            pool.imap(recognizer, extracted_regions)):
                        transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()

                    if not is_same_language(args.src_language,
                                            args.dst_language):
                        if args.api_key:
                            google_translate_api_key = args.api_key
                            translator = Translator(args.dst_language,
                                                    google_translate_api_key,
                                                    dst=args.dst_language,
                                                    src=args.src_language)
                            prompt = "Translating from {0} to {1}: ".format(
                                args.src_language, args.dst_language)
                            widgets = [
                                prompt,
                                Percentage(), ' ',
                                Bar(), ' ',
                                ETA()
                            ]
                            pbar = ProgressBar(widgets=widgets,
                                               maxval=len(regions)).start()
                            translated_transcripts = []
                            for i, transcript in enumerate(
                                    pool.imap(translator, transcripts)):
                                translated_transcripts.append(transcript)
                                pbar.update(i)
                            pbar.finish()
                            transcripts = translated_transcripts
                        else:
                            print(
                                "Error: Subtitle translation requires specified Google Translate API key. "
                                "See --help for further information.")
                            return 1

                except KeyboardInterrupt:
                    pbar.finish()
                    pool.terminate()
                    pool.join()
                    print("Cancelling transcription")
                    return 1

            timed_subtitles = [(r, t) for r, t in zip(regions, transcripts)
                               if t]
            formatter = FORMATTERS.get(args.format)
            formatted_subtitles = formatter(timed_subtitles)

            dest = args.output

            if not dest:
                base, ext = os.path.splitext(rpath)
                dest = "{base}.{format}".format(base=base, format=args.format)

            with open(dest, 'wb') as f:
                f.write(formatted_subtitles.encode("utf-8"))

            print("Subtitles file created at {}".format(dest))

            os.remove(audio_filename)

    return 0
Example #11
    def generate_subtitles(
            source_path,
            src_language,
            listener_progress,
            output=None,
            concurrency=DEFAULT_CONCURRENCY,
            subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
            audio_filename='',
            audio_rate='',
            name=''
        ):

        regions = find_speech_regions(audio_filename)

        converter = FLACConverter(source_path=audio_filename)
        recognizer = SpeechRecognizer(language=src_language, 
                                        rate=audio_rate,
                                        api_key=GOOGLE_SPEECH_API_KEY)
        transcripts = []
        if regions:
            try: 
                print("Step 1 of 2: Converting speech regions to FLAC files ")
                len_regions = len(regions)
                extracted_regions = []

                subtitles.pool = multiprocessing.Pool(concurrency)
                for i, extracted_region in enumerate(subtitles.pool.imap(converter, regions)):                    
                    extracted_regions.append(extracted_region)
                    print(i)
                  
                subtitles.stop()

                print("Step 2 of 2: Performing speech recognition ")
                subtitles.pool = multiprocessing.Pool(concurrency)
                for i, transcript in enumerate(subtitles.pool.imap(recognizer, extracted_regions)):                   
                    transcripts.append(transcript)
                    print(i)       
                subtitles.stop()
               
                

            except KeyboardInterrupt:
                subtitles.pbar.finish()
                subtitles.stop()
                raise

        timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
        formatter = FORMATTERS.get(subtitle_file_format)
        formatted_subtitles = formatter(timed_subtitles)

        dest = output

        if not dest:
            base = os.path.splitext(source_path)[0]+'\\'+name
            dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

        with open(dest, 'wb') as output_file:
            output_file.write(formatted_subtitles.encode("utf-8"))

        os.remove(audio_filename)

        subtitles.stop()
        return dest
Example #12
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        project_id=None,
        location="global",
        model="default"):
    """
    Given an input audio/video file, generate subtitles in the specified language and format.
    """
    audio_filename, audio_rate = extract_audio(source_path)

    regions = find_speech_regions(audio_filename)
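    # FLAC conversion runs on a process pool; speech recognition and
    # translation run on a thread pool.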
    tpool = concurrent.futures.ThreadPoolExecutor(concurrency)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language,
                                  rate=audio_rate,
                                  model=model)

    transcripts = []
    if regions:
        try:
            widgets = [
                "Converting speech regions to FLAC files: ",
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter,
                                                           regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = [
                "Performing speech recognition: ",
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

            for i, transcript in enumerate(
                    tpool.map(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if project_id:
                    translator = Translator(project_id,
                                            location=location,
                                            dst=dst_language,
                                            src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(
                        src_language, dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets,
                                       maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(
                            tpool.map(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print(
                        "Error: Subtitle translation requires specified Google Translate API key. "
                        "See --help for further information.")
                    return 1

        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output

    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)

    return dest
Example #13
    def OpenFile(self, filename=None):
        """Open a media file in a MediaPlayer
        
        if filename is None:
            filename = QtGui.QFileDialog.getOpenFileName(self, "Open File", os.path.expanduser('~'))
        if not filename:
            return"""

        filename = QtGui.QFileDialog.getOpenFileName(self, 'Open File')
        formats = ['.mp4', '.mkv', '.avi', '.MP4', '.MKV', '.AVI']
        base, ext = os.path.splitext(str(filename))
        self.sub_path = base + '.' + 'srt'
        if filename == "" or ext not in formats:
            self.invalid_file()
        else:
            if os.path.isfile(base + '.' + 'srt'):
                print('subtitle file already present')
            else:
                audio_filename, audio_rate = extract_audio(str(filename))
                command = "ffmpeg -i " + str(
                    filename
                ) + " -acodec pcm_s16le -ac 1 -ar 16000 " + base + ".wav"
                print(command)
                subprocess.call(command, shell=True)
                regions = find_speech_regions(audio_filename)
                # freeze_support()
                pool = multiprocessing.Pool(10)
                converter = FLACConverter(source_path=audio_filename)
                recognizer = SpeechRecognizer(language="en",
                                              rate=audio_rate,
                                              api_key=GOOGLE_SPEECH_API_KEY)

                transcripts = []
                if regions:
                    try:
                        widgets = [
                            "Converting speech regions to FLAC files: ",
                            Percentage(), ' ',
                            Bar(), ' ',
                            ETA()
                        ]
                        pbar = ProgressBar(widgets=widgets,
                                           maxval=len(regions)).start()
                        # self.progressLabel.setText("Converting speech regions to FLAC files: ")
                        extracted_regions = []
                        for i, extracted_region in enumerate(
                                pool.imap(converter, regions)):
                            extracted_regions.append(extracted_region)
                            pbar.update(i)
                            # self.progress.setValue(i * 100 / 66)
                        pbar.finish()

                        # self.progress.setValue(0)

                        widgets = [
                            "Performing speech recognition: ",
                            Percentage(), ' ',
                            Bar(), ' ',
                            ETA()
                        ]
                        pbar = ProgressBar(widgets=widgets,
                                           maxval=len(regions)).start()
                        # self.progressLabel.setText("Performing speech recognition: ")
                        for i, transcript in enumerate(
                                pool.imap(recognizer, extracted_regions)):
                            transcripts.append(transcript)
                            pbar.update(i)
                            # self.progress.setValue(i * 100 / 39)
                        pbar.finish()
                        # self.progress.setValue(100)

                        if not is_same_language("en", "en"):
                            if self.args.api_key:
                                google_translate_api_key = self.args.api_key
                                translator = Translator(
                                    self.args.dst_language,
                                    google_translate_api_key,
                                    dst=self.args.dst_language,
                                    src=self.args.src_language)
                                prompt = "Translating from {0} to {1}: ".format(
                                    self.args.src_language,
                                    self.args.dst_language)
                                widgets = [
                                    prompt,
                                    Percentage(), ' ',
                                    Bar(), ' ',
                                    ETA()
                                ]
                                pbar = ProgressBar(
                                    widgets=widgets,
                                    maxval=len(regions)).start()
                                translated_transcripts = []
                                # self.progress.setValue(0)
                                for i, transcript in enumerate(
                                        pool.imap(translator, transcripts)):
                                    translated_transcripts.append(transcript)
                                    pbar.update(i)
                                    # self.progress.setValue(i)
                                pbar.finish()
                                # self.progress.setValue(100)
                                transcripts = translated_transcripts
                            else:
                                print("Error: Subtitle translation requires specified Google Translate API key. "
                                      "See --help for further information.")
                                return 1

                    except KeyboardInterrupt:
                        pbar.finish()
                        pool.terminate()
                        pool.join()
                        print("Cancelling transcription")
                        return 1

                timed_subtitles = [(r, t)
                                   for r, t in zip(regions, transcripts) if t]
                formatter = FORMATTERS.get("srt")
                formatted_subtitles = formatter(timed_subtitles)

                base, ext = os.path.splitext(str(filename))
                dest = "{base}.{format}".format(base=base, format="srt")

                with open(dest, 'wb') as f:
                    f.write(formatted_subtitles.encode("utf-8"))

                print("Subtitles file created at {}".format(dest))
                # open_video(0, self.args.source_path)
                # print formatted_subtitles.split('\n')

            # create the media
            self.sec = -1
            if sys.version < '3':
                filename = unicode(filename)
            self.media = self.instance.media_new(filename)
            # put the media in the media player
            self.mediaplayer.set_media(self.media)

            # parse the metadata of the file
            self.media.parse()
            # set the title of the track as window title
            self.setWindowTitle(self.media.get_meta(0))

            # the media player has to be 'connected' to the QFrame
            # (otherwise a video would be displayed in it's own window)

            if sys.platform.startswith(
                    'linux'):  # for Linux using the X Server
                self.mediaplayer.set_xwindow(self.videoframe.winId())
            elif sys.platform == "win32":  # for Windows
                self.mediaplayer.set_hwnd(self.videoframe.winId())
            elif sys.platform == "darwin":  # for MacOS
                self.mediaplayer.set_nsobject(self.videoframe.winId())
            self.PlayPause()
            self.duration = get_duration_wav(base + ".wav")
            f = open(self.sub_path)
            top_10 = {}
            pos = 0
            wrds = [
                'their', 'the', 'a', 'on', 'an', 'i', 'you', 'he', 'she', 'it',
                'we', 'they', 'me', 'him', 'her', 'us', 'them', 'what', 'who',
                'whom', 'mine', 'yours', 'his', 'hers', 'ours', 'theirs',
                'this', 'that', 'these', 'those', 'with', 'at', 'by', 'into',
                'for', 'to', 'up', 'of', 'in', 'is', 'are', 'and', 'as', 'if',
                'from'
            ]
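            # Count word frequencies on the text line of each SRT block,
            # skipping the common words listed above.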
            for i in f.readlines():
                pos += 1
                if (pos + 1) % 4 == 0:
                    for j in i.lower().split():
                        if j not in wrds:
                            top_10[j] = top_10.get(j, 0) + 1
            top = []
            self.comboBox.clear()
            for key, value in sorted(top_10.items(),
                                     key=lambda kv: (kv[1], kv[0]),
                                     reverse=True):
                print("%s: %s" % (key, value))
                top.append(value)
                self.comboBox.addItem(str(key))
                if len(top) == 10:
                    break