def main():
    # This variant hardcodes its input file; the language and format below
    # stand in for the CLI options the other variants parse ("en" and "srt"
    # are the usual defaults and are assumed here).
    filename = "/home/tc/DATA/subtest/test1.wav"
    src_language = "en"
    subtitle_format = "srt"
    output = None

    audio_filename, audio_rate = extract_audio(filename)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(10)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print "Cancelling transcription"
            return 1

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base, ext = os.path.splitext(filename)
        dest = "{base}.{format}".format(base=base, format=subtitle_format)

    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))

    print "Subtitles file created at {}".format(dest)
    os.remove(audio_filename)
    return 0
def emit_subtitles(dest, source_path, lang, format, regions, transcripts):
    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(format)
    formatted_subtitles = formatter(timed_subtitles)
    if not dest:
        base, ext = os.path.splitext(source_path)
        dest = "{base}.{format}".format(base=base, format=format)
    base, ext = os.path.splitext(dest)
    dest = "{base}.subs.{lang}{ext}".format(base=base, lang=lang, ext=ext)
    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))
    return dest
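# A minimal usage sketch for emit_subtitles (not part of the original code),
# assuming FORMATTERS contains an "srt" formatter and that each region is a
# (start, end) tuple in seconds paired with a transcript string. The sample
# values and the helper name are hypothetical.
def example_emit_subtitles():
    sample_regions = [(0.0, 2.5), (3.1, 6.0)]
    sample_transcripts = ["hello there", "thanks for watching"]
    # With dest=None the path is derived from source_path, then a
    # ".subs.<lang>" suffix is spliced in before the extension,
    # yielding "example.subs.en.srt" here.
    return emit_subtitles(dest=None, source_path="example.mp4", lang="en",
                          format="srt", regions=sample_regions,
                          transcripts=sample_transcripts)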
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('source_path',
                        help="Path to the video or audio file to subtitle",
                        nargs='?')
    parser.add_argument('-C', '--concurrency',
                        help="Number of concurrent API requests to make",
                        type=int, default=10)
    parser.add_argument('-o', '--output',
                        help="Output path for subtitles (by default, subtitles are saved "
                             "in the same directory and name as the source path)")
    parser.add_argument('-F', '--format',
                        help="Destination subtitle format", default="srt")
    parser.add_argument('-S', '--src-language',
                        help="Language spoken in source file", default="en")
    parser.add_argument('-D', '--dst-language',
                        help="Desired language for the subtitles", default="en")
    parser.add_argument('-K', '--api-key',
                        help="The Google Translate API key to be used. "
                             "(Required for subtitle translation)")
    parser.add_argument('--list-formats',
                        help="List all available subtitle formats",
                        action='store_true')
    parser.add_argument('--list-languages',
                        help="List all available source/destination languages",
                        action='store_true')
    args = parser.parse_args()

    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS.keys():
            print("{format}".format(format=subtitle_format))
        return 0

    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0

    if args.format not in FORMATTERS.keys():
        print("Subtitle format not supported. "
              "Run with --list-formats to see all supported formats.")
        return 1

    if args.src_language not in LANGUAGE_CODES.keys():
        print("Source language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    if args.dst_language not in LANGUAGE_CODES.keys():
        print("Destination language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    if not args.source_path:
        print("Error: You need to specify a source path.")
        return 1

    # Normalise path separators on Windows.
    if os.name != "posix":
        args.source_path = args.source_path.replace("/", "\\")

    audio_filename, audio_rate = extract_audio(args.source_path)
    regions = find_speech_regions(audio_filename)
    pool = ProcessingPool(args.concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=args.src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if not is_same_language(args.src_language, args.dst_language):
                if args.api_key:
                    google_translate_api_key = args.api_key
                    translator = Translator(args.dst_language,
                                            google_translate_api_key,
                                            dst=args.dst_language,
                                            src=args.src_language)
                    prompt = "Translating from {0} to {1}: ".format(
                        args.src_language, args.dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print("Error: Subtitle translation requires specified "
                          "Google Translate API key. "
                          "See --help for further information.")
                    return 1
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            return 1

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(args.format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = args.output
    if not dest:
        base, ext = os.path.splitext(args.source_path)
        dest = "{base}.{format}".format(base=base, format=args.format)

    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))

    print("Subtitles file created at {}".format(dest))
    os.remove(audio_filename)
    return 0
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        src_language,
        listener_progress,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format.
    """
    # Windows does not support forkserver, only spawn. On macOS the start
    # method also matters: from Python 3.8 the default is spawn rather than
    # fork, and the method can be set only once per process (otherwise it
    # raises an error).
    if os.name != "nt" and "Darwin" in os.uname():
        if multiprocessing.get_start_method(allow_none=True) != 'spawn':
            multiprocessing.set_start_method('spawn')

    Ctr_Autosub.cancel = False
    Ctr_Autosub.step = 0

    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            if Ctr_Autosub.cancel:
                return -1
            str_task_1 = "Step 1 of 2: Converting speech regions to FLAC files "
            len_regions = len(regions)
            extracted_regions = []
            Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
            for i, extracted_region in enumerate(
                    Ctr_Autosub.pool.imap(converter, regions)):
                Ctr_Autosub.step = 1
                extracted_regions.append(extracted_region)
                progress_percent = MyUtil.percentage(i, len_regions)
                Ctr_Autosub.output_progress(listener_progress, str_task_1,
                                            progress_percent)
            if Ctr_Autosub.cancel:
                return -1
            else:
                Ctr_Autosub.pool.close()
                Ctr_Autosub.pool.join()

            str_task_2 = "Step 2 of 2: Performing speech recognition "
            Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
            for i, transcript in enumerate(
                    Ctr_Autosub.pool.imap(recognizer, extracted_regions)):
                Ctr_Autosub.step = 2
                transcripts.append(transcript)
                progress_percent = MyUtil.percentage(i, len_regions)
                Ctr_Autosub.output_progress(listener_progress, str_task_2,
                                            progress_percent)
            if Ctr_Autosub.cancel:
                return -1
            else:
                Ctr_Autosub.pool.close()
                Ctr_Autosub.pool.join()
        except KeyboardInterrupt:
            Ctr_Autosub.pbar.finish()
            Ctr_Autosub.pool.terminate()
            Ctr_Autosub.pool.join()
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)

    if Ctr_Autosub.cancel:
        return -1
    else:
        Ctr_Autosub.pool.close()
        Ctr_Autosub.pool.join()
        return dest
def generate_subtitles(
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language_list=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
):
    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    dest_list = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            transcripts = []
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            for dst_language in dst_language_list:
                translated_transcripts = []
                if not is_same_language(src_language, dst_language):
                    if api_key:
                        google_translate_api_key = api_key
                        translator = Translator(dst_language, google_translate_api_key,
                                                dst=dst_language, src=src_language)
                        prompt = "Translating from {0} to {1}: ".format(
                            src_language, dst_language)
                        widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                        pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                        for i, transcript in enumerate(pool.imap(translator, transcripts)):
                            translated_transcripts.append(transcript)
                            pbar.update(i)
                        pbar.finish()
                    else:
                        print("Error: Subtitle translation requires specified "
                              "Google Translate API key. "
                              "See --help for further information.")
                        return 1

                if len(translated_transcripts) > 0:
                    timed_subtitles = [(r, t) for r, t in
                                       zip(regions, translated_transcripts) if t]
                else:
                    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]

                formatter = FORMATTERS.get(subtitle_file_format)
                formatted_subtitles = formatter(timed_subtitles)

                dest = output
                if not dest:
                    base, ext = os.path.splitext(source_path)
                    dest = "{base}_{lang}.{format}".format(
                        base=base, lang=dst_language, format=subtitle_file_format)
                dest_list.append(dest)

                with open(dest, 'wb') as f:
                    f.write(formatted_subtitles.encode("utf-8"))
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    os.remove(audio_filename)
    return dest_list
def generate_subtitles(source_path,
                       output=None,
                       concurrency=DEFAULT_CONCURRENCY,
                       src_language=DEFAULT_SRC_LANGUAGE,
                       dst_language=DEFAULT_DST_LANGUAGE,
                       subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
                       api_key=None,
                       min_sample_length=DEFAULT_MIN_LENGTH,
                       max_sample_length=DEFAULT_MAX_LENGTH,
                       silent_percentile=DEFAULT_PERCENTILE,
                       silent_frame_cut=DEFAULT_FRAME_CUT,
                       interval=DEFAULT_INTERVAL):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format.
    """
    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename,
                                  silent_percentile=silent_percentile,
                                  min_region_size=min_sample_length,
                                  max_region_size=max_sample_length,
                                  silent_frame_cut=silent_frame_cut,
                                  percentile_interval=interval)
    print("Found %i regions with potential speech." % len(regions))
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if api_key:
                    google_translate_api_key = api_key
                    translator = Translator(dst_language, google_translate_api_key,
                                            dst=dst_language, src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(
                        src_language, dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print("Error: Subtitle translation requires specified "
                          "Google Translate API key. "
                          "See --help for further information.")
                    return 1
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)
    print("Found %i segments with voice." % len(timed_subtitles))

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)
    return dest
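# A minimal usage sketch for the tunable variant above (not part of the
# original code). The file name, helper name, and concrete values are made
# up, and the inline notes are guesses from the parameter names rather than
# documented behaviour.
def example_generate_with_tuning():
    return generate_subtitles("lecture.mp4",
                              src_language="en",
                              dst_language="en",
                              subtitle_file_format="srt",
                              min_sample_length=0.5,   # presumably the shortest region to keep
                              max_sample_length=6.0,   # presumably the longest region to keep
                              silent_percentile=0.2,   # presumably the energy cut-off for silence
                              silent_frame_cut=0.25,   # presumably how much silence to trim
                              interval=2.0)            # presumably the percentile window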
def generate_subtitles(
        audio_filename,
        audio_rate,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
):
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)

            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)

            if not is_same_language(src_language, dst_language):
                if api_key:
                    google_translate_api_key = api_key
                    translator = Translator(dst_language, google_translate_api_key,
                                            dst=dst_language, src=src_language)
                    translated_transcripts = []
                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                    transcripts = translated_transcripts
                else:
                    print("Error: Subtitle translation requires specified "
                          "Google Translate API key. "
                          "See --help for further information.")
                    return 1
        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base, ext = os.path.splitext(audio_filename)
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))

    return dest
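# A minimal usage sketch for the variant above, which takes pre-extracted
# audio instead of the original media file (not part of the original code;
# "movie.mp4", the helper name, the language pair, and the key placeholder
# are made up).
def example_generate_from_extracted_audio():
    audio_filename, audio_rate = extract_audio("movie.mp4")
    try:
        return generate_subtitles(audio_filename, audio_rate,
                                  src_language="en",
                                  dst_language="es",
                                  subtitle_file_format="srt",
                                  api_key="YOUR_GOOGLE_TRANSLATE_API_KEY")
    finally:
        # This variant does not delete the temporary WAV itself,
        # so the caller cleans it up.
        os.remove(audio_filename)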
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format.
    """
    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if api_key:
                    translator = TranslatorWithApikey(dst_language, api_key,
                                                      dst=dst_language,
                                                      src=src_language)
                    print("Using specific translation API...")
                else:
                    translator = TranslatorWithoutApikey(src_language, dst_language)
                    print("Using free translation API...")
                prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
                widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                translated_transcripts = []
                for i, transcript in enumerate(pool.imap(translator, transcripts)):
                    translated_transcripts.append(transcript)
                    pbar.update(i)
                pbar.finish()
                transcripts = translated_transcripts
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)
    return dest
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format.
    """
    if os.name != "nt" and "Darwin" in os.uname():
        # The default Unix fork start method does not work on macOS;
        # use forkserver instead (the start method can be set only once).
        if multiprocessing.get_start_method(allow_none=True) != 'forkserver':
            multiprocessing.set_start_method('forkserver')

    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if api_key:
                    google_translate_api_key = api_key
                    translator = Translator(dst_language, google_translate_api_key,
                                            dst=dst_language, src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print("Error: Subtitle translation requires specified "
                          "Google Translate API key. "
                          "See --help for further information.")
                    return 1
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)
    return dest
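# With the forkserver (or spawn) start method, worker processes re-import the
# main module, so the entry point must be guarded or the pool will recurse.
# A minimal sketch of how a caller might invoke the variant above (the file
# name and argument values are made up, not part of the original code):
if __name__ == "__main__":
    generate_subtitles("input.mp4",
                       src_language="en",
                       dst_language="en",
                       subtitle_file_format="srt")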
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('source_path',
                        help="Path to the video or audio file to subtitle",
                        nargs='?')
    parser.add_argument('-C', '--concurrency',
                        help="Number of concurrent API requests to make",
                        type=int, default=10)
    parser.add_argument('-o', '--output',
                        help="Output path for subtitles (by default, subtitles are saved "
                             "in the same directory and name as the source path)")
    parser.add_argument('-F', '--format',
                        help="Destination subtitle format", default="srt")
    parser.add_argument('-S', '--src-language',
                        help="Language spoken in source file", default="en")
    parser.add_argument('-D', '--dst-language',
                        help="Desired language for the subtitles", default="en")
    parser.add_argument('-K', '--api-key',
                        help="The Google Translate API key to be used. "
                             "(Required for subtitle translation)")
    parser.add_argument('--list-formats',
                        help="List all available subtitle formats",
                        action='store_true')
    parser.add_argument('--list-languages',
                        help="List all available source/destination languages",
                        action='store_true')
    args = parser.parse_args()

    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS.keys():
            print("{format}".format(format=subtitle_format))
        return 0

    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0

    if args.format not in FORMATTERS.keys():
        print("Subtitle format not supported. "
              "Run with --list-formats to see all supported formats.")
        return 1

    if args.src_language not in LANGUAGE_CODES.keys():
        print("Source language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    if args.dst_language not in LANGUAGE_CODES.keys():
        print("Destination language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    # Source files are discovered via the config file rather than source_path,
    # so the usual "source path is required" check is skipped here.
    config = get_config("apikey.conf")
    args.api_key = config['apikey']
    args.src_language = config['source_lang']
    args.dst_language = config['dest_lang']
    transpath = config['path']

    entries = os.listdir(transpath)
    for fp in entries:
        # Split the extension from the path and normalise it to lowercase.
        ext = os.path.splitext(fp)[-1].lower()
        rpath = os.path.normpath(os.path.join(transpath, fp))
        print(rpath)
        rightf = ext.endswith(('.mp4', '.mp3', '.m4a', '.wav', '.mov', '.3gp',
                               '.avi', '.ogg', '.webm', '.mkv', '.flv'))
        if not rightf:
            print("{} is an unknown file format.".format(fp))
            continue

        audio_filename, audio_rate = extract_audio(rpath)
        regions = find_speech_regions(audio_filename)
        pool = multiprocessing.Pool(args.concurrency)
        converter = FLACConverter(source_path=audio_filename)
        recognizer = SpeechRecognizer(language=args.src_language, rate=audio_rate,
                                      api_key=GOOGLE_SPEECH_API_KEY)
        transcripts = []
        if regions:
            try:
                widgets = ["Converting speech regions to FLAC files: ",
                           Percentage(), ' ', Bar(), ' ', ETA()]
                pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                extracted_regions = []
                for i, extracted_region in enumerate(pool.imap(converter, regions)):
                    extracted_regions.append(extracted_region)
                    pbar.update(i)
                pbar.finish()

                widgets = ["Performing speech recognition: ",
                           Percentage(), ' ', Bar(), ' ', ETA()]
                pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                    transcripts.append(transcript)
                    pbar.update(i)
                pbar.finish()

                if not is_same_language(args.src_language, args.dst_language):
                    if args.api_key:
                        google_translate_api_key = args.api_key
                        translator = Translator(args.dst_language,
                                                google_translate_api_key,
                                                dst=args.dst_language,
                                                src=args.src_language)
                        prompt = "Translating from {0} to {1}: ".format(
                            args.src_language, args.dst_language)
                        widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                        pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                        translated_transcripts = []
                        for i, transcript in enumerate(pool.imap(translator, transcripts)):
                            translated_transcripts.append(transcript)
                            pbar.update(i)
                        pbar.finish()
                        transcripts = translated_transcripts
                    else:
                        print("Error: Subtitle translation requires specified "
                              "Google Translate API key. "
                              "See --help for further information.")
                        return 1
            except KeyboardInterrupt:
                pbar.finish()
                pool.terminate()
                pool.join()
                print("Cancelling transcription")
                return 1

        timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
        formatter = FORMATTERS.get(args.format)
        formatted_subtitles = formatter(timed_subtitles)

        dest = args.output
        if not dest:
            base, ext = os.path.splitext(rpath)
            dest = "{base}.{format}".format(base=base, format=args.format)

        with open(dest, 'wb') as f:
            f.write(formatted_subtitles.encode("utf-8"))

        print("Subtitles file created at {}".format(dest))
        os.remove(audio_filename)

    return 0
def generate_subtitles(
        source_path,
        src_language,
        listener_progress,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        audio_filename='',
        audio_rate='',
        name=''
):
    regions = find_speech_regions(audio_filename)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            print("Step 1 of 2: Converting speech regions to FLAC files ")
            len_regions = len(regions)
            extracted_regions = []
            subtitles.pool = multiprocessing.Pool(concurrency)
            for i, extracted_region in enumerate(subtitles.pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                print(i)
            subtitles.stop()

            print("Step 2 of 2: Performing speech recognition ")
            subtitles.pool = multiprocessing.Pool(concurrency)
            for i, transcript in enumerate(subtitles.pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                print(i)
            subtitles.stop()
        except KeyboardInterrupt:
            subtitles.pbar.finish()
            subtitles.stop()
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0] + '\\' + name
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)
    subtitles.stop()
    return dest
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        project_id=None,
        location="global",
        model="default"):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format.
    """
    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    tpool = concurrent.futures.ThreadPoolExecutor(concurrency)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate, model=model)
    transcripts = []
    if regions:
        try:
            widgets = ["Converting speech regions to FLAC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = ["Performing speech recognition: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(tpool.map(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if project_id:
                    translator = Translator(project_id, location=location,
                                            dst=dst_language, src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(tpool.map(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print("Error: Subtitle translation requires a specified "
                          "Google Cloud project id. "
                          "See --help for further information.")
                    return 1
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)
    return dest
def OpenFile(self, filename=None):
    """Open a media file in a MediaPlayer."""
    filename = QtGui.QFileDialog.getOpenFileName(self, 'Open File')
    formats = ['.mp4', '.mkv', '.avi', '.MP4', '.MKV', '.AVI']
    base, ext = os.path.splitext(str(filename))
    self.sub_path = base + '.srt'

    if filename == "" or ext not in formats:
        self.invalid_file()
    else:
        if os.path.isfile(self.sub_path):
            print 'subtitle file already present'
        else:
            audio_filename, audio_rate = extract_audio(str(filename))
            command = ("ffmpeg -i " + str(filename) +
                       " -acodec pcm_s16le -ac 1 -ar 16000 " + base + ".wav")
            print command
            subprocess.call(command, shell=True)
            regions = find_speech_regions(audio_filename)
            pool = multiprocessing.Pool(10)
            converter = FLACConverter(source_path=audio_filename)
            recognizer = SpeechRecognizer(language="en", rate=audio_rate,
                                          api_key=GOOGLE_SPEECH_API_KEY)
            transcripts = []
            if regions:
                try:
                    widgets = ["Converting speech regions to FLAC files: ",
                               Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    extracted_regions = []
                    for i, extracted_region in enumerate(pool.imap(converter, regions)):
                        extracted_regions.append(extracted_region)
                        pbar.update(i)
                    pbar.finish()

                    widgets = ["Performing speech recognition: ",
                               Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                        transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()

                    if not is_same_language("en", "en"):
                        if self.args.api_key:
                            google_translate_api_key = self.args.api_key
                            translator = Translator(self.args.dst_language,
                                                    google_translate_api_key,
                                                    dst=self.args.dst_language,
                                                    src=self.args.src_language)
                            prompt = "Translating from {0} to {1}: ".format(
                                self.args.src_language, self.args.dst_language)
                            widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                            pbar = ProgressBar(widgets=widgets,
                                               maxval=len(regions)).start()
                            translated_transcripts = []
                            for i, transcript in enumerate(pool.imap(translator, transcripts)):
                                translated_transcripts.append(transcript)
                                pbar.update(i)
                            pbar.finish()
                            transcripts = translated_transcripts
                        else:
                            print ("Error: Subtitle translation requires specified "
                                   "Google Translate API key. "
                                   "See --help for further information.")
                            return 1
                except KeyboardInterrupt:
                    pbar.finish()
                    pool.terminate()
                    pool.join()
                    print "Cancelling transcription"
                    return 1

            timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
            formatter = FORMATTERS.get("srt")
            formatted_subtitles = formatter(timed_subtitles)
            base, ext = os.path.splitext(str(filename))
            dest = "{base}.{format}".format(base=base, format="srt")
            with open(dest, 'wb') as f:
                f.write(formatted_subtitles.encode("utf-8"))
            print "Subtitles file created at {}".format(dest)

        # Create the media and attach it to the player.
        self.sec = -1
        if sys.version < '3':
            filename = unicode(filename)
        self.media = self.instance.media_new(filename)
        # Put the media in the media player.
        self.mediaplayer.set_media(self.media)
        # Parse the metadata of the file.
        self.media.parse()
        # Set the title of the track as the window title.
        self.setWindowTitle(self.media.get_meta(0))
        # The media player has to be 'connected' to the QFrame
        # (otherwise a video would be displayed in its own window).
        if sys.platform.startswith('linux'):  # for Linux using the X Server
            self.mediaplayer.set_xwindow(self.videoframe.winId())
        elif sys.platform == "win32":  # for Windows
            self.mediaplayer.set_hwnd(self.videoframe.winId())
        elif sys.platform == "darwin":  # for MacOS
            self.mediaplayer.set_nsobject(self.videoframe.winId())

        self.PlayPause()
        self.duration = get_duration_wav(base + ".wav")

        # Count word frequencies in the subtitle text lines (every fourth
        # line of the SRT), skipping common stop words, and fill the combo
        # box with the ten most frequent words.
        top_10 = {}
        pos = 0
        wrds = ['their', 'the', 'a', 'on', 'an', 'i', 'you', 'he', 'she', 'it',
                'we', 'they', 'me', 'him', 'her', 'us', 'them', 'what', 'who',
                'whom', 'mine', 'yours', 'his', 'hers', 'ours', 'theirs',
                'this', 'that', 'these', 'those', 'with', 'at', 'by', 'into',
                'for', 'to', 'up', 'of', 'in', 'is', 'are', 'and', 'as', 'if',
                'from']
        with open(self.sub_path) as f:
            for i in f.readlines():
                pos += 1
                if (pos + 1) % 4 == 0:
                    for j in i.lower().split():
                        if top_10.get(j) is not None:
                            if j not in wrds:
                                top_10[j] += 1
                        else:
                            if j not in wrds:
                                top_10[j] = 1

        top = []
        self.comboBox.clear()
        for key, value in sorted(top_10.iteritems(),
                                 key=lambda (k, v): (v, k), reverse=True):
            print "%s: %s" % (key, value)
            top.append(value)
            self.comboBox.addItem(str(key))
            if len(top) == 10:
                break