Esempio n. 1
0
    def __call__(self, region):
        """
        Split one region out of the source file with the configured command.

        ``region`` is a ``(start_ms, end_ms)`` pair in milliseconds. The
        region is widened by ``self.include_before`` and
        ``self.include_after`` (both in seconds) before ``self.cmd`` is
        formatted and executed.

        Returns:
            The path of the written fragment. When either ``self.is_keep``
            or ``self.output`` is falsy the fragment goes to a fresh
            temporary file and the command's result is not inspected.
            Otherwise the fragment is written next to ``self.output`` and
            ``None`` is returned if the command wrote anything to stderr or
            the output file holds four bytes or fewer.
            ``None`` is also returned on ``KeyboardInterrupt``.

        Raises:
            exceptions.AutosubException: if ``subprocess.CalledProcessError``
                is raised while splitting.
        """
        try:
            start_ms, end_ms = region
            start = float(start_ms) / 1000.0
            end = float(end_ms) / 1000.0
            # The leading pad is applied only when the region starts later
            # than the pad itself, so the start never reaches zero or below.
            if start > self.include_before:
                start = start - self.include_before
            end += self.include_after

            if not self.is_keep or not self.output:
                temp = tempfile.NamedTemporaryFile(suffix=self.suffix,
                                                   delete=False)
                # Only the unique name is needed; close our handle right away
                # so one descriptor is not leaked per processed region.
                temp.close()
                command = self.cmd.format(start=start,
                                          dura=end - start,
                                          in_=self.source_path,
                                          out_=temp.name)
                prcs = subprocess.Popen(constants.cmd_conversion(command),
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
                prcs.communicate()
                return temp.name

            filename = self.output \
                + "-{start:0>8.3f}-{end:0>8.3f}{suffix}".format(
                    start=start,
                    end=end,
                    suffix=self.suffix)
            command = self.cmd.format(start=start,
                                      dura=end - start,
                                      in_=self.source_path,
                                      out_=filename)
            prcs = subprocess.Popen(constants.cmd_conversion(command),
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            err = prcs.communicate()[1]
            if err:
                # Any stderr output is treated as a failed split.
                return None
            # Reading five bytes is enough to decide "four bytes or fewer"
            # without loading the whole fragment into memory.
            with open(filename, mode="rb") as audio_file:
                audio_head = audio_file.read(5)
            if len(audio_head) <= 4:
                return None
            return filename

        except KeyboardInterrupt:
            return None

        # Popen()/communicate() do not raise CalledProcessError themselves;
        # the handler is kept so the exception contract stays unchanged.
        except subprocess.CalledProcessError as ffmpeg_exec_error:
            raise exceptions.AutosubException(
                _("Error: ffmpeg can't split your file. "
                  "Check your audio processing options.")
            ) from ffmpeg_exec_error
Esempio n. 2
0
    def __call__(self, region):
        """
        Split one region out of the source file with the configured command.

        ``region`` is a ``(start_ms, end_ms)`` pair in milliseconds. The
        region is widened by ``self.include_before`` and
        ``self.include_after`` (both in seconds) before ``self.cmd`` is
        formatted and executed.

        Returns:
            The path of the written fragment: a fresh temporary file when
            either ``self.is_keep`` or ``self.output`` is falsy, otherwise a
            name derived from ``self.output`` and the region bounds.
            ``None`` is returned on ``KeyboardInterrupt``.

        Raises:
            exceptions.AutosubException: if the split command exits with a
                non-zero status.
        """
        try:
            start_ms, end_ms = region
            start = float(start_ms) / 1000.0
            end = float(end_ms) / 1000.0
            # The leading pad is applied only when the region starts later
            # than the pad itself, so the start never reaches zero or below.
            if start > self.include_before:
                start = start - self.include_before
            end += self.include_after

            if not self.is_keep or not self.output:
                temp = tempfile.NamedTemporaryFile(suffix=self.suffix,
                                                   delete=False)
                # Only the unique name is needed; close our handle right away
                # so one descriptor is not leaked per processed region.
                temp.close()
                out_path = temp.name
            else:
                out_path = self.output \
                    + "-{start:0>8.3f}-{end:0>8.3f}{suffix}".format(
                        start=start,
                        end=end,
                        suffix=self.suffix)

            command = self.cmd.format(start=start,
                                      dura=end - start,
                                      in_=self.source_path,
                                      out_=out_path)
            # The context manager closes the devnull handle that used to be
            # left open after every call.
            with open(os.devnull) as devnull:
                subprocess.check_output(
                    constants.cmd_conversion(command),
                    stdin=devnull)
            return out_path

        except KeyboardInterrupt:
            return None

        except subprocess.CalledProcessError:
            raise exceptions.AutosubException(
                _("Error: ffmpeg can't split your file. "
                  "Check your audio processing options.")
            )
Esempio n. 3
0
def ffprobe_check_file(filename):
    """
    Check with ffprobe whether an audio or video file is non-empty.

    The command built from ``constants.DEFAULT_CHECK_CMD`` is run and its
    output is searched for ``bit_rate``.

    Returns:
        ``False`` when ``bit_rate`` is absent from the output or when the
        character one position after the key starts with "n"
        (presumably an "N/A" value -- confirm against the ffprobe output
        format in use); ``True`` otherwise.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
    """
    print(_("\nUse ffprobe to check conversion result."))
    command = constants.DEFAULT_CHECK_CMD.format(in_=filename)
    print(command)
    # The context manager closes the devnull handle that used to be left
    # open after every call.
    with open(os.devnull) as devnull:
        ffprobe_bytes = subprocess.check_output(
            constants.cmd_conversion(command),
            stdin=devnull)
    ffprobe_str = ffprobe_bytes.decode(sys.stdout.encoding)
    print(ffprobe_str)
    bitrate_idx = ffprobe_str.find('bit_rate')
    # Offset 9 skips the 8-character key plus its one-character separator.
    if bitrate_idx < 0 or \
            ffprobe_str[bitrate_idx + 9:bitrate_idx + 10].lower() == 'n':
        return False
    return True
Esempio n. 4
0
def ffprobe_check_file(filename):
    """
    Check with ffprobe whether an audio or video file is non-empty.

    Runs ``ffprobe ... -show_format -pretty -loglevel quiet`` on
    ``filename`` and searches the output for ``bit_rate``.

    Returns:
        ``False`` when ``bit_rate`` is absent from the output or when the
        character one position after the key starts with "n"
        (presumably an "N/A" value -- confirm against the ffprobe output
        format in use); ``True`` otherwise.

    Raises:
        subprocess.CalledProcessError: if ffprobe exits with a non-zero
            status.
    """
    command = "ffprobe {in_} -show_format -pretty -loglevel quiet".format(
        in_=filename)
    print(command)
    # The context manager closes the devnull handle that used to be left
    # open after every call.
    with open(os.devnull) as devnull:
        ffprobe_bytes = subprocess.check_output(
            constants.cmd_conversion(command),
            stdin=devnull,
            shell=False)
    ffprobe_str = ffprobe_bytes.decode(sys.stdout.encoding)
    bitrate_idx = ffprobe_str.find('bit_rate')
    # Offset 9 skips the 8-character key plus its one-character separator.
    if bitrate_idx < 0 or \
            ffprobe_str[bitrate_idx + 9:bitrate_idx + 10].lower() == 'n':
        return False
    return True
Esempio n. 5
0
def ffprobe_get_fps(  # pylint: disable=superfluous-parens
        video_file, input_m=input):
    """
    Return video_file's fps.

    The command built from ``constants.DEFAULT_VIDEO_FPS_CMD`` is run and
    its output (stdout, or stderr when stdout is empty) is expected to hold
    exactly two integers, read as numerator and denominator.

    Args:
        video_file: path substituted into the fps command.
        input_m: callable used to ask the user for an fps value when the
            probe fails; pass a falsy value to disable prompting.

    Returns:
        The fps as a float. ``0.0`` is returned when probing fails and
        either prompting is disabled or the user's answer is not a
        positive number.
    """
    try:
        command = constants.DEFAULT_VIDEO_FPS_CMD.format(in_=video_file)
        print(command)
        prcs = subprocess.Popen(constants.cmd_conversion(command),
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = prcs.communicate()
        ffprobe_str = (out if out else err).decode(sys.stdout.encoding)
        print(ffprobe_str)
        # ffprobe_str is already text here; decoding it a second time raised
        # an uncaught AttributeError on Python 3.
        num_list = [int(num) for num in re.findall(r'\d+', ffprobe_str)]
        # A zero denominator is treated like any other unusable probe result
        # instead of escaping as ZeroDivisionError.
        if len(num_list) != 2 or num_list[1] == 0:
            raise ValueError
        fps = float(num_list[0]) / float(num_list[1])

    except (subprocess.CalledProcessError, ValueError) as fps_issue:
        print(
            _("ffprobe can't get video fps.\n"
              "It is necessary when output is \".sub\"."))
        if input_m:
            input_str = input_m(
                _("Input your video fps. "
                  "Any illegal input will regard as \".srt\" instead.\n"))
            try:
                fps = float(input_str)
                if fps <= 0.0:
                    raise ValueError from fps_issue
            except ValueError:
                print(_("Use \".srt\" instead."))
                fps = 0.0
        else:
            return 0.0

    return fps
Esempio n. 6
0
def ffprobe_check_file(filename):
    """
    Probe an audio or video file and report whether it looks non-empty.

    The command built from ``constants.DEFAULT_CHECK_CMD`` is executed; its
    stdout (or stderr when stdout is empty) is printed and searched for
    ``bit_rate``. The result is ``False`` when the key is missing or when
    the character one position after the key is "n"/"N", ``True`` otherwise.
    """
    print(_("\nUse ffprobe to check conversion result."))
    probe_cmd = constants.DEFAULT_CHECK_CMD.format(in_=filename)
    print(probe_cmd)

    stdout_data, stderr_data = subprocess.Popen(
        constants.cmd_conversion(probe_cmd),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE).communicate()

    # Fall back to stderr when nothing was written to stdout.
    raw_report = stdout_data if stdout_data else stderr_data
    report = raw_report.decode(sys.stdout.encoding)
    print(report)

    key_pos = report.find('bit_rate')
    if key_pos < 0:
        return False
    # Offset 9 skips the 8-character key plus its one-character separator.
    value_head = report[key_pos + 9:key_pos + 10]
    return value_head.lower() != 'n'
Esempio n. 7
0
def ffprobe_get_fps(  # pylint: disable=superfluous-parens
        video_file, input_m=input):
    """
    Return video_file's fps.

    The command built from ``constants.DEFAULT_VIDEO_FPS_CMD`` is run and
    its output is expected to hold exactly two integers, read as numerator
    and denominator.

    Args:
        video_file: path substituted into the fps command.
        input_m: callable used to ask the user for an fps value when the
            probe fails; pass a falsy value to disable prompting.

    Returns:
        The fps as a float. ``0.0`` is returned when probing fails and
        either prompting is disabled or the user's answer is not a
        positive number.
    """
    try:
        command = constants.DEFAULT_VIDEO_FPS_CMD.format(in_=video_file)
        print(command)
        # The context manager closes the devnull handle that used to be left
        # open after every call.
        with open(os.devnull) as devnull:
            ffprobe_bytes = subprocess.check_output(
                constants.cmd_conversion(command),
                stdin=devnull)
        # Build a real list: on Python 3 map() returns a one-shot iterator,
        # so len(list(...)) exhausted it and indexing it raised TypeError.
        num_list = [
            int(num) for num in re.findall(
                r'\d+', ffprobe_bytes.decode(sys.stdout.encoding))]
        # A zero denominator is treated like any other unusable probe result
        # instead of escaping as ZeroDivisionError.
        if len(num_list) != 2 or num_list[1] == 0:
            raise ValueError
        fps = float(num_list[0]) / float(num_list[1])

    except (subprocess.CalledProcessError, ValueError):
        print(
            _("ffprobe(ffmpeg) can't get video fps.\n"
              "It is necessary when output is \".sub\"."))
        if input_m:
            input_str = input_m(
                _("Input your video fps. "
                  "Any illegal input will regard as \".srt\" instead.\n"))
            try:
                fps = float(input_str)
                if fps <= 0.0:
                    raise ValueError
            except ValueError:
                print(_("Use \".srt\" instead."))
                fps = 0.0
        else:
            return 0.0

    return fps
Esempio n. 8
0
def audio_pre_prcs(  # pylint: disable=too-many-arguments, too-many-branches
        filename,
        is_keep,
        cmds,
        output_name=None,
        input_m=input):
    """
    Pre-process audio file.

    Runs every command of ``cmds`` in order; each command reads the output
    of the previous one (the first reads ``filename``) and every output is
    validated with ``ffprobe_check_file``.

    Args:
        filename: path of the source audio file.
        is_keep: when truthy together with ``output_name``, every
            intermediate file is kept as ``<output_name>_temp_NNN.flac``.
            Otherwise temporary files are used and only the last one
            survives.
        cmds: command templates with ``{in_}`` and ``{out_}`` placeholders.
            A falsy value selects ``constants.DEFAULT_AUDIO_PRCS``.
        output_name: base name for kept intermediate files.
        input_m: callable used to ask for a new path when a kept output
            already exists; a falsy value makes existing files be
            overwritten instead.

    Returns:
        The path of the last output file, or ``None`` when the default
        commands are requested but ``constants.FFMPEG_NORMALIZE_CMD`` is
        falsy, or when an output fails the ffprobe check.

    Raises:
        subprocess.CalledProcessError: if a command exits with a non-zero
            status.
    """
    output_list = [
        filename,
    ]
    if not cmds:
        cmds = constants.DEFAULT_AUDIO_PRCS
        if not constants.FFMPEG_NORMALIZE_CMD:
            print(
                _("Warning: Dependency ffmpeg-normalize "
                  "not found on this machine. "
                  "Try default method."))
            return None

    if is_keep and output_name:
        for i in range(1, len(cmds) + 1):
            output_list.append(output_name +
                               '_temp_{num:0>3d}.flac'.format(num=i))

            if input_m:
                while os.path.isfile(output_list[i]):
                    print(
                        _("There is already a file with the same name"
                          " in this location: \"{dest_name}\".").format(
                              dest_name=output_list[i]))
                    output_list[i] = input_m(
                        _("Input a new path (including directory and file name) "
                          "for output file.\n"))
                    output_list[i] = os.path.splitext(output_list[i])[0]
                    output_list[i] = "{base}.{extension}".format(
                        base=output_list[i], extension='temp.flac')
            else:
                if os.path.isfile(output_list[i]):
                    os.remove(output_list[i])

            command = cmds[i - 1].format(in_=output_list[i - 1],
                                         out_=output_list[i])
            print(command)
            # The context manager closes the devnull handle that used to be
            # left open after every command.
            with open(os.devnull) as devnull:
                subprocess.check_output(constants.cmd_conversion(command),
                                        stdin=devnull)
            if not ffprobe_check_file(output_list[i]):
                print(_("Audio pre-processing failed. Try default method."))
                return None

    else:
        # Every step, including the first one, is validated here, matching
        # the keep-files branch above.
        for i in range(1, len(cmds) + 1):
            temp_file = tempfile.NamedTemporaryFile(suffix='.flac',
                                                    delete=False)
            temp = temp_file.name
            temp_file.close()
            # Only the unique name is wanted; the placeholder file is
            # removed so the command creates the output itself.
            if os.path.isfile(temp):
                os.remove(temp)
            output_list.append(temp)
            command = cmds[i - 1].format(in_=output_list[i - 1],
                                         out_=output_list[i])
            print(command)
            with open(os.devnull) as devnull:
                subprocess.check_output(constants.cmd_conversion(command),
                                        stdin=devnull)
            step_ok = ffprobe_check_file(output_list[i])
            # The previous intermediate is no longer needed whether or not
            # this step succeeded. Index 0 is the caller's source file and
            # must never be deleted.
            if i > 1 and os.path.isfile(output_list[i - 1]):
                os.remove(output_list[i - 1])
            if not step_ok:
                print(_("Audio pre-processing failed. Try default method."))
                if os.path.isfile(output_list[i]):
                    os.remove(output_list[i])
                return None

    return output_list[-1]
Esempio n. 9
0
def audio_or_video_prcs(  # pylint: disable=too-many-branches, too-many-statements, too-many-locals, too-many-arguments
        args,
        input_m=input,
        fps=30.0,
        styles_list=None,
        no_audio_prcs=False):
    """
    Give args and process an input audio or video file.

    Pipeline: detect speech regions (from ``args.ext_regions`` or with
    auditok), optionally write a "times" subtitles file, convert and split
    the audio for the speech API, run speech-to-text, optionally translate,
    and write the subtitles files requested in ``args.output_files``
    ("regions", "src", "dst", "bilingual").

    Args:
        args: parsed options object; ``args.output_files`` is used as a set
            and entries are removed from it as the matching files are
            written.
        input_m: callable forwarded to ``core.str_to_file`` for user
            prompts.
        fps: frame rate forwarded to ``core.list_to_sub_str``.
        styles_list: ASS styles forwarded to ``core.list_to_ass_str``.
        no_audio_prcs: when truthy, ``args.input`` is sent to the splitter
            as-is instead of being converted for the API first.

    Returns:
        None. Results are written to files and reported on stdout.

    Raises:
        exceptions.AutosubException: on failures, and also to stop early
            once every requested output has been produced ("All works
            done." messages).
        exceptions.ConversionException: when splitting the audio fails.
        exceptions.SpeechToTextException: when speech-to-text fails.
        subprocess.CalledProcessError: if a conversion command exits with a
            non-zero status.
    """

    if args.http_speech_api:
        gsv2_api_url = "http://" + constants.GOOGLE_SPEECH_V2_API_URL
    else:
        gsv2_api_url = "https://" + constants.GOOGLE_SPEECH_V2_API_URL

    if not args.output_files:
        raise exceptions.AutosubException(
            _("\nNo works done."
              " Check your \"-of\"/\"--output-files\" option."))

    if args.ext_regions:
        # use external speech regions
        print(_("Use external speech regions."))
        audio_wav_temp = tempfile.NamedTemporaryFile(suffix='.wav',
                                                     delete=False)
        audio_wav = audio_wav_temp.name
        audio_wav_temp.close()
        command = args.audio_conversion_cmd.format(in_=args.input,
                                                   channel=1,
                                                   sample_rate=16000,
                                                   out_=audio_wav)
        print(command)
        # The context manager closes the devnull handle that used to be
        # left open after the conversion.
        with open(os.devnull) as devnull:
            subprocess.check_output(constants.cmd_conversion(command),
                                    stdin=devnull)
        regions = sub_utils.sub_to_speech_regions(audio_wav=audio_wav,
                                                  sub_file=args.ext_regions)
        os.remove(audio_wav)

    else:
        # use auditok_gen_speech_regions
        mode = 0
        if args.strict_min_length:
            mode = auditok.StreamTokenizer.STRICT_MIN_LENGTH
            if args.drop_trailing_silence:
                mode = mode | auditok.StreamTokenizer.DROP_TRAILING_SILENCE
        elif args.drop_trailing_silence:
            mode = auditok.StreamTokenizer.DROP_TRAILING_SILENCE

        audio_wav_temp = tempfile.NamedTemporaryFile(suffix='.wav',
                                                     delete=False)
        audio_wav = audio_wav_temp.name
        audio_wav_temp.close()
        command = args.audio_conversion_cmd.format(in_=args.input,
                                                   channel=1,
                                                   sample_rate=48000,
                                                   out_=audio_wav)
        print(
            _("\nConvert source audio to \"{name}\" "
              "and get audio length for regions detection.").format(
                  name=audio_wav))
        print(command)
        with open(os.devnull) as devnull:
            subprocess.check_output(constants.cmd_conversion(command),
                                    stdin=devnull)

        if not ffmpeg_utils.ffprobe_check_file(audio_wav):
            raise exceptions.AutosubException(
                _("Error: Convert source audio to \"{name}\" failed.").format(
                    name=audio_wav))

        regions = core.auditok_gen_speech_regions(
            audio_wav=audio_wav,
            energy_threshold=args.energy_threshold,
            min_region_size=constants.MIN_REGION_SIZE,
            max_region_size=constants.MAX_REGION_SIZE,
            max_continuous_silence=constants.DEFAULT_CONTINUOUS_SILENCE,
            mode=mode)
        os.remove(audio_wav)
        print(_("\n\"{name}\" has been deleted.").format(name=audio_wav))

    if not regions:
        raise exceptions.AutosubException(
            _("Error: Can't get speech regions."))

    # Styled ASS output applies only when styles were given and the target
    # format belongs to the ASS family; evaluated once for all outputs.
    use_ass_styles = args.styles and \
        args.format in ('ass', 'ssa', 'ass.json')

    if args.speech_language or \
            args.audio_process and 's' in args.audio_process:
        # process output first
        try:
            # set.remove raises KeyError when "regions" was not requested,
            # which skips the whole times-file step.
            args.output_files.remove("regions")
            if use_ass_styles:
                times_string = core.list_to_ass_str(
                    text_list=regions,
                    styles_list=styles_list,
                    subtitles_file_format=args.format)
            else:
                times_string = core.list_to_sub_str(
                    timed_text=regions,
                    fps=fps,
                    subtitles_file_format=args.format)
            # times to subtitles string
            times_name = "{base}.{nt}.{extension}".format(
                base=args.output, nt="times", extension=args.format)
            subtitles_file_path = core.str_to_file(str_=times_string,
                                                   output=times_name,
                                                   input_m=input_m)
            # subtitles string to file

            print(
                _("Times file created at \"{}\".").format(subtitles_file_path))

            if not args.output_files:
                raise exceptions.AutosubException(_("\nAll works done."))

        except KeyError:
            pass

        if not no_audio_prcs:
            audio_for_api_temp = tempfile.NamedTemporaryFile(
                suffix=args.api_suffix, delete=False)
            audio_for_api = audio_for_api_temp.name
            audio_for_api_temp.close()
            command = args.audio_conversion_cmd.format(
                in_=args.input,
                channel=args.api_audio_channel,
                sample_rate=args.api_sample_rate,
                out_=audio_for_api)
            print(
                _("\nConvert to \"{name}\" "
                  "for API.").format(name=audio_for_api))
            print(command)
            with open(os.devnull) as devnull:
                subprocess.check_output(constants.cmd_conversion(command),
                                        stdin=devnull)
            if not ffmpeg_utils.ffprobe_check_file(audio_for_api):
                raise exceptions.AutosubException(
                    _("Error: Convert source audio to \"{name}\" failed.").
                    format(name=audio_for_api))

        else:
            audio_for_api = args.input

        audio_fragments = core.bulk_audio_conversion(
            source_file=audio_for_api,
            output=args.output,
            regions=regions,
            split_cmd=args.audio_split_cmd,
            suffix=args.api_suffix,
            concurrency=args.audio_concurrency,
            is_keep=args.keep)

        if not audio_fragments or \
                len(audio_fragments) != len(regions):
            if not args.keep:
                # audio_fragments may be None or empty on this path;
                # iterating it directly raised TypeError and hid the
                # conversion error below. Falsy entries are skipped too
                # (presumably failed splits -- confirm against
                # core.bulk_audio_conversion).
                for audio_fragment in audio_fragments or []:
                    if audio_fragment:
                        os.remove(audio_fragment)
            raise exceptions.ConversionException(
                _("Error: Conversion failed."))

        if not args.keep:
            # NOTE(review): when no_audio_prcs is set, audio_for_api is
            # args.input, so this removes the caller-supplied input file --
            # confirm that is intended.
            os.remove(audio_for_api)
            print(
                _("\n\"{name}\" has been deleted.").format(name=audio_for_api))

        if args.audio_process and 's' in args.audio_process:
            raise exceptions.AutosubException(
                _("Audio processing complete.\nAll works done."))

        # speech to text
        text_list = core.audio_to_text(audio_fragments=audio_fragments,
                                       api_url=gsv2_api_url,
                                       regions=regions,
                                       api_key=args.gspeechv2,
                                       concurrency=args.speech_concurrency,
                                       src_language=args.speech_language,
                                       min_confidence=args.min_confidence,
                                       audio_rate=args.api_sample_rate,
                                       is_keep=args.keep)

        if not text_list or len(text_list) != len(regions):
            raise exceptions.SpeechToTextException(
                _("Error: Speech-to-text failed.\nAll works done."))

        timed_text = get_timed_text(is_empty_dropped=args.drop_empty_regions,
                                    regions=regions,
                                    text_list=text_list)

        if args.dst_language:
            # process output first
            try:
                args.output_files.remove("src")
                if use_ass_styles:
                    src_string = core.list_to_ass_str(
                        text_list=timed_text,
                        styles_list=styles_list[:2],
                        subtitles_file_format=args.format,
                    )
                else:
                    src_string = core.list_to_sub_str(
                        timed_text=timed_text,
                        fps=fps,
                        subtitles_file_format=args.format)

                # formatting timed_text to subtitles string
                src_name = "{base}.{nt}.{extension}".format(
                    base=args.output,
                    nt=args.speech_language,
                    extension=args.format)
                subtitles_file_path = core.str_to_file(str_=src_string,
                                                       output=src_name,
                                                       input_m=input_m)
                # subtitles string to file
                print(
                    _("Speech language subtitles "
                      "file created at \"{}\".").format(subtitles_file_path))

                if not args.output_files:
                    raise exceptions.AutosubException(_("\nAll works done."))

            except KeyError:
                pass

            # text translation
            if args.gtransv2:
                # use gtransv2
                translated_text = core.list_to_gtv2(
                    text_list=text_list,
                    api_key=args.gtransv2,
                    concurrency=args.trans_concurrency,
                    src_language=args.src_language,
                    dst_language=args.dst_language,
                    lines_per_trans=args.lines_per_trans)
            else:
                # use googletrans
                translated_text = core.list_to_googletrans(
                    text_list,
                    src_language=args.src_language,
                    dst_language=args.dst_language,
                    sleep_seconds=args.sleep_seconds,
                    user_agent=args.user_agent,
                    service_urls=args.service_urls)

            if not translated_text or len(translated_text) != len(regions):
                raise exceptions.AutosubException(
                    _("Error: Translation failed."))

            timed_trans = get_timed_text(
                is_empty_dropped=args.drop_empty_regions,
                regions=regions,
                text_list=translated_text)

            try:
                args.output_files.remove("bilingual")
                if use_ass_styles:
                    bilingual_string = core.list_to_ass_str(
                        text_list=[timed_text, timed_trans],
                        styles_list=styles_list,
                        subtitles_file_format=args.format,
                    )
                else:
                    bilingual_string = core.list_to_sub_str(
                        timed_text=timed_text + timed_trans,
                        fps=fps,
                        subtitles_file_format=args.format)
                # formatting timed_text to subtitles string
                bilingual_name = "{base}.{nt}.{extension}".format(
                    base=args.output,
                    nt=args.src_language + '&' + args.dst_language,
                    extension=args.format)
                subtitles_file_path = core.str_to_file(str_=bilingual_string,
                                                       output=bilingual_name,
                                                       input_m=input_m)
                # subtitles string to file
                print(
                    _("Bilingual subtitles file "
                      "created at \"{}\".").format(subtitles_file_path))

                if not args.output_files:
                    raise exceptions.AutosubException(_("\nAll works done."))

            except KeyError:
                pass

            try:
                args.output_files.remove("dst")
                # formatting timed_text to subtitles string
                if use_ass_styles:
                    if len(args.styles) == 4:
                        # Four styles given: the second pair is used for the
                        # destination language.
                        dst_string = core.list_to_ass_str(
                            text_list=timed_trans,
                            styles_list=styles_list[2:4],
                            subtitles_file_format=args.format,
                        )
                    else:
                        dst_string = core.list_to_ass_str(
                            text_list=timed_trans,
                            styles_list=styles_list,
                            subtitles_file_format=args.format,
                        )
                else:
                    dst_string = core.list_to_sub_str(
                        timed_text=timed_trans,
                        fps=fps,
                        subtitles_file_format=args.format)
                dst_name = "{base}.{nt}.{extension}".format(
                    base=args.output,
                    nt=args.dst_language,
                    extension=args.format)
                subtitles_file_path = core.str_to_file(str_=dst_string,
                                                       output=dst_name,
                                                       input_m=input_m)
                # subtitles string to file
                print(
                    _("Destination language subtitles "
                      "file created at \"{}\".").format(subtitles_file_path))

            except KeyError:
                pass

        else:
            if len(args.output_files) > 1 or not ({"dst", "src"}
                                                  & args.output_files):
                print(
                    _("Override \"-of\"/\"--output-files\" due to your args too few."
                      "\nOutput source subtitles file only."))
            timed_text = get_timed_text(
                is_empty_dropped=args.drop_empty_regions,
                regions=regions,
                text_list=text_list)
            if use_ass_styles:
                src_string = core.list_to_ass_str(
                    text_list=timed_text,
                    styles_list=styles_list,
                    subtitles_file_format=args.format,
                )
            else:
                src_string = core.list_to_sub_str(
                    timed_text=timed_text,
                    fps=fps,
                    subtitles_file_format=args.format)
            # formatting timed_text to subtitles string
            src_name = "{base}.{nt}.{extension}".format(
                base=args.output,
                nt=args.speech_language,
                extension=args.format)
            subtitles_file_path = core.str_to_file(str_=src_string,
                                                   output=src_name,
                                                   input_m=input_m)
            # subtitles string to file
            print(
                _("Speech language subtitles "
                  "file created at \"{}\".").format(subtitles_file_path))

    else:
        print(
            _("Override \"-of\"/\"--output-files\" due to your args too few."
              "\nOutput regions subtitles file only."))
        if use_ass_styles:
            times_subtitles = core.list_to_ass_str(
                text_list=regions,
                styles_list=styles_list,
                subtitles_file_format=args.format)
        else:
            times_subtitles = core.list_to_sub_str(
                timed_text=regions, fps=fps, subtitles_file_format=args.format)
        # times to subtitles string
        times_name = "{base}.{nt}.{extension}".format(base=args.output,
                                                      nt="times",
                                                      extension=args.format)
        subtitles_file_path = core.str_to_file(str_=times_subtitles,
                                               output=times_name,
                                               input_m=input_m)
        # subtitles string to file

        print(_("Times file created at \"{}\".").format(subtitles_file_path))