コード例 #1
0
ファイル: analyzecontainer.py プロジェクト: fduch2k/aeneas
    def _match_files_flat_hierarchy(self, text_files, audio_files):
        """
        Match audio and text files in flat hierarchies.

        Two files match if their names,
        once removed the file extension,
        are the same.

        Examples: ::

            foo/text/a.txt foo/audio/a.mp3 => match: ["a", "foo/text/a.txt", "foo/audio/a.mp3"]
            foo/text/a.txt foo/audio/b.mp3 => no match
            foo/res/c.txt  foo/res/c.mp3   => match: ["c", "foo/res/c.txt", "foo/res/c.mp3"]
            foo/res/d.txt  foo/res/e.mp3   => no match

        :param text_files: the entries corresponding to text files
        :type  text_files: list of strings (path)
        :param audio_files: the entries corresponding to audio files
        :type  audio_files: list of strings (path)
        :rtype: list of lists (see above)
        """
        self._log("Matching files in flat hierarchy")
        self._log(["Text files: '%s'", text_files])
        self._log(["Audio files: '%s'", audio_files])
        d_text = dict()
        d_audio = dict()
        for text_file in text_files:
            text_file_no_ext = gf.file_name_without_extension(text_file)
            d_text[text_file_no_ext] = text_file
            self._log([
                "Added text file '%s' to key '%s'", text_file, text_file_no_ext
            ])
        for audio_file in audio_files:
            audio_file_no_ext = gf.file_name_without_extension(audio_file)
            d_audio[audio_file_no_ext] = audio_file
            self._log([
                "Added audio file '%s' to key '%s'", audio_file,
                audio_file_no_ext
            ])
        tasks = []
        for key in d_text.keys():
            self._log(["Examining text key '%s'", key])
            if key in d_audio:
                self._log(["Key '%s' is also in audio", key])
                tasks.append([key, d_text[key], d_audio[key]])
                self._log(
                    ["Added pair ('%s', '%s')", d_text[key], d_audio[key]])
        return tasks
コード例 #2
0
    def _match_files_flat_hierarchy(self, text_files, audio_files):
        """
        Match audio and text files in flat hierarchies.

        Two files match if their names,
        once removed the file extension,
        are the same.

        Examples: ::

            foo/text/a.txt foo/audio/a.mp3 => match: ["a", "foo/text/a.txt", "foo/audio/a.mp3"]
            foo/text/a.txt foo/audio/b.mp3 => no match
            foo/res/c.txt  foo/res/c.mp3   => match: ["c", "foo/res/c.txt", "foo/res/c.mp3"]
            foo/res/d.txt  foo/res/e.mp3   => no match

        :param text_files: the entries corresponding to text files
        :type  text_files: list of strings (path)
        :param audio_files: the entries corresponding to audio files
        :type  audio_files: list of strings (path)
        :rtype: list of lists (see above)
        """
        self._log("Matching files in flat hierarchy")
        self._log(["Text files: '%s'", text_files])
        self._log(["Audio files: '%s'", audio_files])
        d_text = dict()
        d_audio = dict()
        for text_file in text_files:
            text_file_no_ext = gf.file_name_without_extension(text_file)
            d_text[text_file_no_ext] = text_file
            self._log(["Added text file '%s' to key '%s'", text_file, text_file_no_ext])
        for audio_file in audio_files:
            audio_file_no_ext = gf.file_name_without_extension(audio_file)
            d_audio[audio_file_no_ext] = audio_file
            self._log(["Added audio file '%s' to key '%s'", audio_file, audio_file_no_ext])
        tasks = []
        for key in d_text.keys():
            self._log(["Examining text key '%s'", key])
            if key in d_audio:
                self._log(["Key '%s' is also in audio", key])
                tasks.append([key, d_text[key], d_audio[key]])
                self._log(["Added pair ('%s', '%s')", d_text[key], d_audio[key]])
        return tasks
コード例 #3
0
class ReadAudioCLI(AbstractCLIProgram):
    """
    Read audio file properties.
    """
    AUDIO_FILE = gf.relative_path("res/audio.mp3", __file__)

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Read audio file properties.",
        "synopsis": [(u"AUDIO_FILE", True)],
        "options":
        [u"-f, --full : load samples from file, possibly converting to WAVE"],
        "parameters": [],
        "examples": [u"%s" % (AUDIO_FILE)]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 1:
            return self.print_help()
        audio_file_path = self.actual_arguments[0]

        try:
            audiofile = AudioFile(audio_file_path,
                                  rconf=self.rconf,
                                  logger=self.logger)
            audiofile.read_properties()
            if self.has_option([u"-f", u"--full"]):
                audiofile.read_samples_from_file()
            self.print_generic(audiofile.__unicode__())
            return self.NO_ERROR_EXIT_CODE
        except OSError:
            self.print_error(u"Cannot read file '%s'" % (audio_file_path))
            self.print_error(
                u"Make sure the input file path is written/escaped correctly")
        except AudioFileProbeError:
            self.print_error(u"Unable to call the ffprobe executable '%s'" %
                             (self.rconf[RuntimeConfiguration.FFPROBE_PATH]))
            self.print_error(u"Make sure the path to ffprobe is correct")
        except AudioFileUnsupportedFormatError:
            self.print_error(u"Cannot read properties of file '%s'" %
                             (audio_file_path))
            self.print_error(
                u"Make sure the input file has a format supported by ffprobe")

        return self.ERROR_EXIT_CODE
コード例 #4
0
 def test_file_name_without_extension(self):
     tests = [
         ("", ""),
         ("/", ""),
         ("/foo", "foo"),
         ("/foo.", "foo"),
         ("/.foo", ".foo"),
         ("/foo.bar", "foo"),
         ("/foo/bar/foo.baz", "foo"),
         ("/foo/bar/baz", "baz"),
         ("/foo/bar/.baz", ".baz"),
         ("foo", "foo"),
         ("foo.", "foo"),
         (".foo", ".foo"),
         ("foo.bar", "foo"),
         ("foo/bar/foo.baz", "foo"),
         ("foo/bar/baz", "baz"),
         ("foo/bar/.baz", ".baz"),
     ]
     self.assertIsNone(gf.file_name_without_extension(None))
     for test in tests:
         self.assertEqual(gf.file_name_without_extension(test[0]), test[1])
コード例 #5
0
 def test_file_name_without_extension(self):
     tests = [
         ("", ""),
         ("/", ""),
         ("/foo", "foo"),
         ("/foo.", "foo"),
         ("/.foo", ".foo"),
         ("/foo.bar", "foo"),
         ("/foo/bar/foo.baz", "foo"),
         ("/foo/bar/baz", "baz"),
         ("/foo/bar/.baz", ".baz"),
         ("foo", "foo"),
         ("foo.", "foo"),
         (".foo", ".foo"),
         ("foo.bar", "foo"),
         ("foo/bar/foo.baz", "foo"),
         ("foo/bar/baz", "baz"),
         ("foo/bar/.baz", ".baz"),
     ]
     self.assertIsNone(gf.file_name_without_extension(None))
     for test in tests:
         self.assertEqual(gf.file_name_without_extension(test[0]), test[1])
コード例 #6
0
ファイル: ffmpeg_wrapper.py プロジェクト: shivupoojar/DeFog
class FFMPEGWrapperCLI(AbstractCLIProgram):
    """
    Convert audio files to mono WAV using the ``ffmpeg`` wrapper.
    """
    INPUT_FILE = gf.relative_path("res/audio.mp3", __file__)
    OUTPUT_FILE = "output/audio.wav"

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Convert audio files to mono WAV using the ffmpeg wrapper.",
        "synopsis": [(u"INPUT_FILE OUTPUT_FILE", True)],
        "examples": [u"%s %s" % (INPUT_FILE, OUTPUT_FILE)]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 2:
            return self.print_help()
        input_file_path = self.actual_arguments[0]
        output_file_path = self.actual_arguments[1]

        if not self.check_input_file(input_file_path):
            return self.ERROR_EXIT_CODE
        if not self.check_output_file(output_file_path):
            return self.ERROR_EXIT_CODE

        try:
            converter = FFMPEGWrapper(rconf=self.rconf, logger=self.logger)
            converter.convert(input_file_path, output_file_path)
            self.print_success(u"Converted '%s' into '%s'" %
                               (input_file_path, output_file_path))
            return self.NO_ERROR_EXIT_CODE
        except FFMPEGPathError:
            self.print_error(u"Unable to call the ffmpeg executable '%s'" %
                             (self.rconf[RuntimeConfiguration.FFMPEG_PATH]))
            self.print_error(u"Make sure the path to ffmpeg is correct")
        except OSError:
            self.print_error(u"Cannot convert file '%s' into '%s'" %
                             (input_file_path, output_file_path))
            self.print_error(
                u"Make sure the input file has a format supported by ffmpeg")

        return self.ERROR_EXIT_CODE
コード例 #7
0
class FFPROBEWrapperCLI(AbstractCLIProgram):
    """
    Read audio file properties using the ``ffprobe`` wrapper.
    """
    AUDIO_FILE = gf.relative_path("res/audio.mp3", __file__)

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Read audio file properties using the ffprobe wrapper.",
        "synopsis": [(u"AUDIO_FILE", True)],
        "examples": [u"%s" % (AUDIO_FILE)]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 1:
            return self.print_help()
        audio_file_path = self.actual_arguments[0]

        if not self.check_input_file(audio_file_path):
            return self.ERROR_EXIT_CODE

        try:
            prober = FFPROBEWrapper(rconf=self.rconf, logger=self.logger)
            dictionary = prober.read_properties(audio_file_path)
            for key in sorted(dictionary.keys()):
                self.print_generic(u"%s %s" % (key, dictionary[key]))
            return self.NO_ERROR_EXIT_CODE
        except FFPROBEPathError:
            self.print_error(u"Unable to call the ffprobe executable '%s'" %
                             (self.rconf[RuntimeConfiguration.FFPROBE_PATH]))
            self.print_error(u"Make sure the path to ffprobe is correct")
        except (FFPROBEUnsupportedFormatError, FFPROBEParsingError):
            self.print_error(u"Cannot read properties of file '%s'" %
                             (audio_file_path))
            self.print_error(
                u"Make sure the input file has a format supported by ffprobe")

        return self.ERROR_EXIT_CODE
コード例 #8
0
 def test_file_name_without_extension(self):
     tests = [
         [None, None],
         ["", ""],
         ["/", ""],
         ["/foo", "foo"],
         ["/foo.", "foo"],
         ["/.foo", ".foo"],
         ["/foo.bar", "foo"],
         ["/foo/bar/foo.baz", "foo"],
         ["/foo/bar/baz", "baz"],
         ["/foo/bar/.baz", ".baz"],
         ["foo", "foo"],
         ["foo.", "foo"],
         [".foo", ".foo"],
         ["foo.bar", "foo"],
         ["foo/bar/foo.baz", "foo"],
         ["foo/bar/baz", "baz"],
         ["foo/bar/.baz", ".baz"],
     ]
     for test in tests:
         self.assertEqual(gf.file_name_without_extension(test[0]), test[1])
コード例 #9
0
 def test_file_name_without_extension(self):
     tests = [
         [None, None],
         ["", ""],
         ["/", ""],
         ["/foo", "foo"],
         ["/foo.", "foo"],
         ["/.foo", ".foo"],
         ["/foo.bar", "foo"],
         ["/foo/bar/foo.baz", "foo"],
         ["/foo/bar/baz", "baz"],
         ["/foo/bar/.baz", ".baz"],
         ["foo", "foo"],
         ["foo.", "foo"],
         [".foo", ".foo"],
         ["foo.bar", "foo"],
         ["foo/bar/foo.baz", "foo"],
         ["foo/bar/baz", "baz"],
         ["foo/bar/.baz", ".baz"],
     ]
     for test in tests:
         self.assertEqual(gf.file_name_without_extension(test[0]), test[1])
コード例 #10
0
ファイル: download.py プロジェクト: theolivenbaum/aeneas
class DownloadCLI(AbstractCLIProgram):
    """
    Download a file from a Web source.

    Currently, it downloads an audio file from a YouTube video.
    """
    OUTPUT_FILE_M4A = "output/sonnet.m4a"
    OUTPUT_FILE_OGG = "output/sonnet.ogg"
    URL_YOUTUBE = "https://www.youtube.com/watch?v=rU4a7AA8wM0"

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description": u"Download an audio file from a YouTube video.",
        "synopsis": [
            (u"YOUTUBE_URL [OUTPUT_FILE]", True)
        ],
        "examples": [
            u"%s --list" % (URL_YOUTUBE),
            u"%s %s" % (URL_YOUTUBE, OUTPUT_FILE_M4A),
            u"%s %s --index=0" % (URL_YOUTUBE, OUTPUT_FILE_M4A),
            u"%s %s --smallest-audio" % (URL_YOUTUBE, OUTPUT_FILE_OGG),
            u"%s %s --largest-audio --format=ogg" % (URL_YOUTUBE, OUTPUT_FILE_OGG),
        ],
        "options": [
            u"--format=FMT : preferably download audio stream in FMT format",
            u"--index=IDX : download audio stream with given index",
            u"--largest-audio : download largest audio stream (default)",
            u"--list : list all available audio streams but do not download",
            u"--smallest-audio : download smallest audio stream",
        ]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 2:
            return self.print_help()
        source_url = self.actual_arguments[0]
        output_file_path = self.actual_arguments[1]

        download = not self.has_option("--list")
        # largest_audio = True by default or if explicitly given
        if self.has_option("--largest-audio"):
            largest_audio = True
        else:
            largest_audio = not self.has_option("--smallest-audio")
        preferred_format = self.has_option_with_value("--format")
        preferred_index = gf.safe_int(self.has_option_with_value("--index"), None)

        try:
            if download:
                self.print_info(u"Downloading audio stream from '%s' ..." % source_url)
                downloader = Downloader(logger=self.logger)
                result = downloader.audio_from_youtube(
                    source_url,
                    download=download,
                    output_file_path=output_file_path,
                    preferred_index=preferred_index,
                    largest_audio=largest_audio,
                    preferred_format=preferred_format
                )
                self.print_info(u"Downloading audio stream from '%s' ... done" % source_url)
                self.print_success(u"Downloaded file '%s'" % result)
            else:
                self.print_info(u"Downloading stream info from '%s' ..." % source_url)
                downloader = Downloader(logger=self.logger)
                result = downloader.audio_from_youtube(
                    source_url,
                    download=False
                )
                self.print_info(u"Downloading stream info from '%s' ... done" % source_url)
                msg = []
                msg.append(u"%s\t%s\t%s\t%s" % ("Index", "Format", "Bitrate", "Size"))
                i = 0
                for audio in result:
                    ext = audio.extension
                    bitrate = audio.bitrate
                    size = gf.human_readable_number(audio.get_filesize())
                    msg.append(u"%d\t%s\t%s\t%s" % (i, ext, bitrate, size))
                    i += 1
                self.print_generic(u"Available audio streams:")
                self.print_generic(u"\n".join(msg))
            return self.NO_ERROR_EXIT_CODE
        except ImportError:
            self.print_no_pafy_error()
        except Exception as exc:
            self.print_error(u"An unexpected error occurred while downloading audio from YouTube:")
            self.print_error(u"%s" % exc)

        return self.ERROR_EXIT_CODE
コード例 #11
0
class ConvertSyncMapCLI(AbstractCLIProgram):
    """
    Convert a sync map from a format to another.
    """
    AUDIO = gf.relative_path("res/audio.mp3", __file__)
    SMIL_PARAMETERS = "--audio-ref=audio/sonnet001.mp3 --page-ref=text/sonnet001.xhtml"
    SYNC_MAP_CSV = gf.relative_path("res/sonnet.csv", __file__)
    SYNC_MAP_JSON = gf.relative_path("res/sonnet.json", __file__)
    SYNC_MAP_ZZZ = gf.relative_path("res/sonnet.zzz", __file__)
    OUTPUT_HTML = "output/sonnet.html"
    OUTPUT_MAP_DAT = "output/syncmap.dat"
    OUTPUT_MAP_JSON = "output/syncmap.json"
    OUTPUT_MAP_SMIL = "output/syncmap.smil"
    OUTPUT_MAP_SRT = "output/syncmap.srt"
    OUTPUT_MAP_TXT = "output/syncmap.txt"

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Convert a sync map from a format to another.",
        "synopsis": [
            (u"INPUT_SYNCMAP OUTPUT_SYNCMAP", True),
            (u"INPUT_SYNCMAP OUTPUT_HTML AUDIO_FILE --output-html", True),
        ],
        "examples": [
            u"%s %s" % (SYNC_MAP_JSON, OUTPUT_MAP_SRT),
            u"%s %s --output-format=txt" % (SYNC_MAP_JSON, OUTPUT_MAP_DAT),
            u"%s %s --input-format=csv" % (SYNC_MAP_ZZZ, OUTPUT_MAP_TXT),
            u"%s %s --language=en" % (SYNC_MAP_CSV, OUTPUT_MAP_JSON),
            u"%s %s %s" % (SYNC_MAP_JSON, OUTPUT_MAP_SMIL, SMIL_PARAMETERS),
            u"%s %s %s --output-html" % (SYNC_MAP_JSON, OUTPUT_HTML, AUDIO)
        ],
        "options": [
            u"--audio-ref=REF : use REF for the audio ref attribute (smil, smilh, smilm)",
            u"--input-format=FMT : input sync map file has format FMT",
            u"--language=CODE : set language to CODE",
            u"--output-format=FMT : output sync map file has format FMT",
            u"--output-html : output HTML file for fine tuning",
            u"--page-ref=REF : use REF for the text ref attribute (smil, smilh, smilm)"
        ]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 2:
            return self.print_help()
        input_file_path = self.actual_arguments[0]
        output_file_path = self.actual_arguments[1]
        output_html = self.has_option(u"--output-html")

        if not self.check_input_file(input_file_path):
            return self.ERROR_EXIT_CODE
        input_sm_format = self.has_option_with_value(u"--input-format")
        if input_sm_format is None:
            input_sm_format = gf.file_extension(input_file_path)
        if not self.check_format(input_sm_format):
            return self.ERROR_EXIT_CODE

        if not self.check_output_file(output_file_path):
            return self.ERROR_EXIT_CODE

        if output_html:
            if len(self.actual_arguments) < 3:
                return self.print_help()
            audio_file_path = self.actual_arguments[2]
            if not self.check_input_file(audio_file_path):
                return self.ERROR_EXIT_CODE
        else:
            output_sm_format = self.has_option_with_value(u"--output-format")
            if output_sm_format is None:
                output_sm_format = gf.file_extension(output_file_path)
            if not self.check_format(output_sm_format):
                return self.ERROR_EXIT_CODE

        # TODO add a way to specify a text file for input formats like SMIL
        #      that do not carry the source text
        language = self.has_option_with_value(u"--language")
        audio_ref = self.has_option_with_value(u"--audio-ref")
        page_ref = self.has_option_with_value(u"--page-ref")
        parameters = {
            gc.PPN_SYNCMAP_LANGUAGE: language,
            gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF: audio_ref,
            gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF: page_ref
        }

        try:
            self.print_info(u"Reading sync map in '%s' format from file '%s'" %
                            (input_sm_format, input_file_path))
            self.print_info(u"Reading sync map...")
            syncmap = SyncMap(logger=self.logger)
            syncmap.read(input_sm_format, input_file_path, parameters)
            self.print_info(u"Reading sync map... done")
            self.print_info(u"Read %d sync map fragments" % (len(syncmap)))
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while reading the input sync map:"
            )
            self.print_error(u"%s" % (exc))
            return self.ERROR_EXIT_CODE

        if output_html:
            try:
                self.print_info(u"Writing HTML file...")
                syncmap.output_html_for_tuning(audio_file_path,
                                               output_file_path, parameters)
                self.print_info(u"Writing HTML file... done")
                self.print_success(u"Created HTML file '%s'" %
                                   (output_file_path))
                return self.NO_ERROR_EXIT_CODE
            except Exception as exc:
                self.print_error(
                    u"An unexpected error occurred while writing the output HTML file:"
                )
                self.print_error(u"%s" % (exc))
        else:
            try:
                self.print_info(u"Writing sync map...")
                syncmap.write(output_sm_format, output_file_path, parameters)
                self.print_info(u"Writing sync map... done")
                self.print_success(u"Created '%s' sync map file '%s'" %
                                   (output_sm_format, output_file_path))
                return self.NO_ERROR_EXIT_CODE
            except Exception as exc:
                self.print_error(
                    u"An unexpected error occurred while writing the output sync map:"
                )
                self.print_error(u"%s" % (exc))

        return self.ERROR_EXIT_CODE

    def check_format(self, sm_format):
        """
        Return ``True`` if the given sync map format is allowed,
        and ``False`` otherwise.

        :param sm_format: the sync map format to be checked
        :type  sm_format: Unicode string
        :rtype: bool
        """
        if sm_format not in SyncMapFormat.ALLOWED_VALUES:
            self.print_error(u"Sync map format '%s' is not allowed" %
                             (sm_format))
            self.print_info(u"Allowed formats:")
            self.print_generic(u" ".join(SyncMapFormat.ALLOWED_VALUES))
            return False
        return True
コード例 #12
0
ファイル: run_vad.py プロジェクト: theolivenbaum/aeneas
class RunVADCLI(AbstractCLIProgram):
    """
    Extract a list of speech intervals from the given audio file,
    using the MFCC energy-based VAD algorithm.
    """
    INPUT_FILE = gf.relative_path("res/audio.mp3", __file__)
    OUTPUT_BOTH = "output/both.txt"
    OUTPUT_NONSPEECH = "output/nonspeech.txt"
    OUTPUT_SPEECH = "output/speech.txt"
    MODES = [u"both", u"nonspeech", u"speech"]

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Extract a list of speech intervals using the MFCC energy-based VAD.",
        "synopsis":
        [(u"AUDIO_FILE [%s] [OUTPUT_FILE]" % (u"|".join(MODES)), True)],
        "examples": [
            u"%s both %s" % (INPUT_FILE, OUTPUT_BOTH),
            u"%s nonspeech %s" % (INPUT_FILE, OUTPUT_NONSPEECH),
            u"%s speech %s" % (INPUT_FILE, OUTPUT_SPEECH)
        ],
        "options": [
            u"-i, --index : output intervals as indices instead of seconds",
        ]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 2:
            return self.print_help()
        audio_file_path = self.actual_arguments[0]
        mode = self.actual_arguments[1]
        if mode not in [u"speech", u"nonspeech", u"both"]:
            return self.print_help()
        output_file_path = None
        if len(self.actual_arguments) >= 3:
            output_file_path = self.actual_arguments[2]
        output_time = not self.has_option([u"-i", u"--index"])

        self.check_c_extensions("cmfcc")
        if not self.check_input_file(audio_file_path):
            return self.ERROR_EXIT_CODE
        if (output_file_path is not None) and (
                not self.check_output_file(output_file_path)):
            return self.ERROR_EXIT_CODE

        self.print_info(u"Reading audio...")
        try:
            audio_file_mfcc = AudioFileMFCC(audio_file_path,
                                            rconf=self.rconf,
                                            logger=self.logger)
        except AudioFileConverterError:
            self.print_error(u"Unable to call the ffmpeg executable '%s'" %
                             (self.rconf[RuntimeConfiguration.FFMPEG_PATH]))
            self.print_error(u"Make sure the path to ffmpeg is correct")
            return self.ERROR_EXIT_CODE
        except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
            self.print_error(u"Cannot read file '%s'" % (audio_file_path))
            self.print_error(u"Check that its format is supported by ffmpeg")
            return self.ERROR_EXIT_CODE
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while reading the audio file:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE
        self.print_info(u"Reading audio... done")

        self.print_info(u"Executing VAD...")
        audio_file_mfcc.run_vad()
        self.print_info(u"Executing VAD... done")

        speech = audio_file_mfcc.intervals(speech=True, time=output_time)
        nonspeech = audio_file_mfcc.intervals(speech=False, time=output_time)
        if mode == u"speech":
            intervals = speech
        elif mode == u"nonspeech":
            intervals = nonspeech
        elif mode == u"both":
            speech = [[x[0], x[1], u"speech"] for x in speech]
            nonspeech = [[x[0], x[1], u"nonspeech"] for x in nonspeech]
            intervals = sorted(speech + nonspeech)
        intervals = [tuple(interval) for interval in intervals]
        self.write_to_file(output_file_path, intervals, output_time)

        return self.NO_ERROR_EXIT_CODE

    def write_to_file(self, output_file_path, intervals, time):
        """
        Write intervals to file.

        :param output_file_path: path of the output file to be written;
                                 if ``None``, print to stdout
        :type  output_file_path: string (path)
        :param intervals: a list of tuples, each representing an interval
        :type  intervals: list of tuples
        """
        msg = []
        if len(intervals) > 0:
            if len(intervals[0]) == 2:
                template = u"%.3f\t%.3f" if time else u"%d\t%d"
            else:
                template = u"%.3f\t%.3f\t%s" if time else u"%d\t%d\t%s"
            msg = [template % (interval) for interval in intervals]
        if output_file_path is None:
            self.print_info(u"Intervals detected:")
            for line in msg:
                self.print_generic(line)
        else:
            with io.open(output_file_path, "w",
                         encoding="utf-8") as output_file:
                output_file.write(u"\n".join(msg))
                self.print_success(u"Created file '%s'" % output_file_path)
コード例 #13
0
class SynthesizeTextCLI(AbstractCLIProgram):
    """
    Synthesize several text fragments,
    producing a WAV audio file.
    """
    OUTPUT_FILE = "output/synthesized.wav"
    TEXT_FILE_MPLAIN = gf.relative_path("res/mplain.txt", __file__)
    TEXT_FILE_MUNPARSED = gf.relative_path("res/munparsed2.xhtml", __file__)
    TEXT_FILE_PARSED = gf.relative_path("res/parsed.txt", __file__)
    TEXT_FILE_PLAIN = gf.relative_path("res/plain.txt", __file__)
    TEXT_FILE_SUBTITLES = gf.relative_path("res/subtitles.txt", __file__)
    TEXT_FILE_UNPARSED = gf.relative_path("res/unparsed.xhtml", __file__)

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Synthesize several text fragments.",
        "synopsis":
        [(u"list 'fragment 1|fragment 2|...|fragment N' LANGUAGE OUTPUT_FILE",
          True),
         (u"[mplain|munparsed|parsed|plain|subtitles|unparsed] TEXT_FILE LANGUAGE OUTPUT_FILE",
          True)],
        "examples": [
            u"list 'From|fairest|creatures|we|desire|increase' eng %s" %
            (OUTPUT_FILE),
            u"mplain %s eng %s" % (TEXT_FILE_MPLAIN, OUTPUT_FILE),
            u"munparsed %s eng %s --l1-id-regex=p[0-9]+ --l2-id-regex=s[0-9]+ --l3-id-regex=w[0-9]+"
            % (TEXT_FILE_MUNPARSED, OUTPUT_FILE),
            u"parsed %s eng %s" % (TEXT_FILE_PARSED, OUTPUT_FILE),
            u"plain %s eng %s" % (TEXT_FILE_PLAIN, OUTPUT_FILE),
            u"subtitles %s eng %s" % (TEXT_FILE_SUBTITLES, OUTPUT_FILE),
            u"unparsed %s eng %s --id-regex=f[0-9]*" %
            (TEXT_FILE_UNPARSED, OUTPUT_FILE),
            u"unparsed %s eng %s --class-regex=ra" %
            (TEXT_FILE_UNPARSED, OUTPUT_FILE),
            u"unparsed %s eng %s --id-regex=f[0-9]* --sort=numeric" %
            (TEXT_FILE_UNPARSED, OUTPUT_FILE),
            u"plain %s eng %s --start=5" % (TEXT_FILE_PLAIN, OUTPUT_FILE),
            u"plain %s eng %s --end=10" % (TEXT_FILE_PLAIN, OUTPUT_FILE),
            u"plain %s eng %s --start=5 --end=10" %
            (TEXT_FILE_PLAIN, OUTPUT_FILE),
            u"plain %s eng %s --backwards" % (TEXT_FILE_PLAIN, OUTPUT_FILE),
            u"plain %s eng %s --quit-after=10.0" %
            (TEXT_FILE_PLAIN, OUTPUT_FILE),
        ],
        "options": [
            u"--class-regex=REGEX : extract text from elements with class attribute matching REGEX (unparsed)",
            u"--end=INDEX : slice the text file until fragment INDEX",
            u"--id-format=FMT : use FMT for generating text id attributes (subtitles, plain)",
            u"--id-regex=REGEX : extract text from elements with id attribute matching REGEX (unparsed)",
            u"--l1-id-regex=REGEX : extract text from level 1 elements with id attribute matching REGEX (munparsed)",
            u"--l2-id-regex=REGEX : extract text from level 2 elements with id attribute matching REGEX (munparsed)",
            u"--l3-id-regex=REGEX : extract text from level 3 elements with id attribute matching REGEX (munparsed)",
            u"--quit-after=DUR : synthesize fragments until DUR seconds or the end of text is reached",
            u"--sort=ALGORITHM : sort the matched element id attributes using ALGORITHM (lexicographic, numeric, unsorted)",
            u"--start=INDEX : slice the text file from fragment INDEX",
            u"-b, --backwards : synthesize from the last fragment to the first one",
        ]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 4:
            return self.print_help()
        text_format = gf.safe_unicode(self.actual_arguments[0])
        if text_format == u"list":
            text = gf.safe_unicode(self.actual_arguments[1])
        elif text_format in TextFileFormat.ALLOWED_VALUES:
            text = self.actual_arguments[1]
            if not self.check_input_file(text):
                return self.ERROR_EXIT_CODE
        else:
            return self.print_help()

        l1_id_regex = self.has_option_with_value(u"--l1-id-regex")
        l2_id_regex = self.has_option_with_value(u"--l2-id-regex")
        l3_id_regex = self.has_option_with_value(u"--l3-id-regex")
        id_regex = self.has_option_with_value(u"--id-regex")
        class_regex = self.has_option_with_value(u"--class-regex")
        sort = self.has_option_with_value(u"--sort")
        backwards = self.has_option([u"-b", u"--backwards"])
        quit_after = gf.safe_float(self.has_option_with_value(u"--quit-after"),
                                   None)
        start_fragment = gf.safe_int(self.has_option_with_value(u"--start"),
                                     None)
        end_fragment = gf.safe_int(self.has_option_with_value(u"--end"), None)
        parameters = {
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: l1_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: l2_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: l3_id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX: class_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX: id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT: sort,
        }
        if (text_format == TextFileFormat.MUNPARSED) and (
            (l1_id_regex is None) or (l2_id_regex is None) or
            (l3_id_regex is None)):
            self.print_error(
                u"You must specify --l1-id-regex and --l2-id-regex and --l3-id-regex for munparsed format"
            )
            return self.ERROR_EXIT_CODE
        if (text_format == TextFileFormat.UNPARSED) and (
                id_regex is None) and (class_regex is None):
            self.print_error(
                u"You must specify --id-regex and/or --class-regex for unparsed format"
            )
            return self.ERROR_EXIT_CODE

        language = gf.safe_unicode(self.actual_arguments[2])

        output_file_path = self.actual_arguments[3]
        if not self.check_output_file(output_file_path):
            return self.ERROR_EXIT_CODE

        text_file = self.get_text_file(text_format, text, parameters)
        if text_file is None:
            self.print_error(
                u"Unable to build a TextFile from the given parameters")
            return self.ERROR_EXIT_CODE
        elif len(text_file) == 0:
            self.print_error(u"No text fragments found")
            return self.ERROR_EXIT_CODE
        text_file.set_language(language)
        self.print_info(u"Read input text with %d fragments" %
                        (len(text_file)))
        if start_fragment is not None:
            self.print_info(u"Slicing from index %d" % (start_fragment))
        if end_fragment is not None:
            self.print_info(u"Slicing to index %d" % (end_fragment))
        text_slice = text_file.get_slice(start_fragment, end_fragment)
        self.print_info(u"Synthesizing %d fragments" % (len(text_slice)))

        if quit_after is not None:
            self.print_info(u"Stop synthesizing upon reaching %.3f seconds" %
                            (quit_after))

        try:
            synt = Synthesizer(rconf=self.rconf, logger=self.logger)
            synt.synthesize(text_slice,
                            output_file_path,
                            quit_after=quit_after,
                            backwards=backwards)
            self.print_success(u"Created file '%s'" % output_file_path)
            synt.clear_cache()
            return self.NO_ERROR_EXIT_CODE
        except ImportError as exc:
            tts = self.rconf[RuntimeConfiguration.TTS]
            if tts == Synthesizer.AWS:
                self.print_error(
                    u"You need to install Python module boto3 to use the AWS Polly TTS API wrapper. Run:"
                )
                self.print_error(u"$ pip install boto3")
                self.print_error(u"or, to install for all users:")
                self.print_error(u"$ sudo pip install boto3")
            elif tts == Synthesizer.NUANCE:
                self.print_error(
                    u"You need to install Python module requests to use the Nuance TTS API wrapper. Run:"
                )
                self.print_error(u"$ pip install requests")
                self.print_error(u"or, to install for all users:")
                self.print_error(u"$ sudo pip install requests")
            else:
                self.print_error(
                    u"An unexpected error occurred while synthesizing text:")
                self.print_error(u"%s" % exc)
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while synthesizing text:")
            self.print_error(u"%s" % exc)

        return self.ERROR_EXIT_CODE
コード例 #14
0
class ExtractMFCCCLI(AbstractCLIProgram):
    """
    Extract MFCCs from a given audio file.
    """
    INPUT_FILE = gf.relative_path("res/audio.wav", __file__)
    OUTPUT_FILE = "output/audio.wav.mfcc.txt"

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Extract MFCCs from a given audio file as a fat matrix.",
        "synopsis": [(u"AUDIO_FILE OUTPUT_FILE", True)],
        "examples": [u"%s %s" % (INPUT_FILE, OUTPUT_FILE)],
        "options": [
            u"-b, --binary : output MFCCs as a float64 binary file",
            u"-d, --delete-first : do not output the 0th MFCC coefficient",
            u"-n, --npy : output MFCCs as a NumPy .npy binary file",
            u"-t, --transpose : transpose the MFCCs matrix, returning a tall matrix",
            u"-z, --npz : output MFCCs as a NumPy compressed .npz binary file",
            u"--format=FMT : output to text file using format FMT (default: '%.18e')"
        ]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 2:
            return self.print_help()
        input_file_path = self.actual_arguments[0]
        output_file_path = self.actual_arguments[1]

        output_text_format = self.has_option_with_value(u"--format")
        if output_text_format is None:
            output_text_format = u"%.18e"
        output_binary = self.has_option([u"-b", u"--binary"])
        output_npz = self.has_option([u"-z", u"--npz"])
        output_npy = self.has_option([u"-n", u"--npy"])
        delete_first = self.has_option([u"-d", u"--delete-first"])
        transpose = self.has_option([u"-t", u"--transpose"])

        self.check_c_extensions("cmfcc")
        if not self.check_input_file(input_file_path):
            return self.ERROR_EXIT_CODE
        if not self.check_output_file(output_file_path):
            return self.ERROR_EXIT_CODE

        try:
            mfccs = AudioFileMFCC(input_file_path,
                                  rconf=self.rconf,
                                  logger=self.logger).all_mfcc
            if delete_first:
                mfccs = mfccs[1:, :]
            if transpose:
                mfccs = mfccs.transpose()
            if output_binary:
                # save as a raw C float64 binary file
                mapped = numpy.memmap(output_file_path,
                                      dtype="float64",
                                      mode="w+",
                                      shape=mfccs.shape)
                mapped[:] = mfccs[:]
                mapped.flush()
                del mapped
            elif output_npz:
                # save as a .npz compressed binary file
                with io.open(output_file_path, "wb") as output_file:
                    numpy.savez(output_file, mfccs)
            elif output_npy:
                # save as a .npy binary file
                with io.open(output_file_path, "wb") as output_file:
                    numpy.save(output_file, mfccs)
            else:
                # save as a text file
                # NOTE: in Python 2, passing the fmt value a Unicode string crashes NumPy
                #       hence, converting back to bytes, which works in Python 3 too
                numpy.savetxt(output_file_path,
                              mfccs,
                              fmt=gf.safe_bytes(output_text_format))
            self.print_info(u"MFCCs shape: %d %d" % (mfccs.shape))
            self.print_success(u"MFCCs saved to '%s'" % (output_file_path))
            return self.NO_ERROR_EXIT_CODE
        except AudioFileConverterError:
            self.print_error(u"Unable to call the ffmpeg executable '%s'" %
                             (self.rconf[RuntimeConfiguration.FFMPEG_PATH]))
            self.print_error(u"Make sure the path to ffmpeg is correct")
        except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
            self.print_error(u"Cannot read file '%s'" % (input_file_path))
            self.print_error(u"Check that its format is supported by ffmpeg")
        except OSError:
            self.print_error(u"Cannot write file '%s'" % (output_file_path))

        return self.ERROR_EXIT_CODE
コード例 #15
0
ファイル: read_text.py プロジェクト: theolivenbaum/aeneas
class ReadTextCLI(AbstractCLIProgram):
    """
    Read text fragments from file.
    """
    TEXT_FILE_MPLAIN = gf.relative_path("res/mplain.txt", __file__)
    TEXT_FILE_MUNPARSED = gf.relative_path("res/munparsed2.xhtml", __file__)
    TEXT_FILE_PARSED = gf.relative_path("res/parsed.txt", __file__)
    TEXT_FILE_PLAIN = gf.relative_path("res/plain.txt", __file__)
    TEXT_FILE_SUBTITLES = gf.relative_path("res/subtitles.txt", __file__)
    TEXT_FILE_UNPARSED = gf.relative_path("res/unparsed.xhtml", __file__)

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Read text fragments from file.",
        "synopsis":
        [(u"list 'fragment 1|fragment 2|...|fragment N'", True),
         (u"[mplain|munparsed|parsed|plain|subtitles|unparsed] TEXT_FILE",
          True)],
        "options": [
            u"--class-regex=REGEX : extract text from elements with class attribute matching REGEX (unparsed)",
            u"--id-regex=REGEX : extract text from elements with id attribute matching REGEX (unparsed)",
            u"--id-format=FMT : use FMT for generating text id attributes (subtitles, plain)",
            u"--l1-id-regex=REGEX : extract text from level 1 elements with id attribute matching REGEX (munparsed)",
            u"--l2-id-regex=REGEX : extract text from level 2 elements with id attribute matching REGEX (munparsed)",
            u"--l3-id-regex=REGEX : extract text from level 3 elements with id attribute matching REGEX (munparsed)",
            u"--sort=ALGORITHM : sort the matched element id attributes using ALGORITHM (lexicographic, numeric, unsorted)",
        ],
        "examples": [
            u"list 'From|fairest|creatures|we|desire|increase'",
            u"mplain %s" % (TEXT_FILE_MPLAIN),
            u"munparsed %s --l1-id-regex=p[0-9]+ --l2-id-regex=s[0-9]+ --l3-id-regex=w[0-9]+"
            % (TEXT_FILE_MUNPARSED),
            u"parsed %s" % (TEXT_FILE_PARSED),
            u"plain %s" % (TEXT_FILE_PLAIN),
            u"plain %s --id-format=Word%%06d" % (TEXT_FILE_PLAIN),
            u"subtitles %s" % (TEXT_FILE_SUBTITLES),
            u"subtitles %s --id-format=Sub%%03d" % (TEXT_FILE_SUBTITLES),
            u"unparsed %s --id-regex=f[0-9]*" % (TEXT_FILE_UNPARSED),
            u"unparsed %s --class-regex=ra --sort=unsorted" %
            (TEXT_FILE_UNPARSED),
            u"unparsed %s --id-regex=f[0-9]* --sort=numeric" %
            (TEXT_FILE_UNPARSED),
            u"unparsed %s --id-regex=f[0-9]* --sort=lexicographic" %
            (TEXT_FILE_UNPARSED)
        ]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 2:
            return self.print_help()
        text_format = gf.safe_unicode(self.actual_arguments[0])
        if text_format == u"list":
            text = gf.safe_unicode(self.actual_arguments[1])
        elif text_format in TextFileFormat.ALLOWED_VALUES:
            text = self.actual_arguments[1]
            if not self.check_input_file(text):
                return self.ERROR_EXIT_CODE
        else:
            return self.print_help()

        l1_id_regex = self.has_option_with_value(u"--l1-id-regex")
        l2_id_regex = self.has_option_with_value(u"--l2-id-regex")
        l3_id_regex = self.has_option_with_value(u"--l3-id-regex")
        id_regex = self.has_option_with_value(u"--id-regex")
        id_format = self.has_option_with_value(u"--id-format")
        class_regex = self.has_option_with_value(u"--class-regex")
        sort = self.has_option_with_value(u"--sort")
        parameters = {
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: l1_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: l2_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: l3_id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX: id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX: class_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT: sort,
            gc.PPN_TASK_OS_FILE_ID_REGEX: id_format
        }
        if (text_format == TextFileFormat.MUNPARSED) and (
            (l1_id_regex is None) or (l2_id_regex is None) or
            (l3_id_regex is None)):
            self.print_error(
                u"You must specify --l1-id-regex and --l2-id-regex and --l3-id-regex for munparsed format"
            )
            return self.ERROR_EXIT_CODE
        if (text_format == TextFileFormat.UNPARSED) and (
                id_regex is None) and (class_regex is None):
            self.print_error(
                u"You must specify --id-regex and/or --class-regex for unparsed format"
            )
            return self.ERROR_EXIT_CODE
        if (text_format in [TextFileFormat.PLAIN, TextFileFormat.SUBTITLES
                            ]) and (id_format is not None):
            try:
                identifier = id_format % 1
            except (TypeError, ValueError):
                self.print_error(
                    u"The given string '%s' is not a valid id format" %
                    id_format)
                return self.ERROR_EXIT_CODE

        text_file = self.get_text_file(text_format, text, parameters)
        if text_file is None:
            self.print_error(
                u"Unable to build a TextFile from the given parameters")
        elif len(text_file) == 0:
            self.print_error(u"No text fragments found")
        else:
            self.print_generic(text_file.__unicode__())
            return self.NO_ERROR_EXIT_CODE
        return self.ERROR_EXIT_CODE
コード例 #16
0
class HydraCLI(AbstractCLIProgram):
    """
    This "hydra" tool invokes another aeneas.tool,
    according to the specified tool switch.
    """
    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Invoke the specified aeneas tool",
        "synopsis": [(u"TOOL_PARAMETER TOOL_ARGUMENTS", True)],
        "options": [],
        "parameters": [
            u"--convert-syncmap: call aeneas.tools.convert_syncmap",
            u"--download: call aeneas.tools.download",
            u"--execute-job: call aeneas.tools.execute_job",
            u"--execute-task: call aeneas.tools.execute_task (default)",
            u"--extract-mfcc: call aeneas.tools.extract_mfcc",
            u"--ffmpeg-wrapper: call aeneas.tools.ffmpeg_wrapper",
            u"--ffprobe-wrapper: call aeneas.tools.ffprobe_wrapper",
            u"--plot-waveform: call aeneas.tools.plot_waveform",
            u"--read-audio: call aeneas.tools.read_audio",
            u"--read-text: call aeneas.tools.read_text",
            u"--run-sd: call aeneas.tools.run_sd",
            u"--run-vad: call aeneas.tools.run_vad",
            u"--synthesize-text: call aeneas.tools.synthesize_text",
            u"--validate: call aeneas.tools.validate",
        ],
        "examples": [
            u"--execute-task --help",
            u"--execute-task --examples",
            u"--execute-task --example-json",
            u"--execute-job --help",
        ]
    }

    TOOLS = [
        (ConvertSyncMapCLI, [u"--convert-syncmap"]),
        (DownloadCLI, [u"--download"]),
        (ExecuteJobCLI, [u"--execute-job"]),
        (ExecuteTaskCLI, [u"--execute-task"]),
        (ExtractMFCCCLI, [u"--extract-mfcc"]),
        (FFMPEGWrapperCLI, [u"--ffmpeg-wrapper"]),
        (FFPROBEWrapperCLI, [u"--ffprobe-wrapper"]),
        (PlotWaveformCLI, [u"--plot-waveform"]),
        (ReadAudioCLI, [u"--read-audio"]),
        (ReadTextCLI, [u"--read-text"]),
        (RunSDCLI, [u"--run-sd"]),
        (RunVADCLI, [u"--run-vad"]),
        (SynthesizeTextCLI, [u"--synthesize-text"]),
        (ValidateCLI, [u"--validate"]),
    ]

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        # if no actual arguments, print help
        if len(self.actual_arguments) < 1:
            return self.print_help(short=True)

        # check if we have a recognized tool switch
        for cls, switches in self.TOOLS:
            if self.has_option(switches):
                arguments = [a for a in sys.argv if a not in switches]
                return cls(invoke=(self.invoke + u" %s" % switches[0])).run(
                    arguments=arguments)

        # check if we have -h, --help, or --version
        if u"-h" in self.actual_arguments:
            return self.print_help(short=True)
        if u"--help" in self.actual_arguments:
            return self.print_help(short=False)
        if u"--version" in self.actual_arguments:
            return self.print_name_version()

        # default to run ExecuteTaskCLI
        return ExecuteTaskCLI(invoke=self.invoke).run(arguments=sys.argv)
コード例 #17
0
ファイル: execute_task.py プロジェクト: theolivenbaum/aeneas
class ExecuteTaskCLI(AbstractCLIProgram):
    """
    Execute a Task, that is, a pair of audio/text files
    and a configuration string.
    """

    AUDIO_FILE = gf.relative_path("res/audio.mp3", __file__)
    CTW_ESPEAK = gf.relative_path("../extra/ctw_espeak.py", __file__)
    CTW_SPEECT = gf.relative_path("../extra/ctw_speect/ctw_speect.py",
                                  __file__)

    DEMOS = {
        u"--example-aftercurrent": {
            u"description":
            u"input: plain text (plain), output: AUD, aba beforenext 0.200",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=aud|task_adjust_boundary_algorithm=aftercurrent|task_adjust_boundary_aftercurrent_value=0.200",
            u"syncmap": "output/sonnet.aftercurrent0200.aud",
            u"options": u"",
            u"show": False
        },
        u"--example-beforenext": {
            u"description":
            u"input: plain text (plain), output: AUD, aba beforenext 0.200",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=aud|task_adjust_boundary_algorithm=beforenext|task_adjust_boundary_beforenext_value=0.200",
            u"syncmap": "output/sonnet.beforenext0200.aud",
            u"options": u"",
            u"show": False
        },
        u"--example-cewsubprocess": {
            u"description":
            u"input: plain text, output: TSV, run via cewsubprocess",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=tsv",
            u"syncmap": "output/sonnet.cewsubprocess.tsv",
            u"options": u"-r=\"cew_subprocess_enabled=True\"",
            u"show": False
        },
        u"--example-ctw-espeak": {
            u"description":
            u"input: plain text, output: TSV, tts engine: ctw espeak",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=tsv",
            u"syncmap": "output/sonnet.ctw_espeak.tsv",
            u"options": u"-r=\"tts=custom|tts_path=%s\"" % CTW_ESPEAK,
            u"show": False
        },
        u"--example-ctw-speect": {
            u"description":
            u"input: plain text, output: TSV, tts engine: ctw speect",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=tsv",
            u"syncmap": "output/sonnet.ctw_speect.tsv",
            u"options": u"-r=\"tts=custom|tts_path=%s\"" % CTW_SPEECT,
            u"show": False
        },
        u"--example-eaf": {
            u"description": u"input: plain text, output: EAF",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=eaf",
            u"syncmap": "output/sonnet.eaf",
            u"options": u"",
            u"show": True
        },
        u"--example-faster-rate": {
            u"description":
            u"input: plain text (plain), output: SRT, print faster than 14.0 chars/s",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=srt|task_adjust_boundary_algorithm=rate|task_adjust_boundary_rate_value=14.000",
            u"syncmap": "output/sonnet.faster.srt",
            u"options": u"--faster-rate",
            u"show": False
        },
        u"--example-festival": {
            u"description":
            u"input: plain text, output: TSV, tts engine: Festival",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=tsv",
            u"syncmap": "output/sonnet.festival.tsv",
            u"options": u"-r=\"tts=festival\"",
            u"show": False
        },
        u"--example-flatten-12": {
            u"description":
            u"input: mplain text (multilevel), output: JSON, levels to output: 1 and 2",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/mplain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=mplain|os_task_file_format=json|os_task_file_levels=12",
            u"syncmap": "output/sonnet.flatten12.json",
            u"options": u"",
            u"show": False
        },
        u"--example-flatten-2": {
            u"description":
            u"input: mplain text (multilevel), output: JSON, levels to output: 2",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/mplain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=mplain|os_task_file_format=json|os_task_file_levels=2",
            u"syncmap": "output/sonnet.flatten2.json",
            u"options": u"",
            u"show": False
        },
        u"--example-flatten-3": {
            u"description":
            u"input: mplain text (multilevel), output: JSON, levels to output: 3",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/mplain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=mplain|os_task_file_format=json|os_task_file_levels=3",
            u"syncmap": "output/sonnet.flatten3.json",
            u"options": u"",
            u"show": False
        },
        u"--example-head-tail": {
            u"description":
            u"input: plain text, output: TSV, explicit head and tail",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=tsv|is_audio_file_head_length=0.400|is_audio_file_tail_length=0.500|os_task_file_head_tail_format=stretch",
            u"syncmap": "output/sonnet.headtail.tsv",
            u"options": u"",
            u"show": False
        },
        u"--example-json": {
            u"description": u"input: plain text, output: JSON",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=json",
            u"syncmap": "output/sonnet.json",
            u"options": u"",
            u"show": True
        },
        u"--example-mplain-json": {
            u"description":
            u"input: multilevel plain text (mplain), output: JSON",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/mplain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=mplain|os_task_file_format=json",
            u"syncmap": "output/sonnet.mplain.json",
            u"options": u"",
            u"show": False
        },
        u"--example-mplain-smil": {
            u"description":
            u"input: multilevel plain text (mplain), output: SMIL",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/mplain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=mplain|os_task_file_format=smil|os_task_file_smil_audio_ref=p001.mp3|os_task_file_smil_page_ref=p001.xhtml",
            u"syncmap": "output/sonnet.mplain.smil",
            u"options": u"",
            u"show": True
        },
        u"--example-multilevel-tts": {
            u"description":
            u"input: multilevel plain text (mplain), different TTS engines, output: JSON",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/mplain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=mplain|os_task_file_format=json",
            u"syncmap": "output/sonnet.mplain.json",
            u"options":
            u"-r=\"tts_l1=festival|tts_l2=festival|tts_l3=espeak\"",
            u"show": False
        },
        u"--example-munparsed-json": {
            u"description":
            u"input: multilevel unparsed text (munparsed), output: JSON",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/munparsed2.xhtml", __file__),
            u"config":
            u"task_language=eng|is_text_type=munparsed|is_text_munparsed_l1_id_regex=p[0-9]+|is_text_munparsed_l2_id_regex=s[0-9]+|is_text_munparsed_l3_id_regex=w[0-9]+|os_task_file_format=json",
            u"syncmap": "output/sonnet.munparsed.json",
            u"options": u"",
            u"show": False
        },
        u"--example-munparsed-smil": {
            u"description":
            u"input: multilevel unparsed text (munparsed), output: SMIL",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/munparsed.xhtml", __file__),
            u"config":
            u"task_language=eng|is_text_type=munparsed|is_text_munparsed_l1_id_regex=p[0-9]+|is_text_munparsed_l2_id_regex=p[0-9]+s[0-9]+|is_text_munparsed_l3_id_regex=p[0-9]+s[0-9]+w[0-9]+|os_task_file_format=smil|os_task_file_smil_audio_ref=p001.mp3|os_task_file_smil_page_ref=p001.xhtml",
            u"syncmap": "output/sonnet.munparsed.smil",
            u"options": u"",
            u"show": True
        },
        u"--example-mws": {
            u"description":
            u"input: plain text, output: JSON, resolution: 0.500s",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=json",
            u"syncmap": "output/sonnet.mws.json",
            u"options":
            u"-r=\"mfcc_window_length=1.500|mfcc_window_shift=0.500\"",
            u"show": False
        },
        u"--example-no-zero": {
            u"description":
            u"input: multilevel plain text (mplain), output: JSON, no zero duration",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/mplain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=mplain|os_task_file_format=json|os_task_file_no_zero=True",
            u"syncmap": "output/sonnet.nozero.json",
            u"options": u"--zero",
            u"show": False
        },
        u"--example-offset": {
            u"description":
            u"input: plain text (plain), output: AUD, aba offset 0.200",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=aud|task_adjust_boundary_algorithm=offset|task_adjust_boundary_offset_value=0.200",
            u"syncmap": "output/sonnet.offset0200.aud",
            u"options": u"",
            u"show": False
        },
        u"--example-percent": {
            u"description":
            u"input: plain text (plain), output: AUD, aba percent 50",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=aud|task_adjust_boundary_algorithm=percent|task_adjust_boundary_percent_value=50",
            u"syncmap": "output/sonnet.percent50.aud",
            u"options": u"",
            u"show": False
        },
        u"--example-py": {
            u"description": u"input: plain text, output: JSON, pure python",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=json",
            u"syncmap": "output/sonnet.cext.json",
            u"options": u"-r=\"c_extensions=False\"",
            u"show": False
        },
        u"--example-rates": {
            u"description":
            u"input: plain text (plain), output: SRT, max rate 14.0 chars/s, print rates",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=srt|task_adjust_boundary_algorithm=rate|task_adjust_boundary_rate_value=14.000",
            u"syncmap": "output/sonnet.rates.srt",
            u"options": u"--rates",
            u"show": False
        },
        u"--example-sd": {
            u"description":
            u"input: plain text, output: TSV, head/tail detection",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=tsv|is_audio_file_detect_head_max=10.000|is_audio_file_detect_tail_max=10.000",
            u"syncmap": "output/sonnet.sd.tsv",
            u"options": u"",
            u"show": False
        },
        u"--example-srt": {
            u"description": u"input: subtitles text, output: SRT",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/subtitles.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=subtitles|os_task_file_format=srt",
            u"syncmap": "output/sonnet.srt",
            u"options": u"",
            u"show": True
        },
        u"--example-smil": {
            u"description": u"input: unparsed text, output: SMIL",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/page.xhtml", __file__),
            u"config":
            u"task_language=eng|is_text_type=unparsed|is_text_unparsed_id_regex=f[0-9]+|is_text_unparsed_id_sort=numeric|os_task_file_format=smil|os_task_file_smil_audio_ref=p001.mp3|os_task_file_smil_page_ref=p001.xhtml",
            u"syncmap": "output/sonnet.smil",
            u"options": u"",
            u"show": True
        },
        u"--example-tsv": {
            u"description": u"input: parsed text, output: TSV",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/parsed.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=parsed|os_task_file_format=tsv",
            u"syncmap": "output/sonnet.tsv",
            u"options": u"",
            u"show": True
        },
        u"--example-words": {
            u"description":
            u"input: single word granularity plain text, output: AUD",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/words.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=aud",
            u"syncmap": "output/sonnet.words.aud",
            u"options": u"",
            u"show": True
        },
        u"--example-words-festival-cache": {
            u"description":
            u"input: single word granularity plain text, output: AUD, tts engine: Festival, TTS cache on",
            u"audio": AUDIO_FILE,
            u"text": gf.relative_path("res/words.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=aud",
            u"syncmap": "output/sonnet.words.aud",
            u"options": u"-r=\"tts=festival|tts_cache=True\"",
            u"show": False
        },
        u"--example-youtube": {
            u"description": u"input: audio from YouTube, output: TXT",
            u"audio": "https://www.youtube.com/watch?v=rU4a7AA8wM0",
            u"text": gf.relative_path("res/plain.txt", __file__),
            u"config":
            u"task_language=eng|is_text_type=plain|os_task_file_format=txt",
            u"syncmap": "output/sonnet.txt",
            u"options": u"-y",
            u"show": True
        }
    }

    PARAMETERS = [
        u"  task_language                           : language (*)",
        u"",
        u"  is_audio_file_detect_head_max           : detect audio head, at most this many seconds",
        u"  is_audio_file_detect_head_min           : detect audio head, at least this many seconds",
        u"  is_audio_file_detect_tail_max           : detect audio tail, at most this many seconds",
        u"  is_audio_file_detect_tail_min           : detect audio tail, at least this many seconds",
        u"  is_audio_file_head_length               : ignore this many seconds from the begin of the audio file",
        u"  is_audio_file_process_length            : process this many seconds of the audio file",
        u"  is_audio_file_tail_length               : ignore this many seconds from the end of the audio file",
        u"",
        u"  is_text_type                            : input text format (*)",
        u"  is_text_mplain_word_separator           : word separator (mplain)",
        u"  is_text_munparsed_l1_id_regex           : regex matching level 1 id attributes (munparsed)",
        u"  is_text_munparsed_l2_id_regex           : regex matching level 2 id attributes (munparsed)",
        u"  is_text_munparsed_l3_id_regex           : regex matching level 3 id attributes (munparsed)",
        u"  is_text_unparsed_class_regex            : regex matching class attributes (unparsed)",
        u"  is_text_unparsed_id_regex               : regex matching id attributes (unparsed)",
        u"  is_text_unparsed_id_sort                : sort matched elements by id (unparsed) (*)",
        u"  is_text_file_ignore_regex               : ignore text matched by regex for audio alignment purposes",
        u"  is_text_file_transliterate_map          : apply the given transliteration map for audio alignment purposes",
        u"",
        u"  os_task_file_format                     : output sync map format (*)",
        u"  os_task_file_id_regex                   : id regex for the output sync map (subtitles, plain)",
        u"  os_task_file_head_tail_format           : format audio head/tail (*)",
        u"  os_task_file_levels                     : output the specified levels (mplain)",
        u"  os_task_file_no_zero                    : if True, do not allow zero-length fragments",
        u"  os_task_file_smil_audio_ref             : value for the audio ref (smil, smilh, smilm)",
        u"  os_task_file_smil_page_ref              : value for the text ref (smil, smilh, smilm)",
        u"",
        u"  task_adjust_boundary_algorithm          : adjust sync map fragments using algorithm (*)",
        u"  task_adjust_boundary_aftercurrent_value : offset value, in seconds (aftercurrent)",
        u"  task_adjust_boundary_beforenext_value   : offset value, in seconds (beforenext)",
        u"  task_adjust_boundary_offset_value       : offset value, in seconds (offset)",
        u"  task_adjust_boundary_percent_value      : percent value, in [0..100], (percent)",
        u"  task_adjust_boundary_rate_value         : max rate, in characters/s (rate, rateaggressive)",
    ]

    VALUES = {
        "espeak": sorted(ESPEAKTTSWrapper.LANGUAGE_TO_VOICE_CODE.keys()),
        "espeak-ng": sorted(ESPEAKNGTTSWrapper.LANGUAGE_TO_VOICE_CODE.keys()),
        "festival": sorted(FESTIVALTTSWrapper.LANGUAGE_TO_VOICE_CODE.keys()),
        "nuance": sorted(NuanceTTSWrapper.LANGUAGE_TO_VOICE_CODE.keys()),
        "task_language": Language.ALLOWED_VALUES,
        "is_text_type": TextFileFormat.ALLOWED_VALUES,
        "is_text_unparsed_id_sort": IDSortingAlgorithm.ALLOWED_VALUES,
        "os_task_file_format": SyncMapFormat.ALLOWED_VALUES,
        "os_task_file_head_tail_format": SyncMapHeadTailFormat.ALLOWED_VALUES,
        "task_adjust_boundary_algorithm":
        AdjustBoundaryAlgorithm.ALLOWED_VALUES,
    }

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Execute a Task.",
        "synopsis": [
            (u"--list-parameters", False),
            (u"--list-values[=PARAM]", False),
            (u"AUDIO_FILE  TEXT_FILE CONFIG_STRING OUTPUT_FILE", True),
            (u"YOUTUBE_URL TEXT_FILE CONFIG_STRING OUTPUT_FILE -y", True),
        ],
        "examples": [u"--examples", u"--examples-all"],
        "options": [
            u"--faster-rate : print fragments with rate > task_adjust_boundary_rate_value",
            u"--keep-audio : do not delete the audio file downloaded from YouTube (-y only)",
            u"--largest-audio : download largest audio stream (-y only)",
            u"--list-parameters : list all parameters",
            u"--list-values : list all parameters for which values can be listed",
            u"--list-values=PARAM : list all allowed values for parameter PARAM",
            u"--output-html : output HTML file for fine tuning",
            u"--rates : print rate of each fragment",
            u"--skip-validator : do not validate the given config string",
            u"--zero : print fragments with zero duration",
            u"-y, --youtube : download audio from YouTube video",
        ]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 1:
            return self.print_help()

        if self.has_option([u"-e", u"--examples"]):
            return self.print_examples(False)

        if self.has_option(u"--examples-all"):
            return self.print_examples(True)

        if self.has_option([u"--list-parameters"]):
            return self.print_parameters()

        parameter = self.has_option_with_value(u"--list-values")
        if parameter is not None:
            return self.print_values(parameter)
        elif self.has_option(u"--list-values"):
            return self.print_values(u"?")

        # NOTE list() is needed for Python3, where keys() is not a list!
        demo = self.has_option(list(self.DEMOS.keys()))
        demo_parameters = u""
        download_from_youtube = self.has_option([u"-y", u"--youtube"])
        largest_audio = self.has_option(u"--largest-audio")
        keep_audio = self.has_option(u"--keep-audio")
        output_html = self.has_option(u"--output-html")
        validate = not self.has_option(u"--skip-validator")
        print_faster_rate = self.has_option(u"--faster-rate")
        print_rates = self.has_option(u"--rates")
        print_zero = self.has_option(u"--zero")

        if demo:
            validate = False
            for key in self.DEMOS:
                if self.has_option(key):
                    demo_parameters = self.DEMOS[key]
                    audio_file_path = demo_parameters[u"audio"]
                    text_file_path = demo_parameters[u"text"]
                    config_string = demo_parameters[u"config"]
                    sync_map_file_path = demo_parameters[u"syncmap"]
                    # TODO allow injecting rconf options directly from DEMOS options field
                    if key == u"--example-cewsubprocess":
                        self.rconf[
                            RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = True
                    elif key == u"--example-ctw-espeak":
                        self.rconf[RuntimeConfiguration.TTS] = "custom"
                        self.rconf[
                            RuntimeConfiguration.TTS_PATH] = self.CTW_ESPEAK
                    elif key == u"--example-ctw-speect":
                        self.rconf[RuntimeConfiguration.TTS] = "custom"
                        self.rconf[
                            RuntimeConfiguration.TTS_PATH] = self.CTW_SPEECT
                    elif key == u"--example-festival":
                        self.rconf[RuntimeConfiguration.TTS] = "festival"
                    elif key == u"--example-mws":
                        self.rconf[
                            RuntimeConfiguration.MFCC_WINDOW_LENGTH] = "1.500"
                        self.rconf[
                            RuntimeConfiguration.MFCC_WINDOW_SHIFT] = "0.500"
                    elif key == u"--example-multilevel-tts":
                        self.rconf[RuntimeConfiguration.TTS_L1] = "festival"
                        self.rconf[RuntimeConfiguration.TTS_L2] = "festival"
                        self.rconf[RuntimeConfiguration.TTS_L3] = "espeak"
                    elif key == u"--example-words-festival-cache":
                        self.rconf[RuntimeConfiguration.TTS] = "festival"
                        self.rconf[RuntimeConfiguration.TTS_CACHE] = True
                    elif key == u"--example-faster-rate":
                        print_faster_rate = True
                    elif key == u"--example-no-zero":
                        print_zero = True
                    elif key == u"--example-py":
                        self.rconf[RuntimeConfiguration.C_EXTENSIONS] = False
                    elif key == u"--example-rates":
                        print_rates = True
                    elif key == u"--example-youtube":
                        download_from_youtube = True
                    break
        else:
            if len(self.actual_arguments) < 4:
                return self.print_help()
            audio_file_path = self.actual_arguments[0]
            text_file_path = self.actual_arguments[1]
            config_string = self.actual_arguments[2]
            sync_map_file_path = self.actual_arguments[3]

        html_file_path = None
        if output_html:
            keep_audio = True
            html_file_path = sync_map_file_path + u".html"

        if download_from_youtube:
            youtube_url = audio_file_path

        if (not download_from_youtube) and (
                not self.check_input_file(audio_file_path)):
            return self.ERROR_EXIT_CODE
        if not self.check_input_file(text_file_path):
            return self.ERROR_EXIT_CODE
        if not self.check_output_file(sync_map_file_path):
            return self.ERROR_EXIT_CODE
        if (html_file_path
                is not None) and (not self.check_output_file(html_file_path)):
            return self.ERROR_EXIT_CODE

        self.check_c_extensions()

        if demo:
            msg = []
            msg.append(u"Running example task with arguments:")
            if download_from_youtube:
                msg.append(u"  YouTube URL:   %s" % youtube_url)
            else:
                msg.append(u"  Audio file:    %s" % audio_file_path)
            msg.append(u"  Text file:     %s" % text_file_path)
            msg.append(u"  Config string: %s" % config_string)
            msg.append(u"  Sync map file: %s" % sync_map_file_path)
            if len(demo_parameters[u"options"]) > 0:
                msg.append(u"  Options:       %s" %
                           demo_parameters[u"options"])
            self.print_info(u"\n".join(msg))

        if validate:
            self.print_info(
                u"Validating config string (specify --skip-validator to bypass)..."
            )
            validator = Validator(logger=self.logger)
            result = validator.check_configuration_string(config_string,
                                                          is_job=False,
                                                          external_name=True)
            if not result.passed:
                self.print_error(u"The given config string is not valid:")
                self.print_generic(result.pretty_print())
                return self.ERROR_EXIT_CODE
            self.print_info(u"Validating config string... done")

        if download_from_youtube:
            try:
                self.print_info(u"Downloading audio from '%s' ..." %
                                youtube_url)
                downloader = Downloader(logger=self.logger)
                audio_file_path = downloader.audio_from_youtube(
                    youtube_url,
                    download=True,
                    output_file_path=None,
                    largest_audio=largest_audio)
                self.print_info(u"Downloading audio from '%s' ... done" %
                                youtube_url)
            except ImportError:
                self.print_no_pafy_error()
                return self.ERROR_EXIT_CODE
            except Exception as exc:
                self.print_error(
                    u"An unexpected error occurred while downloading audio from YouTube:"
                )
                self.print_error(u"%s" % exc)
                return self.ERROR_EXIT_CODE
        else:
            audio_extension = gf.file_extension(audio_file_path)
            if audio_extension.lower() not in AudioFile.FILE_EXTENSIONS:
                self.print_warning(
                    u"Your audio file path has extension '%s', which is uncommon for an audio file."
                    % audio_extension)
                self.print_warning(
                    u"Attempting at executing your Task anyway.")
                self.print_warning(
                    u"If it fails, you might have swapped the first two arguments."
                )
                self.print_warning(
                    u"The audio file path should be the first argument, the text file path the second."
                )

        try:
            self.print_info(u"Creating task...")
            task = Task(config_string, logger=self.logger)
            task.audio_file_path_absolute = audio_file_path
            task.text_file_path_absolute = text_file_path
            task.sync_map_file_path_absolute = sync_map_file_path
            self.print_info(u"Creating task... done")
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while creating the task:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        try:
            self.print_info(u"Executing task...")
            executor = ExecuteTask(task=task,
                                   rconf=self.rconf,
                                   logger=self.logger)
            executor.execute()
            self.print_info(u"Executing task... done")
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while executing the task:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        try:
            self.print_info(u"Creating output sync map file...")
            path = task.output_sync_map_file()
            self.print_info(u"Creating output sync map file... done")
            self.print_success(u"Created file '%s'" % path)
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while writing the sync map file:"
            )
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        if output_html:
            try:
                parameters = {}
                parameters[gc.PPN_TASK_OS_FILE_FORMAT] = task.configuration[
                    "o_format"]
                parameters[
                    gc.PPN_TASK_OS_FILE_EAF_AUDIO_REF] = task.configuration[
                        "o_eaf_audio_ref"]
                parameters[
                    gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF] = task.configuration[
                        "o_smil_audio_ref"]
                parameters[
                    gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF] = task.configuration[
                        "o_smil_page_ref"]
                self.print_info(u"Creating output HTML file...")
                task.sync_map.output_html_for_tuning(audio_file_path,
                                                     html_file_path,
                                                     parameters)
                self.print_info(u"Creating output HTML file... done")
                self.print_success(u"Created file '%s'" % html_file_path)
            except Exception as exc:
                self.print_error(
                    u"An unexpected error occurred while writing the HTML file:"
                )
                self.print_error(u"%s" % exc)
                return self.ERROR_EXIT_CODE

        if download_from_youtube:
            if keep_audio:
                self.print_info(
                    u"Option --keep-audio set: keeping downloaded file '%s'" %
                    audio_file_path)
            else:
                gf.delete_file(None, audio_file_path)

        if print_zero:
            zero_duration = [
                l for l in task.sync_map.fragments_tree.vleaves_not_empty
                if l.begin == l.end
            ]
            if len(zero_duration) > 0:
                self.print_warning(u"Fragments with zero duration:")
                for fragment in zero_duration:
                    self.print_generic(u"  %s" % fragment)

        if print_rates:
            self.print_info(u"Fragments with rates:")
            for fragment in task.sync_map.fragments_tree.vleaves_not_empty:
                self.print_generic(u"  %s (rate: %.3f chars/s)" %
                                   (fragment, fragment.rate))

        if print_faster_rate:
            max_rate = task.configuration["aba_rate_value"]
            if max_rate is not None:
                faster = [
                    l for l in task.sync_map.fragments_tree.vleaves_not_empty
                    if l.rate >= max_rate + Decimal("0.001")
                ]
                if len(faster) > 0:
                    self.print_warning(
                        u"Fragments with rate greater than %.3f:" % max_rate)
                    for fragment in faster:
                        self.print_generic(u"  %s (rate: %.3f chars/s)" %
                                           (fragment, fragment.rate))

        return self.NO_ERROR_EXIT_CODE

    def print_examples(self, full=False):
        """
        Print the examples and exit.

        :param bool full: if ``True``, print all examples; otherwise,
                          print only selected ones
        """
        msg = []
        i = 1
        for key in sorted(self.DEMOS.keys()):
            example = self.DEMOS[key]
            if full or example["show"]:
                msg.append(u"Example %d (%s)" % (i, example[u"description"]))
                # NOTE too verbose now that we have dozens of examples
                # COMMENTED msg.append(u"  $ CONFIG_STRING=\"%s\"" % (example[u"config"]))
                # COMMENTED msg.append(u"  $ %s %s %s \"$CONFIG_STRING\" %s %s" % (
                # COMMENTED     self.invoke,
                # COMMENTED     example[u"audio"],
                # COMMENTED     example[u"text"],
                # COMMENTED     example[u"syncmap"],
                # COMMENTED     example[u"options"]
                # COMMENTED ))
                # COMMENTED msg.append(u"  or")
                msg.append(u"  $ %s %s" % (self.invoke, key))
                msg.append(u"")
                i += 1
        self.print_generic(u"\n" + u"\n".join(msg) + u"\n")
        return self.HELP_EXIT_CODE

    def print_parameters(self):
        """
        Print the list of parameters and exit.
        """
        self.print_info(
            u"You can use --list-values=PARAM on parameters marked by (*)")
        self.print_info(u"Available parameters:")
        self.print_generic(u"\n" + u"\n".join(self.PARAMETERS) + u"\n")
        return self.HELP_EXIT_CODE

    def print_values(self, parameter):
        """
        Print the list of values for the given parameter and exit.

        If ``parameter`` is invalid, print the list of
        parameter names that have allowed values.

        :param parameter: the parameter name
        :type  parameter: Unicode string
        """
        if parameter in self.VALUES:
            self.print_info(u"Available values for parameter '%s':" %
                            parameter)
            self.print_generic(u", ".join(self.VALUES[parameter]))
            return self.HELP_EXIT_CODE
        if parameter not in [u"?", u""]:
            self.print_error(u"Invalid parameter name '%s'" % parameter)
        self.print_info(u"Parameters for which values can be listed:")
        self.print_generic(u", ".join(sorted(self.VALUES.keys())))
        return self.HELP_EXIT_CODE
コード例 #18
0
ファイル: execute_job.py プロジェクト: theolivenbaum/aeneas
class ExecuteJobCLI(AbstractCLIProgram):
    """
    Execute a Job, passed as a container or
    as a container and a configuration string
    (i.e., from a wizard).
    """
    CONTAINER_FILE = gf.relative_path("res/job.zip", __file__)
    CONTAINER_FILE_NO_CONFIG = gf.relative_path("res/job_no_config.zip",
                                                __file__)
    OUTPUT_DIRECTORY = "output/"
    CONFIG_STRING = u"is_hierarchy_type=flat|is_hierarchy_prefix=assets/|is_text_file_relative_path=.|is_text_file_name_regex=.*\.xhtml|is_text_type=unparsed|is_audio_file_relative_path=.|is_audio_file_name_regex=.*\.mp3|is_text_unparsed_id_regex=f[0-9]+|is_text_unparsed_id_sort=numeric|os_job_file_name=demo_sync_job_output|os_job_file_container=zip|os_job_file_hierarchy_type=flat|os_job_file_hierarchy_prefix=assets/|os_task_file_name=\\$PREFIX.xhtml.smil|os_task_file_format=smil|os_task_file_smil_page_ref=\\$PREFIX.xhtml|os_task_file_smil_audio_ref=../Audio/\\$PREFIX.mp3|job_language=eng|job_description=Demo Sync Job"

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Execute a Job, passed as a container.",
        "synopsis": [(u"CONTAINER OUTPUT_DIR [CONFIG_STRING]", True)],
        "examples": [
            u"%s %s" % (CONTAINER_FILE, OUTPUT_DIRECTORY),
            u"%s %s --cewsubprocess" % (CONTAINER_FILE, OUTPUT_DIRECTORY),
            u"%s %s \"%s\"" %
            (CONTAINER_FILE_NO_CONFIG, OUTPUT_DIRECTORY, CONFIG_STRING)
        ],
        "options": [
            u"--cewsubprocess : run cew in separate process (see docs)",
            u"--skip-validator : do not validate the given container and/or config string"
        ]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 2:
            return self.print_help()
        container_path = self.actual_arguments[0]
        output_directory_path = self.actual_arguments[1]
        config_string = None
        if (len(self.actual_arguments)) > 2 and (
                not self.actual_arguments[2].startswith(u"-")):
            config_string = self.actual_arguments[2]
        validate = not self.has_option(u"--skip-validator")
        if self.has_option(u"--cewsubprocess"):
            self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = True

        if not self.check_input_file_or_directory(container_path):
            return self.ERROR_EXIT_CODE

        if not self.check_output_directory(output_directory_path):
            return self.ERROR_EXIT_CODE

        if validate:
            try:
                self.print_info(
                    u"Validating the container (specify --skip-validator to bypass)..."
                )
                validator = Validator(rconf=self.rconf, logger=self.logger)
                result = validator.check_container(container_path,
                                                   config_string=config_string)
                if not result.passed:
                    self.print_error(u"The given container is not valid:")
                    self.print_error(result.pretty_print())
                    return self.ERROR_EXIT_CODE
                self.print_info(u"Validating the container... done")
            except Exception as exc:
                self.print_error(
                    u"An unexpected error occurred while validating the container:"
                )
                self.print_error(u"%s" % exc)
                return self.ERROR_EXIT_CODE

        try:
            self.print_info(u"Loading job from container...")
            executor = ExecuteJob(rconf=self.rconf, logger=self.logger)
            executor.load_job_from_container(container_path, config_string)
            self.print_info(u"Loading job from container... done")
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while loading the job:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        try:
            self.print_info(u"Executing...")
            executor.execute()
            self.print_info(u"Executing... done")
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while executing the job:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        try:
            self.print_info(u"Creating output container...")
            path = executor.write_output_container(output_directory_path)
            self.print_info(u"Creating output container... done")
            self.print_success(u"Created output file '%s'" % path)
            executor.clean(True)
            return self.NO_ERROR_EXIT_CODE
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while writing the output container:"
            )
            self.print_error(u"%s" % exc)

        return self.ERROR_EXIT_CODE
コード例 #19
0
ファイル: validate.py プロジェクト: shivupoojar/DeFog
class ValidateCLI(AbstractCLIProgram):
    """
    Perform validation in one of the following modes:

    1. a container
    2. a job configuration string
    3. a task configuration string
    4. a container + configuration string from wizard
    5. a job TXT configuration file
    6. a job XML configuration file
    """
    CONFIG_FILE_TXT = gf.relative_path("res/config.txt", __file__)
    CONFIG_FILE_XML = gf.relative_path("res/config.xml", __file__)
    CONTAINER_FILE = gf.relative_path("res/job.zip", __file__)
    JOB_CONFIG_STRING = u"job_language=ita|os_job_file_name=output.zip|os_job_file_container=zip|is_hierarchy_type=flat"
    TASK_CONFIG_STRING = u"task_language=ita|is_text_type=plain|os_task_file_name=output.txt|os_task_file_format=txt"
    WRONG_CONFIG_STRING = u"job_language=ita|invalid=string"
    GOOD_CONFIG_STRING = u"is_hierarchy_type=flat|is_hierarchy_prefix=assets/|is_text_file_relative_path=.|is_text_file_name_regex=.*\.xhtml|is_text_type=unparsed|is_audio_file_relative_path=.|is_audio_file_name_regex=.*\.mp3|is_text_unparsed_id_regex=f[0-9]+|is_text_unparsed_id_sort=numeric|os_job_file_name=demo_sync_job_output|os_job_file_container=zip|os_job_file_hierarchy_type=flat|os_job_file_hierarchy_prefix=assets/|os_task_file_name=\\$PREFIX.xhtml.smil|os_task_file_format=smil|os_task_file_smil_page_ref=\\$PREFIX.xhtml|os_task_file_smil_audio_ref=../Audio/\\$PREFIX.mp3|job_language=eng|job_description=Demo Sync Job"

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Perform validation of a config string or a container",
        "synopsis":
        [(u"config CONFIG.TXT", True), (u"config CONFIG.XML", True),
         (u"container CONTAINER", True), (u"job CONFIG_STRING", True),
         (u"task CONFIG_STRING", True),
         (u"wizard CONFIG_STRING CONTAINER", True)],
        "examples": [
            u"config %s" % (CONFIG_FILE_TXT),
            u"config %s" % (CONFIG_FILE_XML),
            u"container %s" % (CONTAINER_FILE),
            u"job \"%s\"" % (JOB_CONFIG_STRING),
            u"task \"%s\"" % (TASK_CONFIG_STRING),
            u"wizard \"%s\" %s" % (GOOD_CONFIG_STRING, CONTAINER_FILE)
        ]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 2:
            return self.print_help()
        mode = self.actual_arguments[0]

        validator = Validator(rconf=self.rconf, logger=self.logger)
        if mode == u"config":
            config_file_path = self.actual_arguments[1]
            config_txt = None
            if config_file_path.lower().endswith(u".txt"):
                config_txt = True
            elif config_file_path.lower().endswith(u".xml"):
                config_txt = False
            else:
                return self.print_help()
            if not self.check_input_file(config_file_path):
                return self.ERROR_EXIT_CODE
            contents = gf.read_file_bytes(config_file_path)
            if contents is None:
                return self.ERROR_EXIT_CODE
            if config_txt:
                result = validator.check_config_txt(contents)
                msg = u"TXT configuration"
            else:
                result = validator.check_config_xml(contents)
                msg = "XML configuration"
        elif mode == u"container":
            container_path = self.actual_arguments[1]
            result = validator.check_container(container_path)
            msg = "container"
        elif mode == u"job":
            config_string = self.actual_arguments[1]
            result = validator.check_configuration_string(config_string,
                                                          is_job=True)
            msg = u"job configuration string"
        elif mode == u"task":
            config_string = self.actual_arguments[1]
            result = validator.check_configuration_string(config_string,
                                                          is_job=False,
                                                          external_name=True)
            msg = u"task configuration string"
        elif mode == u"wizard":
            if (len(self.actual_arguments) <
                    3) or (self.actual_arguments[2].startswith(u"-")):
                return self.print_help()
            config_string = self.actual_arguments[1]
            container_path = self.actual_arguments[2]
            if not self.check_input_file(container_path):
                return self.ERROR_EXIT_CODE
            result = validator.check_container(container_path,
                                               config_string=config_string)
            msg = "container with configuration string from wizard"
        else:
            return self.print_help()

        if result.passed:
            self.print_success(u"Valid %s" % msg)
            for warning in result.warnings:
                self.print_warning(u"%s" % warning)
            return self.NO_ERROR_EXIT_CODE
        else:
            self.print_error(u"Invalid %s" % msg)
            for error in result.errors:
                self.print_error(u"%s" % error)

        return self.ERROR_EXIT_CODE
コード例 #20
0
class AbstractCLIProgram(Loggable):
    """
    This class is an "abstract" CLI program.

    To create a new CLI program, create a new class,
    derived from this one, and overload
    ``NAME``, ``HELP``, and ``perform_command()``.

    :param use_sys: if ``True``, call ``sys.exit`` when needed;
                    otherwise, never call ``sys.exit`` and
                    just return a return code/value
    :type  use_sys: bool
    :param string invoke: the CLI command to be invoked
    :param rconf: a runtime configuration. Default: ``None``, meaning that
                  default settings will be used.
    :type  rconf: :class:`aeneas.runtimeconfiguration.RuntimeConfiguration`
    """

    NAME = gf.file_name_without_extension(__file__)

    AENEAS_URL = u"http://www.readbeyond.it/aeneas/"
    DOCS_URL = u"http://www.readbeyond.it/aeneas/docs/"
    GITHUB_URL = u"https://github.com/ReadBeyond/aeneas/"
    ISSUES_URL = u"https://github.com/ReadBeyond/aeneas/issues/"
    RB_URL = u"http://www.readbeyond.it"

    NO_ERROR_EXIT_CODE = 0
    ERROR_EXIT_CODE = 1
    HELP_EXIT_CODE = 2

    HELP = {
        "description": u"An abstract CLI program",
        "synopsis": [],
        "options": [],
        "parameters": [],
        "examples": []
    }

    TAG = u"CLI"

    def __init__(self, use_sys=True, invoke=None, rconf=None, logger=None):
        super(AbstractCLIProgram, self).__init__(rconf=rconf, logger=logger)
        self.invoke = u"python -m aeneas.tools.%s" % (self.NAME) if (
            invoke is None) else invoke
        self.use_sys = use_sys
        self.formal_arguments_raw = []
        self.formal_arguments = []
        self.actual_arguments = []
        self.log_file_path = None
        self.verbose = False
        self.very_verbose = False

    PREFIX_TO_PRINT_FUNCTION = {
        Logger.CRITICAL: gf.print_error,
        Logger.DEBUG: gf.print_info,
        Logger.ERROR: gf.print_error,
        Logger.INFO: gf.print_info,
        Logger.SUCCESS: gf.print_success,
        Logger.WARNING: gf.print_warning
    }

    def print_generic(self, msg, prefix=None):
        """
        Print a message and log it.

        :param msg: the message
        :type  msg: Unicode string
        :param prefix: the (optional) prefix
        :type  prefix: Unicode string
        """
        if prefix is None:
            self._log(msg, Logger.INFO)
        else:
            self._log(msg, prefix)
        if self.use_sys:
            if (prefix is not None) and (prefix
                                         in self.PREFIX_TO_PRINT_FUNCTION):
                self.PREFIX_TO_PRINT_FUNCTION[prefix](msg)
            else:
                gf.safe_print(msg)

    def print_error(self, msg):
        """
        Print an error message and log it.

        :param string msg: the message
        """
        self.print_generic(msg, Logger.ERROR)

    def print_info(self, msg):
        """
        Print an info message and log it.

        :param string msg: the message
        """
        self.print_generic(msg, Logger.INFO)

    def print_success(self, msg):
        """
        Print a success message and log it.

        :param string msg: the message
        """
        self.print_generic(msg, Logger.SUCCESS)

    def print_warning(self, msg):
        """
        Print a warning message and log it.

        :param string msg: the message
        """
        self.print_generic(msg, Logger.WARNING)

    def exit(self, code):
        """
        Exit with the given exit code,
        possibly with ``sys.exit()``.

        :param code: the exit code
        :type  code: int
        :rtype: int
        """
        if self.use_sys:
            sys.exit(code)
        return code

    def print_help(self, short=False):
        """
        Print help message and exit.

        :param short: print short help only
        :type  short: bool
        """
        header = [
            u"",
            u"NAME",
            u"  %s - %s" % (self.NAME, self.HELP["description"]),
            u"",
        ]

        synopsis = [u"SYNOPSIS", u"  %s [-h|--help|--version]" % (self.invoke)]
        if "synopsis" in self.HELP:
            for syn, opt in self.HELP["synopsis"]:
                if opt:
                    opt = u" [OPTIONS]"
                else:
                    opt = u""
                synopsis.append(u"  %s %s%s" % (self.invoke, syn, opt))

        synopsis.append(u"")

        options = [
            u"  -h : print short help and exit",
            u"  --help : print full help and exit",
            u"  --version : print the program name and version and exit",
            u"  -l[=FILE], --log[=FILE] : log verbose output to tmp file or FILE if specified",
            u"  -r=CONF, --runtime-configuration=CONF : apply runtime configuration CONF",
            u"  -v, --verbose : verbose output",
            u"  -vv, --very-verbose : verbose output, print date/time values",
        ]
        if "options" in self.HELP:
            for opt in self.HELP["options"]:
                options.append(u"  %s" % (opt))
        options = [u"OPTIONS"] + sorted(options) + [u""]

        parameters = []
        if ("parameters" in self.HELP) and (len(self.HELP["parameters"]) > 0):
            parameters.append(u"PARAMETERS")
            for par in self.HELP["parameters"]:
                parameters.append(u"  %s" % (par))
            parameters.append(u"")

        examples = []
        if ("examples" in self.HELP) and (len(self.HELP["examples"]) > 0):
            examples.append(u"EXAMPLES")
            for exa in self.HELP["examples"]:
                examples.append(u"  %s %s" % (self.invoke, exa))
            examples.append(u"")

        footer = [
            u"EXIT CODES",
            u"  %d : no error" % (self.NO_ERROR_EXIT_CODE),
            u"  %d : error" % (self.ERROR_EXIT_CODE),
            u"  %d : help shown, no command run" % (self.HELP_EXIT_CODE),
            u"",
            u"AUTHOR",
            u"  Alberto Pettarin, http://www.albertopettarin.it/",
            u"",
            u"REPORTING BUGS",
            u"  Please use the GitHub Issues Web page : %s" %
            (self.ISSUES_URL),
            u"",
            u"COPYRIGHT",
            u"  2012-2016, Alberto Pettarin and ReadBeyond Srl",
            u"  This software is available under the terms of the GNU Affero General Public License Version 3",
            u"",
            u"SEE ALSO",
            u"  Code repository  : %s" % (self.GITHUB_URL),
            u"  Documentation    : %s" % (self.DOCS_URL),
            u"  Project Web page : %s" % (self.AENEAS_URL),
            u"",
        ]

        msg = header + synopsis + options + parameters + examples
        if not short:
            msg += footer
        if self.use_sys:
            self.print_generic(u"\n".join(msg))
        return self.exit(self.HELP_EXIT_CODE)

    def print_name_version(self):
        """
        Print program name and version and exit.

        :rtype: int
        """
        if self.use_sys:
            self.print_generic(u"%s v%s" % (self.NAME, aeneas_version))
        return self.exit(self.HELP_EXIT_CODE)

    def run(self, arguments, show_help=True):
        """
        Program entry point.

        Please note that the first item in ``arguments`` is discarded,
        as it is assumed to be the script/invocation name;
        pass a "dumb" placeholder if you call this method with
        an argument different that ``sys.argv``.

        :param arguments: the list of arguments
        :type  arguments: list
        :param show_help: if ``False``, do not show help on ``-h`` and ``--help``
        :type  show_help: bool
        :rtype: int
        """
        # convert arguments into Unicode strings
        if self.use_sys:
            # check that sys.stdin.encoding and sys.stdout.encoding are set to utf-8
            if not gf.FROZEN:
                if sys.stdin.encoding not in ["UTF-8", "UTF8"]:
                    self.print_warning(
                        u"The default input encoding is not UTF-8.")
                    self.print_warning(
                        u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell."
                    )
                if sys.stdout.encoding not in ["UTF-8", "UTF8"]:
                    self.print_warning(
                        u"The default output encoding is not UTF-8.")
                    self.print_warning(
                        u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell."
                    )
            # decode using sys.stdin.encoding
            args = [gf.safe_unicode_stdin(arg) for arg in arguments]
        else:
            # decode using utf-8 (but you should pass Unicode strings as parameters anyway)
            args = [gf.safe_unicode(arg) for arg in arguments]

        if show_help:
            if u"-h" in args:
                return self.print_help(short=True)

            if u"--help" in args:
                return self.print_help(short=False)

            if u"--version" in args:
                return self.print_name_version()

        # store formal arguments
        self.formal_arguments_raw = arguments
        self.formal_arguments = args

        # to obtain the actual arguments,
        # remove the first one and "special" switches
        args = args[1:]
        set_args = set(args)

        # set verbosity, if requested
        for flag in set([u"-v", u"--verbose"]) & set_args:
            self.verbose = True
            args.remove(flag)
        for flag in set([u"-vv", u"--very-verbose"]) & set_args:
            self.verbose = True
            self.very_verbose = True
            args.remove(flag)

        # set RuntimeConfiguration string, if specified
        for flag in [u"-r", u"--runtime-configuration"]:
            rconf_string = self.has_option_with_value(flag,
                                                      actual_arguments=False)
            if rconf_string is not None:
                self.rconf = RuntimeConfiguration(rconf_string)
                args.remove("%s=%s" % (flag, rconf_string))

        # set log file path, if requested
        log_path = None
        for flag in [u"-l", u"--log"]:
            log_path = self.has_option_with_value(flag, actual_arguments=False)
            if log_path is not None:
                args.remove("%s=%s" % (flag, log_path))
            elif flag in set_args:
                handler, log_path = gf.tmp_file(
                    suffix=u".log",
                    root=self.rconf[RuntimeConfiguration.TMP_PATH])
                args.remove(flag)
            if log_path is not None:
                self.log_file_path = log_path

        # if no actual arguments left, print help
        if (len(args) < 1) and (show_help):
            return self.print_help(short=True)

        # store actual arguments
        self.actual_arguments = args

        # create logger
        self.logger = Logger(tee=self.verbose,
                             tee_show_datetime=self.very_verbose)
        self.log([u"Formal arguments: %s", self.formal_arguments])
        self.log([u"Actual arguments: %s", self.actual_arguments])
        self.log([u"Runtime configuration: '%s'", self.rconf.config_string()])

        # perform command
        exit_code = self.perform_command()
        self.log([u"Execution completed with code %d", exit_code])

        # output log if requested
        if self.log_file_path is not None:
            self.log([
                u"User requested saving log to file '%s'", self.log_file_path
            ])
            self.logger.write(self.log_file_path)
            if self.use_sys:
                self.print_info(u"Log written to file '%s'" %
                                self.log_file_path)

        return self.exit(exit_code)

    def has_option(self, target):
        """
        Return ``True`` if the actual arguments include
        the specified ``target`` option or,
        if ``target`` is a list of options,
        at least one of them.

        :param target: the option or a list of options
        :type  target: Unicode string or list of Unicode strings
        :rtype: bool
        """
        if isinstance(target, list):
            target_set = set(target)
        else:
            target_set = set([target])
        return len(target_set & set(self.actual_arguments)) > 0

    def has_option_with_value(self, prefix, actual_arguments=True):
        """
        Check if the actual arguments include an option
        starting with the given ``prefix`` and having a value,
        e.g. ``--format=ogg`` for ``prefix="--format"``.

        :param prefix: the option prefix
        :type  prefix: Unicode string
        :param actual_arguments: if ``True``, check among actual arguments;
                                 otherwise check among formal arguments
        :rtype actual_arguments: bool
        :rtype: Unicode string or None
        """
        if actual_arguments:
            args = self.actual_arguments
        else:
            args = self.formal_arguments
        for arg in [
                arg for arg in args
                if (arg is not None) and (arg.startswith(prefix + u"="))
        ]:
            lis = arg.split(u"=")
            if len(lis) >= 2:
                return u"=".join(lis[1:])
        return None

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        self.log(u"This function should be overloaded in derived classes")
        self.log([u"Invoked with %s", self.actual_arguments])
        return self.NO_ERROR_EXIT_CODE

    def check_c_extensions(self, name=None):
        """
        If C extensions cannot be run, emit a warning
        and return ``False``. Otherwise return ``True``.
        If ``name`` is not ``None``, check just
        the C extension with that name.

        :param name: the name of the Python C extension to test
        :type  name: string
        :rtype: bool
        """
        if not gf.can_run_c_extension(name=name):
            if name is None:
                self.print_warning(u"Unable to load Python C Extensions")
            else:
                self.print_warning(u"Unable to load Python C Extension %s" %
                                   (name))
            self.print_warning(u"Running the slower pure Python code")
            self.print_warning(
                u"See the documentation for directions to compile the Python C Extensions"
            )
            return False
        return True

    def check_input_file_or_directory(self, path):
        """
        If the given path does not exist, emit an error
        and return ``False``. Otherwise return ``True``.

        :param path: the path of the input file or directory
        :type  path: string (path)
        :rtype: bool
        """
        if (not gf.file_can_be_read(path)) and (not os.path.isdir(path)):
            self.print_error(u"Unable to read file or directory '%s'" % (path))
            self.print_error(
                u"Make sure the path is written/escaped correctly and that you have read permission on it"
            )
            return False
        return True

    def check_input_file(self, path):
        """
        If the given path does not exist, emit an error
        and return ``False``. Otherwise return ``True``.

        :param path: the path of the input file
        :type  path: string (path)
        :rtype: bool
        """
        if not gf.file_can_be_read(path):
            self.print_error(u"Unable to read file '%s'" % (path))
            self.print_error(
                u"Make sure the file path is written/escaped correctly and that you have read permission on it"
            )
            return False
        return True

    def check_output_file(self, path):
        """
        If the given path cannot be written, emit an error
        and return ``False``. Otherwise return ``True``.

        :param path: the path of the output file
        :type  path: string (path)
        :rtype: bool
        """
        if not gf.file_can_be_written(path):
            self.print_error(u"Unable to create file '%s'" % (path))
            self.print_error(
                u"Make sure the file path is written/escaped correctly and that you have write permission on it"
            )
            return False
        return True

    def check_output_directory(self, path):
        """
        If the given directory cannot be written, emit an error
        and return ``False``. Otherwise return ``True``.

        :param path: the path of the output directory
        :type  path: string (path)
        :rtype: bool
        """
        if not os.path.isdir(path):
            self.print_error(u"Directory '%s' does not exist" % (path))
            return False
        test_file = os.path.join(path, u"file.test")
        if not gf.file_can_be_written(test_file):
            self.print_error(u"Unable to write inside directory '%s'" % (path))
            self.print_error(
                u"Make sure the directory path is written/escaped correctly and that you have write permission on it"
            )
            return False
        return True

    def get_text_file(self, text_format, text, parameters):
        if text_format == u"list":
            text_file = TextFile(logger=self.logger)
            text_file.read_from_list(text.split(u"|"))
            return text_file
        else:
            if text_format not in TextFileFormat.ALLOWED_VALUES:
                self.print_error(u"File format '%s' is not allowed" %
                                 (text_format))
                self.print_error(u"Allowed text file formats: %s" %
                                 (" ".join(TextFileFormat.ALLOWED_VALUES)))
                return None
            try:
                return TextFile(text,
                                text_format,
                                parameters,
                                logger=self.logger)
            except OSError:
                self.print_error(u"Cannot read file '%s'" % (text))
            return None

    def print_no_pafy_error(self):
        self.print_error(
            u"You need to install Python modules youtube-dl and pafy to download audio from YouTube. Run:"
        )
        self.print_error(u"$ pip install youtube-dl pafy")
        self.print_error(u"or, to install for all users:")
        self.print_error(u"$ sudo pip install youtube-dl pafy")
コード例 #21
0
ファイル: plot_waveform.py プロジェクト: shivupoojar/DeFog
class PlotWaveformCLI(AbstractCLIProgram):
    """
    Plot a waveform and labels to file.
    """
    AUDIO_FILE = gf.relative_path("res/audio.mp3", __file__)
    VAD_FILE = gf.relative_path("res/sonnet.vad", __file__)
    OUTPUT_FILE = "output/sonnet.png"

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Plot a waveform and labels to file.",
        "synopsis":
        [(u"AUDIO_FILE OUTPUT_FILE [-i LABEL_FILE [-i LABEL_FILE [...]]]",
          True)],
        "examples": [u"%s %s -i %s" % (AUDIO_FILE, OUTPUT_FILE, VAD_FILE)],
        "options": [
            u"--fast : enable fast waveform rendering (default: False)",
            u"--hzoom=ZOOM : horizontal zoom (int, default: 5)",
            u"--label=LABEL : label for the plot (str)",
            u"--text : show fragment text instead of identifier",
            u"--time-step=STEP : print time ticks every STEP seconds (int)",
            u"--vzoom=ZOOM : vertical zoom (int, default: 30)"
        ]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 2:
            return self.print_help()
        input_file_path = self.actual_arguments[0]
        output_file_path = self.actual_arguments[1]

        if not self.check_input_file(input_file_path):
            return self.ERROR_EXIT_CODE
        if not self.check_output_file(output_file_path):
            return self.ERROR_EXIT_CODE

        fast = self.has_option("--fast")
        fragment_text = self.has_option("--text")
        h_zoom = gf.safe_int(self.has_option_with_value("--hzoom"), 5)
        label = self.has_option_with_value("--label")
        time_step = gf.safe_int(self.has_option_with_value("--time-step"), 0)
        v_zoom = gf.safe_int(self.has_option_with_value("--vzoom"), 30)

        labels = not self.has_option("--no-labels")
        begin_times = not self.has_option("--no-begin-times")
        end_times = not self.has_option("--no-end-times")
        begin_guides = not self.has_option("--no-begin-guides")
        end_guides = not self.has_option("--no-end-guides")

        try:
            # import or ImportError
            from aeneas.plotter import PlotLabelset
            from aeneas.plotter import PlotTimeScale
            from aeneas.plotter import PlotWaveform
            from aeneas.plotter import Plotter

            # create plotter object
            self.print_info(u"Plotting to file...")
            plotter = Plotter(rconf=self.rconf, logger=self.logger)

            # add waveform
            afm = AudioFile(input_file_path,
                            rconf=self.rconf,
                            logger=self.logger)
            afm.read_samples_from_file()
            plotter.add_waveform(
                PlotWaveform(afm,
                             label=label,
                             fast=fast,
                             rconf=self.rconf,
                             logger=self.logger))

            # add time scale, if requested
            if time_step > 0:
                plotter.add_timescale(
                    PlotTimeScale(afm.audio_length,
                                  time_step=time_step,
                                  rconf=self.rconf,
                                  logger=self.logger))

            # add labelsets, if any
            for i in range(len(self.actual_arguments)):
                if (self.actual_arguments[i]
                        == "-i") and (i + 1 < len(self.actual_arguments)):
                    label_file_path = self.actual_arguments[i + 1]
                    extension = gf.file_extension(label_file_path)
                    if extension == "vad":
                        labelset = self._read_syncmap_file(
                            label_file_path, SyncMapFormat.TSV, False)
                        ls = PlotLabelset(labelset,
                                          parameters=None,
                                          rconf=self.rconf,
                                          logger=self.logger)
                        ls.parameters["labels"] = False
                        ls.parameters["begin_time"] = begin_times
                        ls.parameters["end_time"] = end_times
                        ls.parameters["begin_guide"] = begin_guides
                        ls.parameters["end_guide"] = end_guides
                        plotter.add_labelset(ls)
                    if extension in SyncMapFormat.ALLOWED_VALUES:
                        labelset = self._read_syncmap_file(
                            label_file_path, extension, fragment_text)
                        ls = PlotLabelset(labelset,
                                          parameters=None,
                                          rconf=self.rconf,
                                          logger=self.logger)
                        ls.parameters["labels"] = labels
                        ls.parameters["begin_time"] = begin_times
                        ls.parameters["end_time"] = end_times
                        ls.parameters["begin_guide"] = begin_guides
                        ls.parameters["end_guide"] = end_guides
                        plotter.add_labelset(ls)

            # output to file
            plotter.draw_png(output_file_path, h_zoom=h_zoom, v_zoom=v_zoom)
            self.print_info(u"Plotting to file... done")
            self.print_success(u"Created file '%s'" % output_file_path)
            return self.NO_ERROR_EXIT_CODE
        except ImportError:
            self.print_error(
                u"You need to install Python module Pillow to output image to file. Run:"
            )
            self.print_error(u"$ pip install Pillow")
            self.print_error(u"or, to install for all users:")
            self.print_error(u"$ sudo pip install Pillow")
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while generating the image file:"
            )
            self.print_error(u"%s" % exc)

        return self.ERROR_EXIT_CODE

    def _read_syncmap_file(self, path, extension, text=False):
        """ Read labels from a SyncMap file """
        syncmap = SyncMap(logger=self.logger)
        syncmap.read(extension, path, parameters=None)
        if text:
            return [(f.begin, f.end, u" ".join(f.text_fragment.lines))
                    for f in syncmap.fragments]
        return [(f.begin, f.end, f.text_fragment.identifier)
                for f in syncmap.fragments]
コード例 #22
0
class RunSDCLI(AbstractCLIProgram):
    """
    Detect the audio head and/or tail of the given audio file.
    """
    AUDIO_FILE = gf.relative_path("res/audio.mp3", __file__)
    PARAMETERS_HEAD = "--min-head=0.0 --max-head=5.0"
    PARAMETERS_TAIL = "--min-tail=1.0 --max-tail=5.0"
    TEXT_FILE = gf.relative_path("res/parsed.txt", __file__)

    NAME = gf.file_name_without_extension(__file__)

    HELP = {
        "description":
        u"Detect the audio head and/or tail of the given audio file.",
        "synopsis":
        [(u"list 'fragment 1|fragment 2|...|fragment N' LANGUAGE AUDIO_FILE",
          True),
         (u"[mplain|munparsed|parsed|plain|subtitles|unparsed] TEXT_FILE LANGUAGE AUDIO_FILE",
          True)],
        "examples": [
            u"parsed %s eng %s" % (TEXT_FILE, AUDIO_FILE),
            u"parsed %s eng %s %s" % (TEXT_FILE, AUDIO_FILE, PARAMETERS_HEAD),
            u"parsed %s eng %s %s" % (TEXT_FILE, AUDIO_FILE, PARAMETERS_TAIL),
            u"parsed %s eng %s %s %s" %
            (TEXT_FILE, AUDIO_FILE, PARAMETERS_HEAD, PARAMETERS_TAIL),
        ],
        "options": [
            u"--class-regex=REGEX : extract text from elements with class attribute matching REGEX (unparsed)",
            u"--id-regex=REGEX : extract text from elements with id attribute matching REGEX (unparsed)",
            u"--l1-id-regex=REGEX : extract text from level 1 elements with id attribute matching REGEX (munparsed)",
            u"--l2-id-regex=REGEX : extract text from level 2 elements with id attribute matching REGEX (munparsed)",
            u"--l3-id-regex=REGEX : extract text from level 3 elements with id attribute matching REGEX (munparsed)",
            u"--max-head=DUR : audio head has at most DUR seconds",
            u"--max-tail=DUR : audio tail has at most DUR seconds",
            u"--min-head=DUR : audio head has at least DUR seconds",
            u"--min-tail=DUR : audio tail has at least DUR seconds",
            u"--sort=ALGORITHM : sort the matched element id attributes using ALGORITHM (lexicographic, numeric, unsorted)"
        ]
    }

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 4:
            return self.print_help()
        text_format = gf.safe_unicode(self.actual_arguments[0])
        if text_format == u"list":
            text = gf.safe_unicode(self.actual_arguments[1])
        elif text_format in TextFileFormat.ALLOWED_VALUES:
            text = self.actual_arguments[1]
            if not self.check_input_file(text):
                return self.ERROR_EXIT_CODE
        else:
            return self.print_help()

        l1_id_regex = self.has_option_with_value(u"--l1-id-regex")
        l2_id_regex = self.has_option_with_value(u"--l2-id-regex")
        l3_id_regex = self.has_option_with_value(u"--l3-id-regex")
        id_regex = self.has_option_with_value(u"--id-regex")
        class_regex = self.has_option_with_value(u"--class-regex")
        sort = self.has_option_with_value(u"--sort")
        parameters = {
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: l1_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: l2_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: l3_id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX: class_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX: id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT: sort,
        }
        if (text_format == TextFileFormat.MUNPARSED) and (
            (l1_id_regex is None) or (l2_id_regex is None) or
            (l3_id_regex is None)):
            self.print_error(
                u"You must specify --l1-id-regex and --l2-id-regex and --l3-id-regex for munparsed format"
            )
            return self.ERROR_EXIT_CODE
        if (text_format == TextFileFormat.UNPARSED) and (
                id_regex is None) and (class_regex is None):
            self.print_error(
                u"You must specify --id-regex and/or --class-regex for unparsed format"
            )
            return self.ERROR_EXIT_CODE

        language = gf.safe_unicode(self.actual_arguments[2])

        audio_file_path = self.actual_arguments[3]
        if not self.check_input_file(audio_file_path):
            return self.ERROR_EXIT_CODE

        text_file = self.get_text_file(text_format, text, parameters)
        if text_file is None:
            self.print_error(
                u"Unable to build a TextFile from the given parameters")
            return self.ERROR_EXIT_CODE
        elif len(text_file) == 0:
            self.print_error(u"No text fragments found")
            return self.ERROR_EXIT_CODE
        text_file.set_language(language)
        self.print_info(u"Read input text with %d fragments" %
                        (len(text_file)))

        self.print_info(u"Reading audio...")
        try:
            audio_file_mfcc = AudioFileMFCC(audio_file_path,
                                            rconf=self.rconf,
                                            logger=self.logger)
        except AudioFileConverterError:
            self.print_error(u"Unable to call the ffmpeg executable '%s'" %
                             (self.rconf[RuntimeConfiguration.FFMPEG_PATH]))
            self.print_error(u"Make sure the path to ffmpeg is correct")
            return self.ERROR_EXIT_CODE
        except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
            self.print_error(u"Cannot read file '%s'" % (audio_file_path))
            self.print_error(u"Check that its format is supported by ffmpeg")
            return self.ERROR_EXIT_CODE
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while reading the audio file:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE
        self.print_info(u"Reading audio... done")

        self.print_info(u"Running VAD...")
        audio_file_mfcc.run_vad()
        self.print_info(u"Running VAD... done")

        min_head = gf.safe_float(self.has_option_with_value(u"--min-head"),
                                 None)
        max_head = gf.safe_float(self.has_option_with_value(u"--max-head"),
                                 None)
        min_tail = gf.safe_float(self.has_option_with_value(u"--min-tail"),
                                 None)
        max_tail = gf.safe_float(self.has_option_with_value(u"--max-tail"),
                                 None)

        self.print_info(u"Detecting audio interval...")
        start_detector = SD(audio_file_mfcc,
                            text_file,
                            rconf=self.rconf,
                            logger=self.logger)
        start, end = start_detector.detect_interval(min_head, max_head,
                                                    min_tail, max_tail)
        self.print_info(u"Detecting audio interval... done")

        self.print_result(audio_file_mfcc.audio_length, start, end)
        return self.NO_ERROR_EXIT_CODE

    def print_result(self, audio_len, start, end):
        """
        Print result of SD.

        :param audio_len: the length of the entire audio file, in seconds
        :type  audio_len: float
        :param start: the start position of the spoken text
        :type  start: float
        :param end: the end position of the spoken text
        :type  end: float
        """
        msg = []
        zero = 0
        head_len = start
        text_len = end - start
        tail_len = audio_len - end
        msg.append(u"")
        msg.append(u"Head: %.3f %.3f (%.3f)" % (zero, start, head_len))
        msg.append(u"Text: %.3f %.3f (%.3f)" % (start, end, text_len))
        msg.append(u"Tail: %.3f %.3f (%.3f)" % (end, audio_len, tail_len))
        msg.append(u"")
        zero_h = gf.time_to_hhmmssmmm(0)
        start_h = gf.time_to_hhmmssmmm(start)
        end_h = gf.time_to_hhmmssmmm(end)
        audio_len_h = gf.time_to_hhmmssmmm(audio_len)
        head_len_h = gf.time_to_hhmmssmmm(head_len)
        text_len_h = gf.time_to_hhmmssmmm(text_len)
        tail_len_h = gf.time_to_hhmmssmmm(tail_len)
        msg.append("Head: %s %s (%s)" % (zero_h, start_h, head_len_h))
        msg.append("Text: %s %s (%s)" % (start_h, end_h, text_len_h))
        msg.append("Tail: %s %s (%s)" % (end_h, audio_len_h, tail_len_h))
        msg.append(u"")
        self.print_info(u"\n".join(msg))