コード例 #1
0
def generate_scorer(directory, phrases):
    """Build a KenLM language model and a DeepSpeech scorer package.

    Writes *phrases* (one per line, via ``line_generator``) to the language
    model text file, runs ``generate_lm.py`` from the downloaded STT source
    tree to produce ``lm.binary``/``vocab-500000.txt``, then runs the
    ``generate_scorer_package`` native binary to package the scorer.

    Args:
        directory: Output directory for the language model and scorer files.
        phrases: Iterable of phrases to train the language model on.

    Returns:
        Path of the scorer file (``<directory>/scorer``).
        NOTE(review): this path is returned even when one of the external
        commands failed, so callers should verify the file actually exists.
    """
    print("Generating scorer: {}".format(directory))
    languagemodel_path = get_languagemodel_path(directory)
    with open(languagemodel_path, "w") as f:
        f.writelines(line_generator(phrases))
    version = "v{}".format(deepspeech.version())
    sourcedir_name = "STT{}".format(version)
    sourcedir_path = paths.sub(os.path.join("sources", sourcedir_name))
    kenlm_sourcedir = paths.sub(os.path.join("sources", "kenlm"))
    kenlm_bins = os.path.join(kenlm_sourcedir, 'build', 'bin')
    # Compute the scorer path once; it is used by the packaging command,
    # the final log line and the return value.
    scorer_path = os.path.join(directory, 'scorer')
    cmd = [
        'python',
        os.path.join(sourcedir_path, 'data', 'lm',
                     'generate_lm.py'), '--input_txt', languagemodel_path,
        '--output_dir', directory, '--kenlm_bins', kenlm_bins, '--binary_type',
        'trie', '--top_k', '500000', '--arpa_order', '5', '--max_arpa_memory',
        '10%', '--arpa_prune', '0|0|1', '--binary_a_bits', '255',
        '--binary_q_bits', '8', '--discount_fallback'
    ]
    completed_process = run_command(cmd, 2)
    if completed_process.returncode == 0:
        # There should be an additional step here where the
        # default values of alpha and beta are generated by
        # lm_optimizer. However, that involves building a
        # dev set from data in the audiolog.
        # The generate_scorer_package binary is taken from the
        # native_client release archive (x86_64 and rpi builds are
        # published on GitHub; other architectures must build
        # //native_client:generate_scorer_package with bazel).
        print("Generating scorer package")
        binarydir_name = "native_client{}".format(version)
        binarydir_path = paths.sub(
            os.path.join("sources", binarydir_name, "generate_scorer_package"))
        cmd = [
            binarydir_path, '--alphabet',
            os.path.join(sourcedir_path, 'data', 'alphabet.txt'), '--lm',
            os.path.join(directory, 'lm.binary'), '--vocab',
            os.path.join(directory, 'vocab-500000.txt'), '--package',
            scorer_path, '--default_alpha',
            '0.931289105002', '--default_beta', '1.18341375810284'
        ]
        completed_process = run_command(cmd, 2)
        if completed_process.returncode != 0:
            print(completed_process.stderr.decode("UTF-8"))
    else:
        print(completed_process.stderr.decode("UTF-8"))
    print(scorer_path)
    return scorer_path
コード例 #2
0
 def __call__(self, *args, **kwargs):
     """Print the installed DeepSpeech version and terminate the process."""
     ds_version = version()
     print('DeepSpeech ', ds_version)
     exit(0)
コード例 #3
0
File: __main__.py  Project: norbusan/susi-assistant
def install_uninstall(args):
    """Install or uninstall optional SUSI data components.

    Expected invocation: ``args = [prog, 'install'|'uninstall', component]``
    with an optional trailing language argument for ``deepspeech-data``.
    Supported components: ``flite-data``, ``susi-server``, ``susi-skillpad``
    (no extra argument) and ``deepspeech-data`` (optional language, currently
    only ``en-US``).

    Raises:
        ValueError: when the argument list does not match one of the
            supported invocations.
    """
    # --- argument validation (raises before any filesystem work) ---
    if len(args) < 3:
        raise ValueError(f"incorrect invocation of {args[1]} action", args[2:])
    if args[2] == 'flite-data' or args[2] == 'susi-server' or args[2] == 'susi-skillpad':
        if len(args) > 3:
            raise ValueError(f"incorrect invocation of {args[1]} action", args[2:])
    elif args[2] == 'deepspeech-data':
        if len(args) > 4:
            raise ValueError(f"incorrect invocation of {args[1]} action", args[2:])
        elif len(args) == 4 and args[3] != "en-US":
            # BUGFIX: message previously said "en_US" although the accepted
            # value is "en-US".
            raise ValueError("currently only en-US is supported language for DeepSpeech data", args[2:])
    else:
        raise ValueError(f"incorrect invocation of {args[1]} action", args[2:])

    cfg = SusiConfig()
    basepath = cfg.get('path.base')
    flitep = cfg.get('path.flite_speech')
    dsp = cfg.get('path.deepspeech')
    susihome = cfg.get('path.susi_home')

    # Resolve configured paths relative to the base path when not absolute.
    if not os.path.isabs(flitep):
        flitep = os.path.abspath(os.path.join(basepath, flitep))
    if not os.path.isabs(dsp):
        dsp = os.path.abspath(os.path.join(basepath, dsp, "en-US"))

    if args[1] == 'install':
        if args[2] == 'flite-data':
            if not os.path.exists(flitep):
                download_file_with_progress(FLITE_URL, flitep)
                print(f"Successfully installed {flitep}")
            else:
                print(f"Already available: {flitep}")
        elif args[2] == 'deepspeech-data':
            # need to find the currently used deepspeech version number
            import deepspeech
            ds_version = deepspeech.version()
            if not os.path.exists(dsp):
                os.makedirs(dsp)
            # Fetch every model artifact that is not already present.
            for ext in ['pbmm', 'tflite', 'scorer']:
                fn = f"deepspeech-{ds_version}-models.{ext}"
                url = f"{DEEPSPEECH_URL % ds_version}/{fn}"
                dest = os.path.join(dsp, fn)
                if not os.path.exists(dest):
                    download_file_with_progress(url, dest)
                else:
                    print(f"Already available: {dest}")
            # BUGFIX: "Successfully install" -> "Successfully installed"
            print(f"Successfully installed DeepSpeech model files in {dsp}")
        elif args[2] == 'susi-server':
            # BUGFIX: println() does not exist in Python (NameError)
            print("TODO!!!")
        elif args[2] == 'susi-skillpad':
            print("TODO!!!")
        else: # already checked above, though ...
            raise ValueError(f"incorrect invocation of {args[1]} action", args[2:])

    elif args[1] == 'uninstall':
        if args[2] == 'flite-data':
            if os.path.exists(flitep):
                try:
                    os.remove(flitep)
                    print(f"Removed {flitep}")
                except OSError as e:
                    print(f"Cannot remove flite file {flitep}\nError: {e.strerror}")
            else:
                print(f"Not present: {flitep}, nothing to remove!")
        elif args[2] == 'deepspeech-data':
            if os.path.exists(dsp) and os.path.isdir(dsp):
                try:
                    shutil.rmtree(dsp)
                    print(f"Removed directory {dsp}")
                except OSError as e:
                    print(f"Cannot remove DeepSpeech data dir {dsp}\nError: {e.strerror}")
            else:
                print(f"Either not present or not a directory: {dsp}, nothing to remove!")
        elif args[2] == 'susi-server':
            print("TODO!!!")
        elif args[2] == 'susi-skillpad':
            print("TODO!!!")
        else: # already checked above, though ...
            raise ValueError(f"incorrect invocation of {args[1]} action", args[2:])

    else:
        raise ValueError("unknown variant of install action", args[2])
コード例 #4
0
    def __init__(self, *args, **kwargs):
        """
        Create Plugin Instance.

        First-run bootstrap for the DeepSpeech STT plugin. On construction
        this downloads and/or builds everything the plugin needs:

        * the Mozilla STT source tree (for ``data/lm/generate_lm.py``),
        * the prebuilt ``native_client`` archive (for
          ``generate_scorer_package``; only x86_64 and armv7l are covered),
        * KenLM, cloned and built with cmake/make,
        * the acoustic model (``.pbmm``, or ``.tflite`` on armv7l),

        then creates the deepspeech Model and attaches a scorer built from
        the compiled vocabulary. Fatal download/build failures log the
        error and terminate the process via ``exit(1)``.
        """
        plugin.STTPlugin.__init__(self, *args, **kwargs)

        # Check that we have the correct project source downloaded
        # Currently we are only using this for the generate_lm.py
        # script.
        version = "v{}".format(deepspeech.version())
        sourcedir_name = "STT{}".format(version)
        sourcedir_path = paths.sub(os.path.join("sources", sourcedir_name))
        if (not os.path.isdir(sourcedir_path)):
            # use git to download the appropriate source directory
            # (-b checks out the tag matching the installed deepspeech
            # version so generate_lm.py matches the runtime).
            print(
                "Downloading (cloning) Mozilla DeepSpeech Source to {}".format(
                    sourcedir_path))
            cmd = [
                'git', 'clone', '-b', version,
                'https://github.com/mozilla/STT', sourcedir_path
            ]
            completed_process = run_command(cmd, 2)
            if (completed_process.returncode != 0):
                self._logger.error(completed_process.stderr.decode("UTF-8"))
                exit(1)
        # Download the release binaries. We need to get the
        # generate_scorer_package from here.
        binarydir_name = "native_client{}".format(version)
        binarydir_path = paths.sub(os.path.join("sources", binarydir_name))
        if (not os.path.isdir(binarydir_path)):
            # NOTE(review): if a previous run created this directory but the
            # download/extract below failed, this guard prevents a retry.
            os.makedirs(binarydir_path)
            binary_url = None
            if (platform.machine() == "x86_64"):
                arch = "amd64"
                binary_url = 'https://github.com/mozilla/DeepSpeech/releases/download/{}/native_client.{}.cpu.linux.tar.xz'.format(
                    version, arch)
            if (platform.machine() == "armv7l"):
                arch = "rpi3"
                binary_url = 'https://github.com/mozilla/DeepSpeech/releases/download/{}/native_client.{}.cpu.linux.tar.xz'.format(
                    version, arch)
            # Unfortunately, it does not appear that the native client is
            # compiled for x86 machines, so Raspbianx86 users are out of luck
            # for right now.
            if (binary_url is None):
                print(
                    "Architecture not recognized. Please add it to {}".format(
                        __file__))
            else:
                print("Downloading native client binaries from {}".format(
                    binary_url))
                cmd = [
                    'wget', binary_url,
                    '--directory-prefix={}'.format(binarydir_path),
                    '--output-document={}'.format(
                        os.path.join(binarydir_path, 'native_client.tar.xz'))
                ]
                print(" ".join(cmd))
                completed_process = run_command(cmd, 2)
                if (completed_process.returncode == 0):
                    # unzip the archive into the binarydir_path directory
                    cmd = [
                        'tar', 'Jxvf',
                        os.path.join(binarydir_path, 'native_client.tar.xz'),
                        '-C', binarydir_path
                    ]
                    completed_process = run_command(cmd, 2)
                    if (completed_process.returncode != 0):
                        print(completed_process.stderr.decode("UTF-8"))
                else:
                    print(completed_process.stderr.decode("UTF-8"))
        kenlm_sourcedir = paths.sub(os.path.join("sources", "kenlm"))
        if (not os.path.isdir(kenlm_sourcedir)):
            # use git to download kenlm
            print("Cloning KenLM")
            cmd = [
                'git', 'clone', 'https://github.com/kpu/kenlm.git',
                kenlm_sourcedir
            ]
            completed_process = run_command(cmd, 2)
            if (completed_process.returncode == 0):
                # build kenlm (out-of-source cmake build in kenlm/build)
                print("Building KenLM")
                build_dir = os.path.join(kenlm_sourcedir, "build")
                if (not os.path.isdir(build_dir)):
                    os.makedirs(build_dir)
                    # NOTE(review): os.chdir changes the process-wide working
                    # directory and looks redundant with cwd=build_dir passed
                    # to run_command below — confirm and consider removing.
                    os.chdir(build_dir)
                    cmd = ['cmake', '..']
                    completed_process = run_command(cmd, 2, cwd=build_dir)
                    if (completed_process.returncode == 0):
                        cmd = ['make']
                        completed_process = run_command(cmd, 2, cwd=build_dir)
                        if (completed_process.returncode != 0):
                            self._logger.error(
                                completed_process.stderr.decode("UTF-8"))
                            exit(1)
                    else:
                        self._logger.error(
                            completed_process.stderr.decode("UTF-8"))
                        exit(1)
            else:
                self._logger.error(completed_process.stderr.decode("UTF-8"))
                exit(1)

        # Beam width used in the CTC decoder when building candidate
        # transcriptions
        self._BEAM_WIDTH = profile.get(['deepspeech', 'beam_width'], 500)

        # Only 16KHz files are currently supported
        self._FS = profile.get(['deepspeech', 'fs'], 16000)

        # These are paths. They are required.
        # Path to the model (protocol buffer binary file)
        working_dir = os.path.expanduser(
            profile.get(['deepspeech', 'working_dir']))
        if (not os.path.isdir(working_dir)):
            os.makedirs(working_dir)
        download_url = 'https://github.com/mozilla/DeepSpeech/releases/download/{}/deepspeech-{}-models.pbmm'.format(
            version, deepspeech.version())
        self._MODEL = os.path.join(working_dir,
                                   "model_{}.pbmm".format(version))
        # armv7l (Raspberry Pi) uses the tflite model instead of pbmm.
        if (platform.machine() == 'armv7l'):
            download_url = 'https://github.com/mozilla/DeepSpeech/releases/download/{}/deepspeech-{}-models.tflite'.format(
                version, deepspeech.version())
            self._MODEL = os.path.join(working_dir,
                                       "model_{}.tflite".format(version))
        self._logger.info("Model: {}".format(self._MODEL))
        if (not os.path.isfile(self._MODEL)):
            print("Downloading {}".format(download_url))
            # FIXME it would be good to have a progress indicator here.
            # This can take a long time depending on your bandwidth.
            self._MODEL = app_utils.download_file(download_url, self._MODEL)
            print("Saved as {}".format(self._MODEL))
            print("Download completed")
        self._ds = deepspeech.Model(self._MODEL)
        # Build (or reuse) the scorer from the compiled vocabulary and
        # attach it to the model.
        scorer_file = os.path.join(self.compile_vocabulary(generate_scorer),
                                   "scorer")
        self._ds.enableExternalScorer(scorer_file)
コード例 #5
0
def _pip_reported_version(pip_command):
    """Return the deepspeech version reported by ``<pip_command> show deepspeech``, or None."""
    import subprocess
    try:
        text = subprocess.getoutput("{} show deepspeech".format(pip_command))
    except Exception:
        return None
    if text is None:
        return None
    for line in text.split("\n"):
        if "Version: " in line:
            found = line.replace("Version: ", "").strip()
            if found:
                return found
    return None


def download_models(model_type="pbmm",
                    version=None,
                    include_scorer=False,
                    overwrite=False):
    """Download DeepSpeech model files from the Mozilla GitHub releases.

    Args:
        model_type: "pbmm" or "tflite" (case-insensitive).
        version: DeepSpeech version string; when None it is auto-detected
            from the installed deepspeech package or via pip3/pip.
        include_scorer: Also download the matching scorer file.
        overwrite: Passed through to _downloadFile; re-download if present.

    Returns:
        True on success, False when the download failed or *model_type*
        is not recognized.

    Raises:
        SystemExit: when no deepspeech version can be determined.
    """
    # Guard clause: unknown model type is an error, not an exception.
    if str(model_type).lower() not in ["pbmm", "tflite"]:
        logging.error("Model type (" + str(model_type) + ") not expected.")
        return False

    if version is None:
        try:
            import deepspeech
            version = deepspeech.version()
        except Exception:
            pass

    # Fall back to asking pip3, then pip (deduplicated into a helper;
    # previously the same parsing loop was written out twice).
    if version is None:
        version = _pip_reported_version("pip3") or _pip_reported_version("pip")

    if version is None:
        print("Unable to determine deepspeech version.")
        # BUGFIX: quit() is provided by the site module and may be absent
        # (e.g. python -S); raise SystemExit(1) directly instead.
        raise SystemExit(1)
    print("Identified deepspeech version as " + version)

    base_url = ("https://github.com/mozilla/DeepSpeech/releases/download/v"
                + version + "/deepspeech-" + version + "-models.")
    model_url = base_url + str(model_type).lower()
    scorer_url = base_url + "scorer"

    print("Downloading", model_url)
    ret = _downloadFile(model_url, "speech", overwrite=overwrite)

    if ret and include_scorer:
        print("Downloading", scorer_url)
        ret = _downloadFile(scorer_url, "speech", overwrite=overwrite)

    if not ret:
        print("An error occurred downloading the models.")

    return ret