def generate_scorer(directory, phrases):
    """Build a KenLM language model and a DeepSpeech scorer from *phrases*.

    Writes the phrase list to the plugin's language-model text file, runs
    DeepSpeech's ``generate_lm.py`` to produce ``lm.binary`` and the vocab
    file in *directory*, then runs the prebuilt ``generate_scorer_package``
    binary to package them into a scorer.

    Args:
        directory: output directory for lm.binary / vocab / scorer files.
        phrases: iterable of phrases fed to ``line_generator``.

    Returns:
        Path of the generated scorer file, or None when the language-model
        generation step fails.
    """
    print("Generating scorer: {}".format(directory))
    languagemodel_path = get_languagemodel_path(directory)
    with open(languagemodel_path, "w") as f:
        f.writelines(line_generator(phrases))
    version = "v{}".format(deepspeech.version())
    sourcedir_name = "STT{}".format(version)
    sourcedir_path = paths.sub(os.path.join("sources", sourcedir_name))
    kenlm_sourcedir = paths.sub(os.path.join("sources", "kenlm"))
    kenlm_bins = os.path.join(kenlm_sourcedir, 'build', 'bin')
    cmd = [
        'python',
        os.path.join(sourcedir_path, 'data', 'lm', 'generate_lm.py'),
        '--input_txt', languagemodel_path,
        '--output_dir', directory,
        '--kenlm_bins', kenlm_bins,
        '--binary_type', 'trie',
        '--top_k', '500000',
        '--arpa_order', '5',
        '--max_arpa_memory', '10%',
        '--arpa_prune', '0|0|1',
        '--binary_a_bits', '255',
        '--binary_q_bits', '8',
        '--discount_fallback'
    ]
    completed_process = run_command(cmd, 2)
    if completed_process.returncode != 0:
        # FIX: a failed generate_lm.py run used to be silently ignored
        # (the function fell off the end with no diagnostics at all).
        print(completed_process.stderr.decode("UTF-8"))
        return None
    # There should be an additional step here where the default values of
    # alpha and beta are generated by lm_optimizer.  However, that involves
    # building a dev set from data in the audiolog.
    #
    # Generate the scorer package.  The generate_scorer_package binary comes
    # from the native_client release archive downloaded in __init__;
    # prebuilt x86_64 / rpi binaries are published on GitHub, other
    # architectures need a bazel build of
    # //native_client:generate_scorer_package.
    print("Generating scorer package")
    binarydir_name = "native_client{}".format(version)
    binarydir_path = paths.sub(
        os.path.join("sources", binarydir_name, "generate_scorer_package"))
    cmd = [
        binarydir_path,
        '--alphabet', os.path.join(sourcedir_path, 'data', 'alphabet.txt'),
        '--lm', os.path.join(directory, 'lm.binary'),
        '--vocab', os.path.join(directory, 'vocab-500000.txt'),
        '--package', os.path.join(directory, 'scorer'),
        '--default_alpha', '0.931289105002',
        '--default_beta', '1.18341375810284'
    ]
    completed_process = run_command(cmd, 2)
    # generate_scorer_package logs to stderr even on success, so echo it
    # in both cases.
    print(completed_process.stderr.decode("UTF-8"))
    if completed_process.returncode == 0:
        print(os.path.join(directory, 'scorer'))
    return os.path.join(directory, 'scorer')
def __call__(self, *args, **kwargs):
    """Report the installed DeepSpeech version and terminate with status 0."""
    ver = version()
    print('DeepSpeech ', ver)
    # exit(0) and raising SystemExit(0) are equivalent.
    raise SystemExit(0)
def install_uninstall(args):
    """Install or uninstall optional SUSI data components.

    Args:
        args: sys.argv-style list: ``[prog, 'install'|'uninstall',
            component, (optional language)]`` where component is one of
            flite-data, deepspeech-data, susi-server, susi-skillpad.

    Raises:
        ValueError: on any malformed invocation (wrong argument count,
            unknown component or action, unsupported language).
    """
    if len(args) < 3:
        # FIX: guard args[1] so a bare invocation raises ValueError
        # instead of IndexError.
        action = args[1] if len(args) > 1 else "install/uninstall"
        raise ValueError(f"incorrect invocation of {action} action", args[2:])
    if args[2] == 'flite-data' or args[2] == 'susi-server' or args[2] == 'susi-skillpad':
        if len(args) > 3:
            raise ValueError(f"incorrect invocation of {args[1]} action", args[2:])
    elif args[2] == 'deepspeech-data':
        if len(args) > 4:
            raise ValueError(f"incorrect invocation of {args[1]} action", args[2:])
        elif len(args) == 4 and args[3] != "en-US":
            # FIX: message said "en_US" while the code checks "en-US".
            raise ValueError("currently only en-US is supported language for DeepSpeech data", args[2:])
    else:
        raise ValueError(f"incorrect invocation of {args[1]} action", args[2:])
    cfg = SusiConfig()
    basepath = cfg.get('path.base')
    flitep = cfg.get('path.flite_speech')
    dsp = cfg.get('path.deepspeech')
    susihome = cfg.get('path.susi_home')
    # Config paths may be relative to the base path; normalize to absolute.
    if not os.path.isabs(flitep):
        flitep = os.path.abspath(os.path.join(basepath, flitep))
    if not os.path.isabs(dsp):
        dsp = os.path.abspath(os.path.join(basepath, dsp, "en-US"))
    if args[1] == 'install':
        if args[2] == 'flite-data':
            if not os.path.exists(flitep):
                download_file_with_progress(FLITE_URL, flitep)
                print(f"Successfully installed {flitep}")
            else:
                print(f"Already available: {flitep}")
        elif args[2] == 'deepspeech-data':
            # need to find the currently used deepspeech version number
            import deepspeech
            ds_version = deepspeech.version()
            if not os.path.exists(dsp):
                os.makedirs(dsp)
            for ext in ['pbmm', 'tflite', 'scorer']:
                fn = f"deepspeech-{ds_version}-models.{ext}"
                url = f"{DEEPSPEECH_URL % ds_version}/{fn}"
                dest = os.path.join(dsp, fn)
                if not os.path.exists(dest):
                    download_file_with_progress(url, dest)
                else:
                    print(f"Already available: {dest}")
            # FIX: grammar ("install" -> "installed").
            print(f"Successfully installed DeepSpeech model files in {dsp}")
        elif args[2] == 'susi-server':
            # FIX: println is not a Python builtin (NameError).
            print("TODO!!!")
        elif args[2] == 'susi-skillpad':
            print("TODO!!!")
        else:
            # already checked above, though ...
            raise ValueError(f"incorrect invocation of {args[1]} action", args[2:])
    elif args[1] == 'uninstall':
        if args[2] == 'flite-data':
            if os.path.exists(flitep):
                try:
                    os.remove(flitep)
                    print(f"Removed {flitep}")
                except OSError as e:
                    print(f"Cannot remove flite file {flitep}\nError: {e.strerror}")
            else:
                print(f"Not present: {flitep}, nothing to remove!")
        elif args[2] == 'deepspeech-data':
            if os.path.exists(dsp) and os.path.isdir(dsp):
                try:
                    shutil.rmtree(dsp)
                    print(f"Removed directory {dsp}")
                except OSError as e:
                    print(f"Cannot remove DeepSpeech data dir {dsp}\nError: {e.strerror}")
            else:
                print(f"Either not present or not a directory: {dsp}, nothing to remove!")
        elif args[2] == 'susi-server':
            print("TODO!!!")
        elif args[2] == 'susi-skillpad':
            print("TODO!!!")
        else:
            # already checked above, though ...
            raise ValueError(f"incorrect invocation of {args[1]} action", args[2:])
    else:
        raise ValueError("unknown variant of install action", args[2])
def __init__(self, *args, **kwargs):
    """
    Create Plugin Instance

    Bootstraps everything the DeepSpeech STT plugin needs on first run:
    clones the Mozilla STT source (for generate_lm.py), downloads and
    unpacks the native_client release binaries (for
    generate_scorer_package), clones and builds KenLM, downloads the
    acoustic model, and finally loads the model plus an external scorer
    built from the plugin vocabulary.  Performs network and build I/O,
    and calls exit(1) on clone/build failure.
    """
    plugin.STTPlugin.__init__(self, *args, **kwargs)
    # Check that we have the correct project source downloaded
    # Currently we are only using this for the generate_lm.py
    # script.
    version = "v{}".format(deepspeech.version())
    sourcedir_name = "STT{}".format(version)
    sourcedir_path = paths.sub(os.path.join("sources", sourcedir_name))
    if (not os.path.isdir(sourcedir_path)):
        # use git to download the appropriate source directory,
        # pinned (-b) to the branch/tag matching the installed
        # deepspeech python package version
        print(
            "Downloading (cloning) Mozilla DeepSpeech Source to {}".format(
                sourcedir_path))
        cmd = [
            'git', 'clone', '-b', version,
            'https://github.com/mozilla/STT', sourcedir_path
        ]
        completed_process = run_command(cmd, 2)
        if (completed_process.returncode != 0):
            self._logger.error(completed_process.stderr.decode("UTF-8"))
            exit(1)
    # Download the release binaries. We need to get the
    # generate_scorer_package from here.
    binarydir_name = "native_client{}".format(version)
    binarydir_path = paths.sub(os.path.join("sources", binarydir_name))
    if (not os.path.isdir(binarydir_path)):
        os.makedirs(binarydir_path)
        # Pick the release asset matching this machine's architecture.
        binary_url = None
        if (platform.machine() == "x86_64"):
            arch = "amd64"
            binary_url = 'https://github.com/mozilla/DeepSpeech/releases/download/{}/native_client.{}.cpu.linux.tar.xz'.format(
                version, arch)
        if (platform.machine() == "armv7l"):
            arch = "rpi3"
            binary_url = 'https://github.com/mozilla/DeepSpeech/releases/download/{}/native_client.{}.cpu.linux.tar.xz'.format(
                version, arch)
        # Unfortunately, it does not appear that the native client is
        # compiled for x86 machines, so Raspbianx86 users are out of luck
        # for right now.
        if (binary_url is None):
            print("Architecture not recognized. "
                  "Please add it to {}".format(__file__))
        else:
            print("Downloading native client binaries from {}".format(
                binary_url))
            cmd = [
                'wget', binary_url,
                '--directory-prefix={}'.format(binarydir_path),
                '--output-document={}'.format(
                    os.path.join(binarydir_path, 'native_client.tar.xz'))
            ]
            print(" ".join(cmd))
            completed_process = run_command(cmd, 2)
            if (completed_process.returncode == 0):
                # unzip the archive into the binarydir_path directory
                cmd = [
                    'tar', 'Jxvf',
                    os.path.join(binarydir_path, 'native_client.tar.xz'),
                    '-C', binarydir_path
                ]
                completed_process = run_command(cmd, 2)
                if (completed_process.returncode != 0):
                    print(completed_process.stderr.decode("UTF-8"))
            else:
                print(completed_process.stderr.decode("UTF-8"))
    # KenLM provides the n-gram toolchain generate_lm.py shells out to.
    kenlm_sourcedir = paths.sub(os.path.join("sources", "kenlm"))
    if (not os.path.isdir(kenlm_sourcedir)):
        # use git to download kenlm
        print("Cloning KenLM")
        cmd = [
            'git', 'clone', 'https://github.com/kpu/kenlm.git',
            kenlm_sourcedir
        ]
        completed_process = run_command(cmd, 2)
        if (completed_process.returncode == 0):
            # build kenlm (cmake + make in an out-of-source build dir)
            print("Building KenLM")
            build_dir = os.path.join(kenlm_sourcedir, "build")
            if (not os.path.isdir(build_dir)):
                os.makedirs(build_dir)
            # NOTE(review): chdir looks redundant given cwd=build_dir is
            # passed to run_command below — confirm before removing.
            os.chdir(build_dir)
            cmd = ['cmake', '..']
            completed_process = run_command(cmd, 2, cwd=build_dir)
            if (completed_process.returncode == 0):
                cmd = ['make']
                completed_process = run_command(cmd, 2, cwd=build_dir)
                if (completed_process.returncode != 0):
                    self._logger.error(
                        completed_process.stderr.decode("UTF-8"))
                    exit(1)
            else:
                self._logger.error(
                    completed_process.stderr.decode("UTF-8"))
                exit(1)
        else:
            self._logger.error(completed_process.stderr.decode("UTF-8"))
            exit(1)
    # Beam width used in the CTC decoder when building candidate
    # transcriptions
    self._BEAM_WIDTH = profile.get(['deepspeech', 'beam_width'], 500)
    # Only 16KHz files are currently supported
    self._FS = profile.get(['deepspeech', 'fs'], 16000)
    # These are paths. They are required.
    # Path to the model (protocol buffer binary file)
    working_dir = os.path.expanduser(
        profile.get(['deepspeech', 'working_dir']))
    if (not os.path.isdir(working_dir)):
        os.makedirs(working_dir)
    download_url = 'https://github.com/mozilla/DeepSpeech/releases/download/{}/deepspeech-{}-models.pbmm'.format(
        version, deepspeech.version())
    self._MODEL = os.path.join(working_dir, "model_{}.pbmm".format(version))
    # On ARM the release ships a TFLite model instead of a .pbmm.
    if (platform.machine() == 'armv7l'):
        download_url = 'https://github.com/mozilla/DeepSpeech/releases/download/{}/deepspeech-{}-models.tflite'.format(
            version, deepspeech.version())
        self._MODEL = os.path.join(working_dir,
                                   "model_{}.tflite".format(version))
    self._logger.info("Model: {}".format(self._MODEL))
    if (not os.path.isfile(self._MODEL)):
        print("Downloading {}".format(download_url))
        # FIXME it would be good to have a progress indicator here.
        # This can take a long time depending on your bandwidth.
        self._MODEL = app_utils.download_file(download_url, self._MODEL)
        print("Saved as {}".format(self._MODEL))
        print("Download completed")
    self._ds = deepspeech.Model(self._MODEL)
    # compile_vocabulary invokes generate_scorer and returns the
    # vocabulary directory containing the packaged "scorer" file.
    scorer_file = os.path.join(self.compile_vocabulary(generate_scorer),
                               "scorer")
    self._ds.enableExternalScorer(scorer_file)
def download_models(model_type="pbmm", version=None, include_scorer=False, overwrite=False): if str(model_type).lower() in ["pbmm", "tflite"]: if version is None: try: import deepspeech version = deepspeech.version() except Exception: pass if version is None: import subprocess try: text = subprocess.getoutput("pip3 show deepspeech") if text is not None: lines = text.split("\n") for line in lines: if "Version: " in line: version = line.replace("Version: ", "").strip() if version == "": version = None except Exception: pass if version is None: try: text = subprocess.getoutput("pip show deepspeech") if text is not None: lines = text.split("\n") for line in lines: if "Version: " in line: version = line.replace("Version: ", "").strip() if version == "": version = None except Exception: pass if version is None: print("Unable to determine deepspeech version.") quit(1) else: print("Identified deepspeech version as " + version) model_url = "https://github.com/mozilla/DeepSpeech/releases/download/v" + version + "/deepspeech-" + version + "-models." + str( model_type).lower() scorer_url = "https://github.com/mozilla/DeepSpeech/releases/download/v" + version + "/deepspeech-" + version + "-models.scorer" print("Downloading", model_url) ret = _downloadFile(model_url, "speech", overwrite=overwrite) if ret and include_scorer: print("Downloading", scorer_url) ret = _downloadFile(scorer_url, "speech", overwrite=overwrite) if not ret: print("An error occurred downloading the models.") return ret else: logging.error("Model type (" + str(model_type) + ") not expected.") return False