def copy_files(self):
    """Populate the tuning directory with the audio and the stock model.

    Copies everything under ``self.audio_path`` into ``self.tuned_path``,
    then copies ``en-us``, ``cmudict-en-us.dict`` and ``en-us.lm.bin``
    from ``self.model_path`` into the same directory.
    """
    # Shell commands are used instead of Python file APIs so the steps
    # closely follow the directions on
    # https://cmusphinx.github.io/wiki/tutorialadapt/
    run_cmds(f"cd {self.audio_path} && cp -R * {self.tuned_path}")

    model_items = ("en-us", "cmudict-en-us.dict", "en-us.lm.bin")
    for item in model_items:
        source = os.path.join(self.model_path, item)
        # -a preserves attributes and recurses into the en-us directory.
        run_cmds(f"cp -a {source} {self.tuned_path}")
def write_test_files(self):
    """Assemble everything a test decode needs inside ``self.test_dir``.

    Creates ``self.test_dir`` and its ``wav/`` sub-directory, then copies
    into it (from ``self.tuned_path``) the file-id list, the
    transcription, the wav files, the language model, the dictionary,
    the ``en-us`` model directory and sphinxtrain's ``word_align.pl``.
    """
    makedirs(self.test_dir)
    wav_dir = os.path.join(self.test_dir, "wav/")
    makedirs(wav_dir)

    old_lm = os.path.join(self.tuned_path, "en-us.lm.bin")
    new_lm = os.path.join(self.test_dir, "en-us.lm.bin")
    old_dict = os.path.join(self.tuned_path, "cmudict-en-us.dict")
    new_dict = os.path.join(self.test_dir, "cmudict-en-us.dict")
    old_hmm = os.path.join(self.tuned_path, "en-us")
    new_hmm = os.path.join(self.test_dir, "en-us")

    # NOTE: the MLLR matrix is deliberately not copied; that copy was
    # already disabled, so the unused path variables were removed.
    run_cmds(
        [
            f"cd {self.tuned_path}",
            f"cp {self.file_ids_path} {self.test_file_ids_path}",
            f"cp {self.transcription_path} {self.test_transcription_path}",
            f"cp *wav {wav_dir}",
            f"cp {old_lm} {new_lm}",
            f"cp {old_dict} {new_dict}",
            f"cp -R {old_hmm} {new_hmm}",
            # word_align.pl is later executed from self.test_dir, so copy
            # it there rather than to a hard-coded "./test/".
            (f"cp {self.tuned_path}/sphinxtrain/scripts/decode/"
             f"word_align.pl {self.test_dir}/"),
        ],
        stdout=True)
def run_sphinx_fe(self):
    """Run ``sphinx_fe`` inside ``self.tuned_path``.

    The tool is pointed at ``en-us/feat.params`` and the file-id list in
    ``self.file_ids_path``; it reads ``.wav`` inputs from, and writes
    ``.mfc`` outputs to, the current directory.
    """
    extract_features = (
        "sphinx_fe -argfile en-us/feat.params "
        f"-samprate 16000 -c {self.file_ids_path} "
        "-di . -do . -ei wav -eo mfc -mswav yes"
    )
    steps = [f"cd {self.tuned_path}", extract_features]
    run_cmds(steps, stdout=True)
def run_mllr(self):
    """Run ``./mllr_solve`` inside ``self.tuned_path``.

    Writes its result to ``mllr_matrix`` using the means and variances
    under ``en-us`` and the accumulators in the current directory.

    NOTE: not nearly as effective as run_adapt, which is what is used
    now instead.
    """
    # The backslash-newline sequences are shell line continuations.
    solve = (
        "./mllr_solve\\\n"
        " -meanfn en-us/means \\\n"
        " -varfn en-us/variances \\\n"
        " -outmllrfn mllr_matrix -accumdir ."
    )
    steps = [f"cd {self.tuned_path}", solve]
    run_cmds(steps, stdout=True)
def run_adapt(self):
    """Run ``./map_adapt`` inside ``self.tuned_path``.

    ``en-us`` is first copied to ``en-us-adapt``; ``map_adapt`` then reads
    the model files from ``en-us`` plus the accumulators in the current
    directory and writes the adapted means, variances, mixture weights
    and transition matrices into ``en-us-adapt``.
    """
    map_adapt = " ".join([
        "./map_adapt",
        # Inputs: the unadapted model and the accumulator directory.
        "-moddeffn en-us/mdef.txt",
        "-ts2cbfn .cont.",
        "-meanfn en-us/means",
        "-varfn en-us/variances",
        "-mixwfn en-us/mixture_weights",
        "-tmatfn en-us/transition_matrices",
        "-accumdir .",
        # Outputs: written over the copies inside en-us-adapt.
        "-mapmeanfn en-us-adapt/means",
        "-mapvarfn en-us-adapt/variances",
        "-mapmixwfn en-us-adapt/mixture_weights",
        "-maptmatfn en-us-adapt/transition_matrices",
    ])
    run_cmds(
        [
            f"cd {self.tuned_path}",
            "cp -R en-us en-us-adapt",
            map_adapt,
        ],
        stdout=True)
def download_sphinxtrain(self):
    """Build sphinxtrain from source and copy its tools into the tuning dir.

    Clones sphinxtrain into ``self.tuned_path``, builds and installs it
    (``sudo make install``), then copies ``bw``, ``map_adapt``,
    ``mk_s2sendump`` and ``mllr_solve`` from
    ``/usr/local/libexec/sphinxtrain/`` into ``self.tuned_path``.
    """
    # Must get installed from source for fixes.
    # The previous clone URL was mangled into "[email protected]:..." and
    # was not a valid remote; HTTPS also needs no SSH key.
    url = "https://github.com/cmusphinx/sphinxtrain.git"
    run_cmds([
        f"cd {self.tuned_path}",
        f"git clone {url}",
        "cd sphinxtrain",
        "./autogen.sh",
        f"make -j {cpu_count()}",
        "sudo make install"
    ], stdout=True)

    for fname in ["bw", "map_adapt", "mk_s2sendump", "mllr_solve"]:
        old_path = os.path.join("/usr/local/libexec/sphinxtrain/", fname)
        new_path = os.path.join(self.tuned_path, fname)
        run_cmds(f"cp {old_path} {new_path}")
def convert_mdef(self):
    """Build pocketsphinx from source and convert ``en-us/mdef`` to text.

    Clones pocketsphinx into ``self.tuned_path``, builds and installs it,
    then runs the freshly built ``pocketsphinx_mdef_convert -text`` to
    write ``en-us/mdef.txt`` next to ``en-us/mdef``.
    """
    # The previous clone URL was mangled into "[email protected]:..." and
    # was not a valid remote; HTTPS also needs no SSH key.
    url = "https://github.com/cmusphinx/pocketsphinx.git"
    run_cmds([
        f"cd {self.tuned_path}",
        f"git clone {url}",
        "cd pocketsphinx",
        "./autogen.sh",
        f"make -j {cpu_count()}",
        "sudo make install"
    ], stdout=True)

    # Use the binary from the build tree rather than whatever is on PATH.
    tool_path = os.path.join(self.tuned_path,
                             "pocketsphinx",
                             "src",
                             "programs",
                             "pocketsphinx_mdef_convert")
    path = os.path.join(self.tuned_path, "en-us/mdef")
    run_cmds(f"{tool_path} -text {path} {path}.txt", stdout=True)
def install_sphinx_base(self):
    """Install build dependencies, then build sphinxbase from source.

    sphinxbase is cloned into ``<tuned_path>/sphinx-src``, built,
    installed with ``sudo make install``, and the resulting ``sphinx_fe``
    binary is copied into ``self.tuned_path``.
    """
    # https://bangladroid.wordpress.com/2017/02/16/installing-cmu-sphinx-on-ubuntu/
    build_deps = ("sudo apt-get install -y gcc automake autoconf libtool "
                  "bison swig python-dev libpulse-dev")
    run_cmds(build_deps)

    src_root = os.path.join(self.tuned_path, "sphinx-src")
    makedirs(src_root, remake=True)

    repo = "https://github.com/cmusphinx/sphinxbase.git"
    steps = [
        # A sudo command is issued first; presumably so any password
        # prompt happens before the long build starts.
        "sudo ls ",
        f"cd {src_root}",
        f"git clone {repo}",
        "cd sphinxbase",
        "./autogen.sh",
        f"make -j {cpu_count()}",
        "sudo make install",
        f"cp src/sphinx_fe/sphinx_fe {self.tuned_path}",
    ]
    run_cmds(steps, stdout=True)
def _install_chromedriver(self):
    """Download the latest chromedriver and unzip it next to the driver path.

    Any existing file at ``self.driver_path`` is deleted first. The
    archive is fetched to ``/tmp/chromedriver.zip`` and ``chromedriver``
    is extracted into the directory part of ``self.driver_path``.
    """
    # Everything before the final "/" of the driver path.
    install_dir = self.driver_path.rpartition('/')[0]

    if os.path.exists(self.driver_path):
        delete_paths(self.driver_path)

    # Installs chromedriver
    # https://gist.github.com/mikesmullin/2636776#gistcomment-2608206
    fetch_version = (
        "LATEST_VERSION=$(curl -s "
        "https://chromedriver.storage.googleapis.com/LATEST_RELEASE)"
    )
    download = (
        "wget -O /tmp/chromedriver.zip "
        "https://chromedriver.storage.googleapis.com/$LATEST_VERSION/"
        "chromedriver_linux64.zip"
    )
    unpack = f"unzip /tmp/chromedriver.zip chromedriver -d {install_dir};"

    run_cmds([" && ".join([fetch_version, download, unpack])])
def download_proper_en(self):
    """Replace ``en-us`` with the downloaded cmusphinx-en-us-5.2 model.

    Downloads the tarball to ``<tuned_path>/larger_sphinx.tar.gz``,
    deletes the existing ``<tuned_path>/en-us``, extracts the archive
    there, and then moves the extracted contents up one level so the
    model files sit directly inside ``en-us``.
    """
    url = ("https://phoenixnap.dl.sourceforge.net/project/cmusphinx/"
           "Acoustic%20and%20Language%20Models/US%20English/"
           "cmusphinx-en-us-5.2.tar.gz")
    path = os.path.join(self.tuned_path, "larger_sphinx.tar.gz")
    logging.info("downloading file, this may take a while")
    download_file(url, path)
    logging.info("downloaded")

    old_en_us_path = os.path.join(self.tuned_path, "en-us")
    with tarfile.open(path) as f:
        delete_paths(old_en_us_path)
        # NOTE(review): extractall trusts member paths in the archive;
        # acceptable only because the URL above is fixed.
        f.extractall(old_en_us_path)

    run_cmds([
        f"cd {old_en_us_path}",
        # Rename/gather whatever was extracted into a known folder name...
        "mv * old_folder",
        "cd old_folder",
        # ...then lift its contents up into en-us itself.
        "mv * ..",
        # Step back out before removing: previously "rm -rf old_folder"
        # ran from inside old_folder and left the empty directory behind.
        "cd ..",
        "rm -rf old_folder"
    ], stdout=True)
def run_bw(self):
    """Run ``sudo ./bw`` inside ``self.tuned_path``.

    ``bw`` is given the ``en-us`` model, the dictionary, the file-id list
    (``self.file_ids_name``) and the transcription
    (``self.transcription_name``), and writes its accumulators to the
    current directory.
    """
    # Disabled alternatives kept for reference:
    #   -ts2cbfn .ptm.   (instead of .cont.)
    #   -svspec 0-12/13-25/26-38   (instead of -lda)
    bw_args = [
        "sudo ./bw",
        " -hmmdir en-us",
        " -moddeffn en-us/mdef.txt",
        " -ts2cbfn .cont.",
        " -feat 1s_c_d_dd",
        " -lda en-us/feature_transform",
        " -cmn current",
        " -agc none",
        " -dictfn cmudict-en-us.dict",
        f" -ctlfn {self.file_ids_name}",
        f" -lsnfn {self.transcription_name}",
        " -accumdir .",
    ]
    # Each argument goes on its own shell line, joined by " \<newline>".
    bw_cmd = " \\\n".join(bw_args)
    run_cmds([f"cd {self.tuned_path}", bw_cmd], stdout=True)
def run_test_decoder(self):
    """Decode the test set twice: without, then with, the adapted model.

    On the second pass ``test/en-us`` is replaced by a copy of
    ``en-us-adapt``. Each pass runs ``pocketsphinx_batch`` from the
    pocketsphinx build tree inside ``self.test_dir``, scores the result
    with ``word_align.pl``, and waits for the user to press enter.
    """
    tool_path = os.path.join(self.tuned_path,
                             "pocketsphinx",
                             "src",
                             "programs",
                             "pocketsphinx_batch")

    for adapt in (False, True):
        if adapt:
            # Swap the adapted model in for the stock one.
            run_cmds([
                f"cd {self.tuned_path}",
                "rm -rf test/en-us",
                "cp -R en-us-adapt test/en-us",
            ])

        decode_args = [
            f"cd {self.test_dir} &&",
            f"{tool_path}",
            " -adcin yes",
            " -cepdir wav",
            " -cepext .wav",
            " -ctl test.fileids",
            " -lm en-us.lm.bin",
            " -dict cmudict-en-us.dict",
            " -hmm en-us",  # for example en-us
            " -hyp test.hyp",
        ]
        # Each argument goes on its own shell line, joined by " \<newline>".
        run_cmds(" \\\n".join(decode_args), stdout=True)

        score_steps = [
            f"cd {self.test_dir}",
            "perl word_align.pl test.transcription test.hyp",
        ]
        run_cmds(score_steps, stdout=True)

        input(f"test complete with adapt as {adapt}, hit enter")
def _install_google_chrome(self):
    """Update apt packages, then download and install Google Chrome.

    The ``.deb`` is downloaded into ``/tmp`` (any previous copy there is
    deleted first) and installed with ``sudo apt install``.
    """
    for apt_step in ("sudo apt-get update -y", "sudo apt-get upgrade -y"):
        run_cmds([apt_step])

    download_dir = "/tmp"
    deb_name = "google-chrome-stable_current_amd64.deb"
    deb_path = os.path.join(download_dir, deb_name)

    # Remove any earlier download before fetching a fresh copy.
    if os.path.exists(deb_path):
        delete_paths(deb_path)

    steps = [
        f"cd {download_dir}",
        f"wget https://dl.google.com/linux/direct/{deb_name}",
        f"sudo apt install ./{deb_name}",
    ]
    run_cmds(steps)
def run(self):
    """Install pocketsphinx, build and test a new model, then pause.

    Calls ``self.generate_new_model()`` followed by
    ``self.test_new_model()`` and finally blocks on a prompt reminding
    the user to back up the audio.
    """
    install_cmd = "sudo apt -y install pocketsphinx"
    run_cmds(install_cmd)

    self.generate_new_model()
    self.test_new_model()

    reminder = "backup audio in /etc/audio then press enter"
    input(reminder)