def main():
    """Prompt for and record every dictionary entry, NUMREPEAT times each.

    Command line: SoundPrintCollector.py <dict> <config> <output-folder>

    Reads NUMREPEAT from the config file (an empty value means one take),
    loads the dictionary, creates the output folder under TRAIN_DIR when it
    does not exist, and calls Collect once per (entry, take) pair with the
    target ``<model-id>-<take>.wav`` path and the prompt text.
    """
    if len(argv) != 4:
        exit("Usage: SoundPrintCollector.py <dict> <config> <output-folder>")

    # Get configuration
    repeat_setting = ParseConfig(CONFIG_DIR + argv[2] + '.conf', 'NUMREPEAT')
    num_repeat = int(repeat_setting) if repeat_setting != '' else 1

    words, model_id = GetDictionary(DICT_DIR + argv[1] + '.txt')
    remaining = len(words) * num_repeat

    output_dir = TRAIN_DIR + argv[3] + '/'
    if not isdir(output_dir):
        mkdir(output_dir)

    # Collect sound print for single model
    for i, script in enumerate(words):
        for take in range(num_repeat):
            # The counter is decremented first, so the message reports what
            # is left after the recording about to be made.
            remaining -= 1
            print(str(remaining) + ' transcript(s) remaining.')

            if '!' in script:
                # Entries containing '!' get the background-noise prompt.
                instruction = ('Press <Enter> to record background noise.\n'
                               + script)
            else:
                instruction = ('Get ready to speak the following script and press <Enter> to start record.\n'
                               + script
                               + '\n Remember to leave 3 seconds of blank before and after the utterance.\n')

            wav_path = output_dir + model_id[i] + '-' + str(take) + '.wav'
            Collect(wav_path, instruction)

    print('Done\a')
import numpy as np
from x64.Release.TrainCore import VDecode
from glob import glob
from pdb import set_trace
from os import getcwd

# NOTE(review): `sys`, ParseConfig, GetDictionary and ReadModel are used below
# but are not imported in this excerpt -- confirm they are in scope.

# Project folders, all built from the current working directory.
MAIN_DIR = getcwd() + '/'
MFCC_FOLDER = MAIN_DIR + 'mfcc/single/'
MODEL_FOLDER = MAIN_DIR + 'model/'
DICT_DIR = MAIN_DIR + 'dict/'
CONFIG_DIR = MAIN_DIR + 'config/'

# Validate the command line BEFORE sys.argv[2] is indexed. With the check
# placed after the config lookup, a missing argument raised IndexError and
# the usage message could never be shown.
if len(sys.argv) < 3:
    sys.exit("Usage: Decoder.py <dict> <config>")

# Get iteration time limit from config
# Get configuration
conf_filename = CONFIG_DIR + sys.argv[2] + '.conf'
max_iter = int(ParseConfig(conf_filename, 'MAXITER'))

#########################################################################
#                              MAIN ENTRY                               #
#########################################################################
words, model_id = GetDictionary(DICT_DIR + sys.argv[1] + '.txt')

models = []
for k in range(len(model_id)):
    # Load model
    model_filename = MODEL_FOLDER + model_id[k] + '.xml'
    name, states, num_states, num_components, dim_observation, log_trans, log_coef, mean, log_var = ReadModel(
        model_filename)
#########################################################################
#                              MAIN ENTRY                               #
#########################################################################
if len(argv) != 4:
    exit("Usage: ModelIitializer.py <dict> <config> <mfcc-dir>")

# Training MFCC folder selected by the third command-line argument.
MFCC_FOLDER = MAIN_DIR + 'mfcc/train/' + argv[3] + '/'
words, model_id = GetDictionary(DICT_DIR + argv[1] + '.txt')

num_components_key = 'NUMCOMPONENTS'
num_repeat_key = 'NUMREPEAT'

# Get configuration
conf_filename = CONFIG_DIR + argv[2] + '.conf'
num_components = ParseConfig(conf_filename, num_components_key)
num_repeat = ParseConfig(conf_filename, num_repeat_key)

# An empty value falls back to a default of 1:
# use only 1 component in GMM by default, and a single repeat.
num_components = int(num_components) if num_components != '' else 1
num_repeat = int(num_repeat) if num_repeat != '' else 1

# For each model
for k in range(len(model_id)):
    # Load MFCC data
    # To compute the global mean and log_var
    pass  # no-op placeholder: the loop body is not part of this excerpt
#########################################################################
if len(argv) != 3:
    exit("Usage: ModelCreator.py <dict> <config>")

# Read the dictionary as one whitespace-split token list per line.
# The `with` block closes the file even if reading raises, which the
# previous manual open()/close() pair did not guarantee.
with open(DICT_DIR + argv[1] + '.txt') as dict_file:
    tokens_list = [line.strip().split() for line in dict_file]

num_subphones_key = 'NUMSUBPHONES'
# Get configuration
num_subphones = int(ParseConfig(CONFIG_DIR + argv[2] + '.conf', num_subphones_key))

for tokens in tokens_list:
    ID = tokens[0]
    name = tokens[1]
    # State names: '<START>', then num_subphones numbered states for every
    # token from index 3 onward (tokens[2] is not used here), then '<END>'.
    states = ['<START>']
    for i in range(3, len(tokens)):
        for k in range(num_subphones):
            states.append('<' + tokens[i] + str(k) + '>')
    states.append('<END>')
    num_states = len(states)
    # Get flat start log transition matrix
    row_idx = get_row_idx(num_states)