# Evaluation script: score every recording with every word HMM and report the error rate.
# ReadModel, VDecode and load come from the project's own modules;
# MODEL_FOLDER and MFCC_FOLDER are path constants set up earlier in the script.
from glob import glob

# Load the trained HMM for each word in the dictionary
models = []
for k in range(len(model_id)):
    model_filename = MODEL_FOLDER + model_id[k] + '.xml'
    (name, states, num_states, num_components, dim_observation,
     log_trans, log_coef, mean, log_var) = ReadModel(model_filename)
    model = [log_trans, log_coef, mean, log_var]
    models.append(model)

count = 0
total = 0
# For each model (i.e. each word in the dictionary)
for k in range(len(model_id)):
    # Load the MFCC features of every recording of this word
    MFCC_filename_list = glob(MFCC_FOLDER + '*' + model_id[k] + '*.txt')
    feat_list = []
    for filename in MFCC_filename_list:
        feat_list.append(load(filename))
    dim_observation = len(feat_list[0][0])

    print('VDecode: ' + str(k + 1) + '/' + str(len(model_id)))
    for i in range(len(feat_list)):
        # print('VDecode: Using ' + str(i + 1) + '/' + str(len(feat_list)) + ' recording')
        ans = VDecode(models, feat_list[i])
        print('VDecode: predict = ' + str(ans) + ', real = ' + str(k))
        # Count wrong predictions
        if ans != k:
            count += 1
        total += 1

print('Error Rate = ' + str(float(count) / total))
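# VDecode itself is defined elsewhere in the project. Its name and usage suggest it
# scores the feature sequence against every word HMM with the Viterbi algorithm and
# returns the index of the best-scoring model. The sketch below only illustrates that
# idea and is NOT the project's implementation: the model layout (an N x N log
# transition matrix over emitting states, decoding starting in state 0, diagonal
# Gaussian mixtures) and the names viterbi_score, _frame_log_likelihood and
# VDecode_sketch are all assumptions.
import numpy as np
from scipy.special import logsumexp

def _frame_log_likelihood(x, log_coef, mean, log_var):
    # Log-likelihood of one D-dimensional frame x under each state's diagonal GMM.
    # Shapes assumed: log_coef (N, M), mean (N, M, D), log_var (N, M, D).
    var = np.exp(log_var)
    log_gauss = -0.5 * (np.log(2.0 * np.pi) + log_var
                        + (x - mean) ** 2 / var).sum(axis=-1)   # (N, M)
    return logsumexp(log_coef + log_gauss, axis=-1)             # (N,)

def viterbi_score(model, feat):
    # Viterbi log-likelihood of the whole feature sequence under one model,
    # assuming model = [log_trans, log_coef, mean, log_var] and a start in state 0.
    log_trans, log_coef, mean, log_var = model
    num_states = log_coef.shape[0]
    delta = np.full(num_states, -np.inf)
    delta[0] = _frame_log_likelihood(feat[0], log_coef, mean, log_var)[0]
    for x in feat[1:]:
        emit = _frame_log_likelihood(x, log_coef, mean, log_var)
        delta = np.max(delta[:, None] + log_trans, axis=0) + emit
    return delta.max()

def VDecode_sketch(models, feat):
    # Return the index of the word model with the highest Viterbi log-likelihood.
    feat = np.asarray(feat)
    return int(np.argmax([viterbi_score(m, feat) for m in models]))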
# Recognizer.py: record one utterance, extract MFCC features and decode it with the trained models.
# GetDictionary, LoadModels, Collect, preprocess, cook, load and BWDecode
# come from the project's own modules.
import sys
from os import getcwd
from os.path import split

MAIN_DIR = getcwd() + '/'
WAVE_FOLDER = MAIN_DIR + 'wav/'
TEST_FOLDER = WAVE_FOLDER + 'test/'
CONFIG_DIR = MAIN_DIR + 'config/'
DICT_DIR = MAIN_DIR + 'dict/'
MFCC_DIR = MAIN_DIR + 'mfcc/test/'
MODEL_FOLDER = MAIN_DIR + 'model/'

if len(sys.argv) != 2:
    sys.exit('Usage: Recognizer.py <dict>')

# Load the word list and the trained HMM for each word
words, model_id = GetDictionary(DICT_DIR + sys.argv[1] + '.txt')
models = LoadModels(model_id, MODEL_FOLDER)

# Record one utterance from the microphone
instruction = ('Get ready to speak a digit (0~9) and press <Enter> to start recording.\n'
               'Remember to leave 3 seconds of silence before and after the utterance.')
filename = TEST_FOLDER + 'temp.wav'
Collect(filename, instruction)

# Preprocess the recording and extract MFCC features from it
signal = preprocess(filename)
cooked_filename = TEST_FOLDER + 'cooked.wav'
signal.write(cooked_filename)
cook(cooked_filename, MFCC_DIR)

# Decode the MFCC sequence and print the recognized word
mfcc_filename = MFCC_DIR + split(cooked_filename)[1].replace('.wav', '.txt')
mfcc = load(mfcc_filename)
ans = BWDecode(models, mfcc)
print('BWDecode: predict = ' + words[ans])
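# LoadModels is not shown in either listing. Judging from the per-model loading loop
# in the evaluation script above, it presumably just wraps ReadModel for every word id.
# A minimal sketch under that assumption follows; ReadModel and its return signature
# are taken from the snippet above, but this function body is a guess, not the
# project's code.
def LoadModels(model_id, model_folder):
    # Read the trained HMM parameters for every word id and keep only the
    # pieces the decoders use: [log_trans, log_coef, mean, log_var].
    models = []
    for mid in model_id:
        (name, states, num_states, num_components, dim_observation,
         log_trans, log_coef, mean, log_var) = ReadModel(model_folder + mid + '.xml')
        models.append([log_trans, log_coef, mean, log_var])
    return models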