def get_predictions(index, partition, input_to_softmax, model_path, spectrogram_features=True):
    """ Return a model's raw (undecoded) prediction for one dataset example

    Params:
        index (int): Position of the example inside the chosen partition
        partition (str): One of 'train' or 'validation'
        input_to_softmax (Model): The acoustic model
        model_path (str): Path to saved acoustic model's weights
        spectrogram_features (bool): Forwarded to AudioGenerator as
            ``spectrogram``

    Returns:
        tuple: ``(prediction[0], transcr, audio_path)`` -- the first item of
        the model's output batch (described by the original author as a 2D
        probability matrix), the ground-truth transcription and the path of
        the audio file.

    Raises:
        ValueError: If ``partition`` is neither 'train' nor 'validation'.
    """
    # Reject a bad partition before paying for the dataset load.
    if partition not in ('train', 'validation'):
        raise ValueError('Invalid partition! Must be "train" or "validation"')

    # load the train and validation data
    data_gen = AudioGenerator(spectrogram=spectrogram_features)
    data_gen.load_train_data()
    data_gen.load_validation_data()

    # obtain the true transcription and the audio features
    if partition == 'validation':
        texts, audio_paths = data_gen.valid_texts, data_gen.valid_audio_paths
    else:
        texts, audio_paths = data_gen.train_texts, data_gen.train_audio_paths
    transcr = texts[index]
    audio_path = audio_paths[index]
    data_point = data_gen.normalize(data_gen.featurize(audio_path))

    # obtain the acoustic model's prediction; the model is fed a batch of one
    input_to_softmax.load_weights(model_path)
    prediction = input_to_softmax.predict(np.expand_dims(data_point, axis=0))
    return (prediction[0], transcr, audio_path)
def get_predictions(index, partition, input_to_softmax, model_path):
    '''
    Get the ground-truth label and the model's decoded prediction for one
    example, so that metrics can be calculated on the pair.

    Note that the validation partition is selected with 'valid' here (not
    'validation'); the other accepted value is 'train'.

    Returns a ``(label, predicted)`` tuple of strings.
    '''
    # load the train and validation data
    generator = AudioGenerator()
    generator.load_train_data()
    generator.load_validation_data()

    # pick the text / audio-path lists of the requested partition
    if partition == 'valid':
        texts, paths = generator.valid_texts, generator.valid_audio_paths
    elif partition == 'train':
        texts, paths = generator.train_texts, generator.train_audio_paths
    else:
        raise Exception('Invalid')
    label = texts[index]
    features = generator.normalize(generator.featurize(paths[index]))

    # run the acoustic model on a batch of one and decode its output
    input_to_softmax.load_weights(model_path)
    batch = np.expand_dims(features, axis=0)
    prediction = input_to_softmax.predict(batch)
    output_length = [input_to_softmax.output_length(features.shape[0])]
    decoded = K.ctc_decode(prediction, output_length)[0][0]
    pred_ints = (K.eval(decoded) + 1).flatten().tolist()

    predicted = ''.join(int_sequence_to_text(pred_ints))
    return label, predicted
def compare_predictions(index, partition, inputs_to_softmax=None, model_paths=None, phn=False):
    """ Print the decoded predictions of several models for the same example

    Params:
        index (int): The example you would like to visualize
        partition (str): One of 'train' or 'test'
        inputs_to_softmax (list of Model): The acoustic models to compare;
            defaults to no models
        model_paths (list of str): Paths to the saved weights, paired with
            ``inputs_to_softmax`` position by position (pairs are formed with
            ``zip``, so surplus entries on either side are ignored)
        phn (bool): Use the phoneme-level (``*_phn_*``) data instead of the
            word-level (``*_wrd_*``) data; also forwarded to
            ``int_sequence_to_text``

    Raises:
        ValueError: If ``partition`` is neither 'train' nor 'test'.
    """
    # None instead of [] as default: avoids sharing one mutable list
    # between calls.
    inputs_to_softmax = [] if inputs_to_softmax is None else inputs_to_softmax
    model_paths = [] if model_paths is None else model_paths

    if partition not in ('train', 'test'):
        raise ValueError('Invalid partition! Must be "train" or "test"')

    # load the train and test data
    data_gen = AudioGenerator()
    data_gen.load_train_data()
    data_gen.load_test_data()

    # obtain the true transcription and the audio features; the generator
    # exposes one attribute per (partition, unit) pair, e.g. test_phn_texts
    unit = 'phn' if phn else 'wrd'
    transcr = getattr(data_gen, '{}_{}_texts'.format(partition, unit))[index]
    audio_path = getattr(
        data_gen, '{}_{}_audio_paths'.format(partition, unit))[index]
    data_point = data_gen.normalize(data_gen.featurize(audio_path))
    batch = np.expand_dims(data_point, axis=0)

    # obtain and decode each acoustic model's prediction
    pred_ints = []
    for model_path, input_to_softmax in zip(model_paths, inputs_to_softmax):
        input_to_softmax.load_weights(model_path)
        prediction = input_to_softmax.predict(batch)
        output_length = [input_to_softmax.output_length(data_point.shape[0])]
        decoded = K.ctc_decode(prediction, output_length)[0][0]
        pred_ints.append((K.eval(decoded) + 1).flatten().tolist())

    # display the true and predicted transcriptions
    print('-' * 80)
    # NOTE(review): the Audio object is created but never displayed or
    # returned, so this line likely has no visible effect -- confirm intent.
    Audio(audio_path)
    print('True transcription:\n' + '\n' + transcr)
    print('-' * 80)
    for number, pred_in in enumerate(pred_ints, start=1):
        print('Predicted transcription number', number,
              ':\n' + '\n' + ''.join(int_sequence_to_text(pred_in, phn)))
        print('-' * 80)
def get_predictions(index, partition, trained_model, model_path):
    """ Print a model's decoded prediction, raw and after correction

    Params:
        index (int): The example you would like to visualize
        partition (str): One of 'train' or 'validation'
        trained_model (Model): The acoustic model
        model_path (str): Path to saved acoustic model's weights

    Prints the model's output length, the audio path, the true
    transcription, the raw prediction matrix, the CTC-decoded integers, the
    decoded text and the text returned by ``correction``.
    """
    # load the train and validation data (spectrogram features)
    generator = AudioGenerator(spectrogram=True)
    generator.load_train_data()
    generator.load_validation_data()

    # obtain the true transcription and the audio features from the dataset
    if partition == 'validation':
        texts, paths = generator.valid_texts, generator.valid_audio_paths
    elif partition == 'train':
        texts, paths = generator.train_texts, generator.train_audio_paths
    else:
        raise Exception('Invalid partition! Must be "train" or "validation"')
    truth = texts[index]
    audio_path = paths[index]
    features = generator.normalize(generator.featurize(audio_path))
    n_frames = features.shape[0]

    # reported before the weights are loaded, as in the original flow
    print("Trained model output length:\n"
          + str(trained_model.output_length(n_frames)))

    # obtain and decode the acoustic model's predictions
    trained_model.load_weights(model_path)
    prediction = trained_model.predict(np.expand_dims(features, axis=0))
    output_length = [trained_model.output_length(n_frames)]
    decoded = K.ctc_decode(prediction, output_length)[0][0]
    pred_ints = (K.eval(decoded) + 1).flatten().tolist()
    transcription = ''.join(int_sequence_to_text(pred_ints))

    # Correction using KenLM language model toolkit
    corrected_transcription = correction(transcription)

    separator = '-' * 80
    print(separator)
    print(repr(audio_path).replace(r"\\", r"/"))
    print('True transcription:\n' + '\n' + truth)
    print(separator)
    print('Raw prediction:\n' + str(prediction[0]))
    print('CTC Decoded predicted Ints before conversion to text:\n'
          + str(pred_ints))
    print('Predicted transcription:\n' + '\n' + transcription)
    print('Predicted transcription with correction:\n'
          + corrected_transcription)
    print(separator)
def predict_test(input_to_softmax, model_path, audio_range=100000):
    '''
    Predict the testing set and write the results to disk.

    Params:
        input_to_softmax (Model): The acoustic model
        model_path (str): Path to saved acoustic model's weights
        audio_range (int): Maximum number of test utterances to decode.
            Values larger than the test set are clamped to its size, so the
            default simply means "the whole test set".

    Side effects:
        Writes the concatenated predictions to "predictions/predictions.txt"
        and the concatenated ground truth of the same utterances to
        "predictions/truescr.txt" (the directory is created when missing).

    If decoding fails part-way, the error is reported and whatever was
    decoded so far is still written, so the two files always cover the same
    utterances.
    '''
    import os

    data_gen = AudioGenerator()
    data_gen.load_train_data()
    data_gen.load_test_data()
    transcr = data_gen.test_texts
    audio_paths = data_gen.test_audio_paths
    input_to_softmax.load_weights(model_path)

    # Bound the loop explicitly instead of relying on an IndexError.
    count = max(0, min(audio_range, len(audio_paths)))
    predictions = []
    try:
        for path in audio_paths[:count]:
            data_point = data_gen.normalize(data_gen.featurize(path))
            prediction = input_to_softmax.predict(
                np.expand_dims(data_point, axis=0))
            output_length = [
                input_to_softmax.output_length(data_point.shape[0])]
            decoded = K.ctc_decode(prediction, output_length)[0][0]
            pred_ints = (K.eval(decoded) + 1).flatten().tolist()
            predictions.append(''.join(int_sequence_to_text(pred_ints)))
    except Exception as err:
        # Best effort: keep the partial results, but say why we stopped
        # (the original swallowed every error silently).
        print('Prediction stopped after {} utterances: {!r}'.format(
            len(predictions), err))

    # Ground truth is cut to the utterances that were actually decoded
    # (previously a hard-coded 10).
    truth = ''.join(transcr[:len(predictions)])

    os.makedirs("predictions", exist_ok=True)
    with open("predictions/predictions.txt", "w") as output:
        output.write(''.join(predictions))
    with open("predictions/truescr.txt", "w") as output:
        output.write(truth)
def get_predictions(index, partition, input_to_softmax, model_path,
                    output_path=r'D:\DIPLOMSKA\results\predictions_cnn_rnn_12.txt'):
    """ Append a model's decoded prediction for one example to a text file

    Params:
        index (int): The example you would like to record
        partition (str): One of 'train' or 'validation'
        input_to_softmax (Model): The acoustic model
        model_path (str): Path to saved acoustic model's weights
        output_path (str): File the true and predicted transcriptions are
            appended to (UTF-8); defaults to the location that used to be
            hard-coded

    Nothing is written when the selected example has an empty transcription.
    """
    # load the train and validation data
    data_gen = AudioGenerator()
    data_gen.load_train_data()
    data_gen.load_validation_data()
    print(len(data_gen.valid_texts))

    # obtain the true transcription and the audio features
    if partition == 'validation':
        transcr = data_gen.valid_texts[index]
        audio_path = data_gen.valid_audio_paths[index]
    elif partition == 'train':
        transcr = data_gen.train_texts[index]
        audio_path = data_gen.train_audio_paths[index]
    else:
        raise Exception('Invalid partition! Must be "train" or "validation"')
    data_point = data_gen.normalize(data_gen.featurize(audio_path))

    # obtain and decode the acoustic model's predictions
    input_to_softmax.load_weights(model_path)
    prediction = input_to_softmax.predict(np.expand_dims(data_point, axis=0))
    output_length = [input_to_softmax.output_length(data_point.shape[0])]
    decoded = K.ctc_decode(prediction, output_length)[0][0]
    pred_ints = (K.eval(decoded) + 1).flatten().tolist()

    # Check the transcription that was actually selected; the old check
    # always looked at valid_texts[index], which is the wrong list (and
    # possibly out of range) for the 'train' partition.
    if not transcr:
        return

    with open(output_path, 'a+', encoding='utf8') as fp:
        fp.write('True transcription:\n' + '\n' + transcr + '\n')
        fp.write('-' * 30 + '\n')
        fp.write('Predicted transcription:\n' + '\n'
                 + ''.join(int_sequence_to_text(pred_ints)) + '\n')
        fp.write('-' * 30 + '\n')
def get_predictions(index, partition, input_to_softmax, model_path, spectogram=True, mfcc_dim=13):
    """ Print a model's decoded predictions

    Params:
        index (int): The example you would like to visualize
        partition (str): One of 'train' or 'validation'
        input_to_softmax (Model): The acoustic model
        model_path (str): File name of the saved weights; it is looked up
            inside the 'results/' directory
        spectogram (bool): Forwarded to AudioGenerator as ``spectrogram``
        mfcc_dim (int): Forwarded to AudioGenerator as ``mfcc_dim``
    """
    # load the train and validation data
    generator = AudioGenerator(spectrogram=spectogram, mfcc_dim=mfcc_dim)
    generator.load_train_data()
    generator.load_validation_data()

    # obtain the true transcription and the audio features
    if partition == 'validation':
        texts, paths = generator.valid_texts, generator.valid_audio_paths
    elif partition == 'train':
        texts, paths = generator.train_texts, generator.train_audio_paths
    else:
        raise Exception('Invalid partition! Must be "train" or "validation"')
    truth = texts[index]
    audio_path = paths[index]
    features = generator.normalize(generator.featurize(audio_path))

    # obtain and decode the acoustic model's predictions
    input_to_softmax.load_weights('results/' + model_path)
    prediction = input_to_softmax.predict(np.expand_dims(features, axis=0))
    output_length = [input_to_softmax.output_length(features.shape[0])]
    decoded = K.ctc_decode(prediction, output_length)[0][0]
    pred_ints = (K.eval(decoded) + 1).flatten().tolist()

    # display the true and predicted transcriptions
    separator = '-' * 80
    print(separator)
    Audio(audio_path)
    print('True transcription:\n' + '\n' + truth)
    print(separator)
    print('Predicted transcription:\n' + '\n'
          + ''.join(int_sequence_to_text(pred_ints)))
    print(separator)
def get_predictions(data_gen: AudioGenerator, model, partition, index, omit_true=False, print_line=True):
    """ Print a model's decoded predictions using an already loaded generator

    Params:
        data_gen (AudioGenerator): Generator whose train/validation data has
            already been loaded by the caller
        model (Model): The acoustic model; its weights are used as they are,
            nothing is loaded here
        partition (str): One of 'train' or 'validation'
        index (int): The example you would like to visualize
        omit_true (bool): Skip printing the true transcription
        print_line (bool): Print a separator line after the prediction

    Returns:
        The audio path of the selected example.

    Raises:
        ValueError: If ``data_gen`` is None.
        Exception: If ``partition`` is neither 'train' nor 'validation'.
    """
    # Fail fast with a clear message; previously this only printed a
    # warning and then crashed with an AttributeError a few lines later.
    if data_gen is None:
        raise ValueError("Data Generator is None!")

    # obtain the true transcription and the audio features
    if partition == 'validation':
        transcription = data_gen.valid_texts[index]
        audio_path = data_gen.valid_audio_paths[index]
    elif partition == 'train':
        transcription = data_gen.train_texts[index]
        audio_path = data_gen.train_audio_paths[index]
    else:
        raise Exception('Invalid partition! Must be "train" or "validation"')
    data_point = data_gen.normalize(data_gen.featurize(audio_path))

    # obtain and decode the acoustic model's predictions
    prediction = model.predict(np.expand_dims(data_point, axis=0))
    output_length = [model.output_length(data_point.shape[0])]
    decoded = K.ctc_decode(prediction, output_length)[0][0]
    pred_ints = (K.eval(decoded) + 1).flatten().tolist()

    # display the true and predicted transcriptions, tagged with the
    # feature type the generator was configured with
    input_type = "SPEC" if data_gen.spectrogram else "MFCC"
    if not omit_true:
        print('TRUE: ' + transcription)
    print('PRED ' + input_type + ': ' + ''.join(int_sequence_to_text(pred_ints)))
    if print_line:
        print('-' * 82)
    return audio_path
def get_predictions(index, partition, input_to_softmax, model_path):
    """ Print a model's greedy-decoded prediction with its WER and timing

    Params:
        index (int): The example you would like to visualize
        partition (str): One of 'train' or 'validation'
        input_to_softmax (Model): The acoustic model
        model_path (str): Path to saved acoustic model's weights

    The reported "Real Time Factor" is the wall-clock time of the whole call
    (data loading included, since the timer starts before the generator is
    created) divided by the duration of the selected utterance.
    """
    starttime = time.time()

    # load the train and validation data
    data_gen = AudioGenerator(spectrogram=True)
    data_gen.load_train_data()
    data_gen.load_validation_data()

    # obtain the true transcription, the audio features and the duration
    if partition == 'validation':
        transcr = data_gen.valid_texts[index]
        audio_path = data_gen.valid_audio_paths[index]
        duration = data_gen.valid_durations[index]
    elif partition == 'train':
        transcr = data_gen.train_texts[index]
        audio_path = data_gen.train_audio_paths[index]
        # Previously valid_durations was used for both partitions, pairing a
        # training utterance with an unrelated validation duration.
        # NOTE(review): assumes AudioGenerator exposes train_durations
        # alongside valid_durations -- confirm.
        duration = data_gen.train_durations[index]
    else:
        raise Exception('Invalid partition! Must be "train" or "validation"')
    data_point = data_gen.normalize(data_gen.featurize(audio_path))

    # obtain and decode the acoustic model's predictions
    input_to_softmax.load_weights(model_path)
    prediction = input_to_softmax.predict(np.expand_dims(data_point, axis=0))
    output_length = [input_to_softmax.output_length(data_point.shape[0])]
    decoded = K.ctc_decode(prediction, output_length, greedy=True)[0][0]
    pred_ints = (K.eval(decoded) + 1).flatten().tolist()
    predicted = "".join(int_sequence_to_text(pred_ints))

    # NOTE(review): the Audio object is created but never displayed or
    # returned, so this line likely has no visible effect -- confirm intent.
    Audio(audio_path)
    print('-' * 80)
    print("Greedy_predictions:\n" + '\n' + predicted)
    print('1. Word Error Rate for ASR ==', wer(transcr, predicted)*100, '%')
    endtime = time.time()
    print('2. Real Time Factor for ASR ==', (endtime - starttime)/duration, '\n')
    print('-' * 80)
def lexcion_predictions(index, partition, input_to_softmax, model_path):
    """ Print a model's prediction decoded by ``testline``, with WER and timing

    Params:
        index (int): The example you would like to visualize
        partition (str): One of 'train' or 'validation'
        input_to_softmax (Model): The acoustic model
        model_path (str): Path to saved acoustic model's weights

    The reported "Real Time Factor" is the wall-clock time of the whole call
    (data loading included, since the timer starts before the generator is
    created) divided by the duration of the selected utterance.
    """
    starttime = time.time()

    # load the train and validation data
    data_gen = AudioGenerator(spectrogram=True)
    data_gen.load_train_data()
    data_gen.load_validation_data()

    # obtain the true transcription, the audio features and the duration
    if partition == 'validation':
        transcr = data_gen.valid_texts[index]
        audio_path = data_gen.valid_audio_paths[index]
        duration = data_gen.valid_durations[index]
    elif partition == 'train':
        transcr = data_gen.train_texts[index]
        audio_path = data_gen.train_audio_paths[index]
        # Previously valid_durations was used for both partitions, pairing a
        # training utterance with an unrelated validation duration.
        # NOTE(review): assumes AudioGenerator exposes train_durations
        # alongside valid_durations -- confirm.
        duration = data_gen.train_durations[index]
    else:
        raise Exception('Invalid partition! Must be "train" or "validation"')
    data_point = data_gen.normalize(data_gen.featurize(audio_path))

    # obtain the acoustic model's prediction for a batch of one
    input_to_softmax.load_weights(model_path)
    prediction = input_to_softmax.predict(np.expand_dims(data_point, axis=0))

    print('-' * 80)
    # NOTE(review): the Audio object is created but never displayed or
    # returned, so this line likely has no visible effect -- confirm intent.
    Audio(audio_path)
    print('True transcription:\n' + '\n' + transcr)
    print('-' * 80)

    # decode the first (only) item of the batch with the external helper
    predicted = testline(prediction[0])
    print("TokenPassing_predictions:\n")
    print(predicted + '\n')
    print('1. Word Error Rate for ASR ==', wer(transcr, predicted)*100, '%')
    endtime = time.time()
    print('2. Real Time Factor for ASR ==', (endtime - starttime)/duration)
def get_predictions_rec(input_to_softmax, a_path, model_path):
    """ Decode an arbitrary audio file and return the result as a message

    Params:
        input_to_softmax (Model): The acoustic model
        a_path (str): Path of the audio file to transcribe
        model_path (str): Path to saved acoustic model's weights

    Returns:
        str: 'Predicted transcription:' followed by a blank line and the
        decoded text.
    """
    # non-spectrogram features; train and validation data are loaded before
    # the file is featurized and normalized
    generator = AudioGenerator(spectrogram=False)
    generator.load_train_data()
    generator.load_validation_data()
    features = generator.normalize(generator.featurize(a_path))

    # obtain and decode the acoustic model's predictions
    input_to_softmax.load_weights(model_path)
    batch = np.expand_dims(features, axis=0)
    prediction = input_to_softmax.predict(batch)
    output_length = [input_to_softmax.output_length(features.shape[0])]
    decoded = K.ctc_decode(prediction, output_length)[0][0]
    pred_ints = (K.eval(decoded) + 1).flatten().tolist()

    text = ''.join(int_sequence_to_text(pred_ints))
    return 'Predicted transcription:\n' + '\n' + text
def get_predictions(audio_path, input_to_softmax, model_path):
    """ Print the text a model recognizes in a single audio file

    Params:
        audio_path (str): Path of the audio file to transcribe
        input_to_softmax (Model): The acoustic model
        model_path (str): Path to saved acoustic model's weights

    No dataset is loaded: the features are normalized with the fixed
    per-coefficient mean / standard deviation listed below (13 values each,
    matching ``mfcc_dim=13``) instead of ``data_gen.normalize``.
    """
    generator = AudioGenerator(spectrogram=False, mfcc_dim=13)

    # raw, not yet normalized features
    raw_features = generator.featurize(audio_path)

    # NOTE(review): presumably statistics captured from a training run --
    # confirm they match the weights in model_path.
    feats_mean = np.array([
        14.81652005, -0.1802923, -1.22285122, 0.87062853, -16.05643781,
        -14.03943633, -5.7298706, -15.52425927, -3.39637537, -3.85226744,
        -5.17435844, -2.13766871, -11.39111645
    ])
    feats_std = np.array([
        7.16816358, 14.58747728, 11.99928947, 15.69431836, 14.45918537,
        16.79930368, 13.98395715, 12.60133111, 11.61310503, 11.34526655,
        12.01205471, 13.41467652, 10.89021869
    ])

    # normalize; eps guards against division by a zero deviation
    eps = 1e-14
    features = (raw_features - feats_mean) / (feats_std + eps)

    # obtain and decode the acoustic model's predictions
    input_to_softmax.load_weights(model_path)
    batch = np.expand_dims(features, axis=0)
    prediction = input_to_softmax.predict(batch)
    output_length = [input_to_softmax.output_length(features.shape[0])]
    decoded = K.ctc_decode(prediction, output_length)[0][0]
    pred_ints = (K.eval(decoded) + 1).flatten().tolist()

    recognized_text = "".join(int_sequence_to_text(pred_ints))
    print(recognized_text)
def _score_random_samples(data_gen, input_to_softmax, texts, audio_paths, draws, max_index):
    """ Decode ``draws`` random examples; return (sum of WERs <= 1, how many were counted) """
    wer_sum = 0
    counted = 0
    # Never sample past the end of the data actually loaded.
    upper = min(max_index + 1, len(texts))
    for _ in range(draws):
        index = random.randrange(upper)
        transcr = texts[index]
        audio_path = audio_paths[index]
        data_point = data_gen.normalize(data_gen.featurize(audio_path))

        # obtain and decode (beam search) the acoustic model's predictions
        prediction = input_to_softmax.predict(np.expand_dims(data_point, axis=0))
        output_length = [input_to_softmax.output_length(data_point.shape[0])]
        decoded = K.ctc_decode(prediction, output_length, greedy=False,
                               beam_width=100, top_paths=1)[0][0]
        pred_ints = (K.eval(decoded) + 1).flatten().tolist()
        predicted = "".join(int_sequence_to_text(pred_ints))

        # Samples with a WER above 1 are left out of the report, as before.
        error = wer(transcr, predicted)
        if error <= 1:
            print('index_%d' % index, ':')
            wer_sum = wer_sum + error
            counted += 1
            print(error)
            print("Transcription: ", transcr)
            print("Prediction: ", predicted)
            print('-' * 80)
    return wer_sum, counted


def get_group_predictions(input_to_softmax, model_path, partition):
    """ Print decoded predictions and the average WER of random examples

    Params:
        input_to_softmax (Model): The acoustic model
        model_path (str): Path to saved acoustic model's weights
        partition (str): One of 'train' or 'validation'

    100 examples are drawn for 'validation' (indices up to 2500) and 1000
    for 'train' (indices up to 10000); the index range is clamped to the
    size of the loaded data. Only examples whose WER is <= 1 are printed and
    averaged. The average WER divides by the number of counted examples and
    the average time by the number of draws (both used to divide by a fixed
    100, which was wrong for 'train' and whenever an example was skipped).

    Raises:
        ValueError: If ``partition`` is neither 'train' nor 'validation'.
    """
    # (number of draws, highest index sampled) per partition
    settings = {'validation': (100, 2500), 'train': (1000, 10000)}
    if partition not in settings:
        raise ValueError('Invalid partition! Must be "train" or "validation"')
    draws, max_index = settings[partition]

    starttime = time.time()
    data_gen = AudioGenerator(spectrogram=True)
    data_gen.load_train_data()
    data_gen.load_validation_data()
    input_to_softmax.load_weights(model_path)

    if partition == 'validation':
        texts, audio_paths = data_gen.valid_texts, data_gen.valid_audio_paths
    else:
        texts, audio_paths = data_gen.train_texts, data_gen.train_audio_paths

    wer_sum, counted = _score_random_samples(
        data_gen, input_to_softmax, texts, audio_paths, draws, max_index)
    endtime = time.time()

    # nan rather than ZeroDivisionError when every sample was skipped
    average_wer = wer_sum / counted if counted else float('nan')
    print('1. Average Word Error Rate for ASR ==', average_wer)
    print('2. Average Real Time Factor for ASR ==', (endtime - starttime) / draws, '\n')
def get_predictions(indexes, partition, input_to_softmax, model_path):
    """ Print several decodings of a model's predictions for many examples

    Params:
        indexes (iterable of int): The examples you would like to visualize
        partition (str): One of 'train', 'validation' or 'test'; 'test'
            ignores the index and always uses one fixed audio file
        input_to_softmax (Model): The acoustic model
        model_path (str): Path to saved acoustic model's weights

    For every example the ground truth is printed next to the best-path
    decoding, beam search without and with the language model, and token
    passing.
    """
    # load the train and validation data (MFCC features)
    generator = AudioGenerator(spectrogram=False, mfcc_dim=13)
    generator.load_train_data()
    generator.load_validation_data()

    # loading language model
    alphabet = ''.join(index_map.values())
    language_model = LanguageModel('data/word/corpus.txt', alphabet)

    # weights are loaded once, before iterating over the examples
    input_to_softmax.load_weights(model_path)

    for index in indexes:
        # obtain the true transcription and the audio path
        if partition == 'validation':
            truth = generator.valid_texts[index]
            audio_path = generator.valid_audio_paths[index]
        elif partition == 'train':
            truth = generator.train_texts[index]
            audio_path = generator.train_audio_paths[index]
        elif partition == 'test':
            truth = 'hello how are you'
            audio_path = '../datasets/AgentExpress/hello.wav.wav'
        else:
            raise Exception(
                'Invalid partition! Must be "train" or "validation"')
        features = generator.normalize(generator.featurize(audio_path))

        # best-path decoding through the Keras backend
        prediction = input_to_softmax.predict(
            np.expand_dims(features, axis=0))
        output_length = [input_to_softmax.output_length(features.shape[0])]
        decoded = K.ctc_decode(prediction, output_length)[0][0]
        pred_ints = (K.eval(decoded) + 1).flatten().tolist()

        # display the true and predicted transcriptions
        print('-' * 80)
        Audio(audio_path)
        print('ground_truth:' + ' ' * 4 + truth)
        print('best_path:' + ' ' * 7
              + ''.join(int_sequence_to_text(pred_ints)))

        # the remaining decoders work on the first (only) item of the batch
        matrix = prediction[0]
        pred_beam = ctcBeamSearch(matrix, alphabet, None)
        print('beam_search:' + ' ' * 5 + pred_beam)
        pred_beam_lm = ctcBeamSearch(matrix, alphabet, language_model)
        print('beam_search_lm:' + ' ' * 2 + pred_beam_lm)
        pred_token = ctcTokenPassing(matrix, alphabet,
                                     language_model.getWordList())
        print('token_passing:' + ' ' * 3 + pred_token)
def get_predictions(index, partition, input_to_softmax, model_path, phn=False):
    """ Print a model's decoded predictions and report their accuracy

    Params:
        index (int): The example you would like to visualize
        partition (str): One of 'train' or 'test'
        input_to_softmax (Model): The acoustic model
        model_path (str): Path to saved acoustic model's weights
        phn (bool): Use the phoneme-level (``*_phn_*``) data instead of the
            word-level (``*_wrd_*``) data. In phoneme mode the predicted
            tokens are printed one by one: the first is always green, the
            middle ones are green when they equal the true token at the
            same, previous or next position and red otherwise, and the last
            one is printed without a colour code of its own.

    Raises:
        ValueError: If ``partition`` is neither 'train' nor 'test'.
    """
    # The old message advertised "validation", which was never accepted.
    if partition not in ('train', 'test'):
        raise ValueError('Invalid partition! Must be "train" or "test"')

    # load the train and test data
    data_gen = AudioGenerator()
    data_gen.load_train_data()
    data_gen.load_test_data()

    # obtain the true transcription and the audio features; the generator
    # exposes one attribute per (partition, unit) pair, e.g. test_phn_texts
    unit = 'phn' if phn else 'wrd'
    transcr = getattr(data_gen, '{}_{}_texts'.format(partition, unit))[index]
    audio_path = getattr(
        data_gen, '{}_{}_audio_paths'.format(partition, unit))[index]
    data_point = data_gen.normalize(data_gen.featurize(audio_path))

    # obtain and decode the acoustic model's predictions
    input_to_softmax.load_weights(model_path)
    prediction = input_to_softmax.predict(np.expand_dims(data_point, axis=0))
    output_length = [input_to_softmax.output_length(data_point.shape[0])]
    decoded = K.ctc_decode(prediction, output_length)[0][0]
    pred_ints = (K.eval(decoded) + 1).flatten().tolist()
    predicted = ''.join(int_sequence_to_text(pred_ints, phn))
    split_true = transcr.split(" ")
    split_pred = predicted.split(" ")

    # display the true and predicted transcriptions
    print('-' * 80)
    # NOTE(review): the Audio object is created but never displayed or
    # returned, so this line likely has no visible effect -- confirm intent.
    Audio(audio_path)
    print('True transcription:\n' + '\n' + transcr)
    print('-' * 80)
    if not phn:
        print('Predicted transcription:\n' + '\n' + predicted)
        print('-' * 80)
    else:
        print('Predicted transcription:\n' + '\n')
        # Only walk positions that exist in both sequences; a prediction
        # shorter than the ground truth used to raise IndexError.
        shown = min(len(split_true), len(split_pred))
        print("\033[1;32m" + split_pred[0] + " ", end='')
        for i in range(1, shown - 1):
            neighbours = split_true[i - 1:i + 2]
            colour = "\033[1;32m" if split_pred[i] in neighbours else "\033[1;31m"
            print(colour + split_pred[i] + " ", end='')
        # Guarded so that a single-token prediction is not printed twice.
        if shown > 1:
            print(split_pred[shown - 1] + " ", end='')

    displayAccuracy(split_true, split_pred, phn)
from wer import wer from data_generator import AudioGenerator from keras import backend as K from utils import int_sequence_to_text import numpy as np from keras.callbacks import ModelCheckpoint, Callback valid_cache = [] data_gen = AudioGenerator(spectrogram=True) data_gen.load_train_data() data_gen.load_validation_data() for index in range(len(data_gen.valid_texts)): transcr = data_gen.valid_texts[index] audio_path = data_gen.valid_audio_paths[index] data_point = data_gen.normalize(data_gen.featurize(audio_path)) valid_cache.append(data_point) def calculate_wer2(input_to_softmax, model_path, words=False): # data_gen = AudioGenerator() # data_gen.load_train_data() # data_gen.load_validation_data() wers = [] input_to_softmax.load_weights(model_path) l = len(data_gen.valid_texts) l = 100 for index in range(l): transcr = data_gen.valid_texts[index] # audio_path = data_gen.valid_audio_paths[index]