def load_parameters(script_line: str, script_path: str):
    """Extract an utterance's features and string identifier from an archive.

    Args:
        script_line: The string representing the desired utterance.
        script_path: The location of the script file that contains the line.

    Returns:
        feat: An array representing the acoustic data, one row per frame of
            data.
        utt: A string identifier for the utterance.

    Raises:
        AssertionError: If the script line does not start at frame 0, or if
            the frame range it declares does not match the number of frames
            read from the archive.
    """
    utt, arc, frame_start, frame_end = parse_script_line(
        script_line, script_path)
    feat = read_htk_user_feat(arc)

    # These are data-consistency checks, so they are raised explicitly rather
    # than written as `assert` statements (which `python -O` strips). The
    # exception type stays AssertionError so existing handlers still match.
    if frame_start != 0:
        raise AssertionError(
            "utterance {0!r}: expected frame_start == 0, got {1}".format(
                utt, frame_start))

    # The script line's frame range is inclusive, hence the +1.
    expected_frames = frame_end + 1 - frame_start
    if expected_frames != len(feat):
        raise AssertionError(
            "utterance {0!r}: script line declares {1} frames but the "
            "archive holds {2}".format(utt, expected_frames, len(feat)))

    return feat, utt
# Plot the waveform.
plt.plot(x)
plt.title('waveform')
plt.savefig('fig/waveform.png', bbox_inches='tight')
plt.close()

# Plot the mel filterbank: one curve per filter, all on the same axes.
for i in range(0, fe.num_mel):
    plt.plot(fe.mel_filterbank[i, :])
plt.title('mel filterbank')
plt.savefig('fig/mel_filterbank.png', bbox_inches='tight')
plt.close()

# Plot the log mel spectrum (fbank).
# origin='lower' flips the image so that the vertical frequency axis goes
# from low to high.
plt.imshow(feat, origin='lower', aspect=4)
plt.title('log mel filterbank features (fbank)')
plt.savefig('fig/fbank.png', bbox_inches='tight')
plt.close()

# Write the features to disk; frames run along axis 1 of `feat`.
htk.write_htk_user_feat(feat, feat_file)
print("Wrote {0} frames to {1}".format(feat.shape[1], feat_file))

# Verify that the file was written correctly by reading it back.
# The read-back array is transposed so that it has the same layout as `feat`
# (frames along axis 1); without this, the frame count printed below and the
# element-wise difference would be taken over mismatched axes.
feat2 = htk.read_htk_user_feat(name=feat_file).transpose()
print("Read {0} frames rom {1}".format(feat2.shape[1], feat_file))
print("Per-element absolute error is {0}".format(
    np.linalg.norm(feat - feat2) / (feat2.shape[0] * feat2.shape[1])))
def _finish_figure(title, out_path):
    """Title the current pyplot figure, save it to out_path, and close it."""
    plt.title(title)
    plt.savefig(out_path, bbox_inches='tight')
    plt.close()


def _draw_feature_image(features):
    """Draw a feature matrix as an image on the current pyplot figure."""
    # origin='lower' flips the image so that the vertical frequency axis
    # goes from low to high.
    plt.imshow(features, origin='lower', aspect=4)


# Mel filterbank: overlay one curve per filter.
for i in range(fe.num_mel):
    plt.plot(fe.mel_filterbank[i, :])
_finish_figure('mel filterbank', 'fig/mel_filterbank.png')

# Log mel spectrum (fbank).
_draw_feature_image(feat)
_finish_figure('log mel filterbank features (fbank)', 'fig/fbank.png')

# Store the features on disk.
htk.write_htk_user_feat(feat, feat_file)
print("Wrote {0} frames to {1}".format(feat.shape[1], feat_file))

# Read the file back (transposed to line up with `feat`) to verify that it
# was written correctly.
feat2 = htk.read_htk_user_feat(name=feat_file).transpose()
print("Read {0} frames rom {1}".format(feat2.shape[1], feat_file))

# Norm of the difference, divided by the number of elements read back.
diff_norm = np.linalg.norm(feat - feat2)
num_elements = feat2.shape[0] * feat2.shape[1]
print("Per-element absolute error is {0}".format(diff_norm / num_elements))

# Plot what was read back, for visual comparison with fig/fbank.png.
_draw_feature_image(feat2)
_finish_figure('Expected log mel filterbank features', 'fig/fbank_exp.png')