import os
import numpy as np
import scipy.io.wavfile as wav
from calcmfcc import calcMFCC_delta_delta

labels = []
label_dir = '/home/pony/github/ASR_libri/libri/cha-level/mfcc_and_label/label/'
mfcc_dir = '/home/pony/github/ASR_libri/libri/cha-level/mfcc_and_label/mfcc/'

# rootdir (the directory holding the raw .wav files) is defined elsewhere
for subdir, dirs, files in os.walk(rootdir):
    for f in files:
        fullFilename = os.path.join(subdir, f)
        filenameNoSuffix = os.path.splitext(fullFilename)[0]
        if f.endswith('.wav'):
            print(fullFilename)
            (rate, sig) = wav.read(fullFilename)
            mfcc = calcMFCC_delta_delta(sig, rate, win_length=0.020, win_step=0.020)
            # transpose mfcc to an array of shape (39, time_length)
            mfcc = np.transpose(mfcc)
            print(mfcc.shape)
            # save the mfcc features to a .npy file named after the utterance
            m_f = mfcc_dir + filenameNoSuffix.split('/')[-1] + '.npy'
            np.save(m_f, mfcc)
            labelFilename = filenameNoSuffix + '.label'
            with open(labelFilename, 'r') as label_file:
                characters = label_file.readline().strip()
            print(characters)
            targets = []
            for c in characters:
                if c == ' ':
                    # the original snippet is truncated here; a common
                    # char-level mapping (an assumption) is space -> 0,
                    # letters -> 1..26
                    targets.append(0)
                else:
                    targets.append(ord(c) - ord('a') + 1)
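The loop above writes each utterance's features under mfcc_dir; label_dir suggests the per-utterance target sequences are stored the same way. A minimal sketch of reading the pairs back for training, assuming one .npy file per utterance in both directories (the load_pairs helper name and the label file layout are assumptions, not part of the original):

import os
import numpy as np

def load_pairs(mfcc_dir, label_dir):
    # pair each saved MFCC array with its target sequence by shared basename
    pairs = []
    for name in sorted(os.listdir(mfcc_dir)):
        if not name.endswith('.npy'):
            continue
        base = os.path.splitext(name)[0]
        mfcc = np.load(os.path.join(mfcc_dir, name))            # shape (39, time_length)
        targets = np.load(os.path.join(label_dir, base + '.npy'))
        pairs.append((mfcc, targets))
    return pairs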
def wav_to_mfcc(wav_file_path):
    # read the waveform and compute 39-dimensional MFCC + delta + delta-delta
    # features with a 20 ms window and a 10 ms hop
    rate, sig = wav.read(wav_file_path)
    mfcc = calcMFCC_delta_delta(sig, rate, win_length=0.020, win_step=0.010)
    # transpose to (39, time_length) to match the saved training features
    mfcc = np.transpose(mfcc)
    return mfcc
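A quick usage sketch. The path is a placeholder, and the (batch, time, feature) tensor layout for feeding the RNN is an assumption:

import torch

feat = wav_to_mfcc('./storage/example.wav')   # placeholder path
print(feat.shape)                             # (39, time_length)

# one plausible way to hand this to the model: batch-first (1, time, 39)
feat_tensor = torch.from_numpy(feat.T).unsqueeze(0).float()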
    output_logits = model(feat)
    output_probs = nn.functional.softmax(output_logits, dim=1)
    output_label = int(np.argmax(output_probs.squeeze().cpu().numpy()))
    output_probs = output_probs.squeeze().tolist()
    print("output_logits: {}".format(output_logits[0][output_label]))
    end_time = time.time()
    print("Model predicting time: {} s".format(end_time - start_time))
    output_person = label2person[output_label]
    return output_probs, output_label, output_person


if __name__ == "__main__":
    import scipy.io.wavfile as wav
    from calcmfcc import calcMFCC_delta_delta

    path = "./storage/Qiaomu.wav"
    (rate, sig) = wav.read(path)
    mfcc_feat = calcMFCC_delta_delta(sig, rate)
    model = RNN_model(14)
    model.load_state_dict(torch.load("models/best_model.pt"))
    with open("models/label_person_mapping.pickle", "rb") as f:
        label_person_mapping = pickle.load(f)
    label2person = label_person_mapping["label2person"]
    print(label2person)
    # classify_one_cycle returns three values: (probs, label, person)
    output_prob, output_label, output_person = classify_one_cycle(model, mfcc_feat, label2person)
    print("output_person: {}".format(output_person))
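The fragment above starts mid-function: feat and start_time come from an earlier part of classify_one_cycle that is not shown. A hedged reconstruction of that missing prologue; the tensor layout is an assumption, and only the names model, mfcc_feat, and label2person come from the snippet itself:

import time
import numpy as np
import torch

def classify_one_cycle(model, mfcc_feat, label2person):
    start_time = time.time()
    model.eval()
    # calcMFCC_delta_delta returns features of shape (time, 39); add a
    # batch dimension so the RNN receives (1, time, 39) -- this layout
    # is an assumption
    feat = torch.from_numpy(np.asarray(mfcc_feat)).unsqueeze(0).float()
    # ... the body shown above (model(feat), softmax, argmax, timing
    # printout) continues from here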
def POST(self):
    web.header('Access-Control-Allow-Origin', '*')
    web.header('Access-Control-Allow-Credentials', 'true')
    web.header('Access-Control-Allow-Methods', 'POST, GET, OPTIONS')
    data = web.input()

    if data.type == 'audio/mpeg':
        with open(dir_ + 'storage/' + str(data.url)[-6:] + '.mp3', 'wb') as wf:
            wf.write(data.file)
    elif data.type == 'audio/wav':
        # read the uploaded sample and create a working copy
        sample_name = data.filename
        with open(dir_ + 'storage/' + sample_name, 'rb') as sf:
            wav_bytes = sf.read()
        file_name = 'curr.wav'
        with open(dir_ + 'storage/' + file_name, 'wb') as wf:
            wf.write(wav_bytes)
        print("audio received ...")

        # make single channel [uncomment if passing dual-channel audio]
        #silence_classf.make_one_channel(dir_ + 'storage', file_name)

        # vocal separation is needed because ambient noise otherwise breaks
        # the silence detection
        #new_fn_bg, new_fn_fg = silence_classf.vocal_separation(dir_ + 'storage', file_name)
        #sil_file_name = new_fn_fg

        # compute MFCC features and classify the speaker
        (rate, sig) = wav.read(dir_ + 'storage/' + file_name)
        start_time = time.time()
        mfcc_feat = calcMFCC_delta_delta(sig, rate)
        mid_time = time.time()
        print("Signal processing time: {} s".format(mid_time - start_time))

        model = RNN_model(15)
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        model.load_state_dict(torch.load("models/best_model.pt", map_location=device))
        with open("models/label_person_mapping_new.pickle", "rb") as f:
            label_person_mapping = pickle.load(f)
        label2person = label_person_mapping["label2person"]

        output_prob, output_label, output_person = classify_one_cycle(model, mfcc_feat, label2person)
        end_time = time.time()
        target_prob = output_prob[output_label]
        if "_" in output_person:
            output_person = output_person.replace("_", " ")
        print("output_person: {}".format(output_person))
        print("output_probability: {}".format(target_prob))
        print("Model predicting time: {} s".format(end_time - mid_time))

        return json.dumps({
            'msg': "Your file is uploaded!",
            'output_person': output_person,
            'output_probability': round(target_prob, 4),
            'total_time': round(end_time - start_time, 3),
        })
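Note that in the audio/wav branch the handler reads the sample from the server-side storage/ directory by filename rather than from the request body, so a test client only needs to send the type and filename fields. A minimal test sketch; the host, port, and endpoint path are assumptions, since the web.py URL mapping is not shown:

import requests

# hedged test client for the handler above; URL and route are assumed
resp = requests.post(
    'http://localhost:8080/upload',
    data={
        'type': 'audio/wav',
        'filename': 'example.wav',   # must already exist under storage/
    },
)
print(resp.json())   # {'msg': ..., 'output_person': ..., 'output_probability': ..., 'total_time': ...}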