def _main(wav_file, input_json, output_json, mode):
    """Evaluate a recording against its score and hand the result to post-processing.

    Steps, in order: load the joint CNN model and its feature scaler from
    ``cnnModels/joint`` next to this file, load the audio at 44.1 kHz, run
    MFSHS pitch detection, compute scaled log-mel features, predict the
    syllable observation curve, pick onsets, derive notes, align them with
    the score via ``sw_alignment`` and call ``post_proprocess``.

    Args:
        wav_file: path of the recording to analyse.
        input_json: score file passed to ``parse_musescore``.
        output_json: forwarded as the first argument of ``post_proprocess``.
        mode: forwarded unchanged as the last argument of ``post_proprocess``.

    Returns:
        None. The value returned by ``post_proprocess`` is discarded.
    """
    root_path = os.path.dirname(__file__)
    joint_cnn_model_path = os.path.join(root_path, 'cnnModels', 'joint')

    # Keras joint CNN model.
    model_joint = load_model(os.path.join(joint_cnn_model_path, 'jan_joint0.h5'))

    # Log-mel feature scaler. Opened in a ``with`` block so the handle is
    # closed deterministically instead of being left to the garbage collector.
    # NOTE(review): pickle.load can execute arbitrary code; this is only safe
    # while the scaler file is shipped with the package and never user-supplied.
    scaler_path = os.path.join(joint_cnn_model_path, 'scaler_joint.pkl')
    with open(scaler_path, 'rb') as scaler_file:
        scaler_joint = pickle.load(scaler_file)

    data_wav, fs_wav = librosa.load(wav_file, sr=44100)

    # Frame-level pitch detection.
    mfshs = MFSHS(data_wav)
    mfshs.frame()
    pitches = mfshs.pitches
    zeroAmploc = mfshs.zeroAmploc

    # ``hopsize_t`` is not defined in this function; it is expected to exist
    # at module level.
    log_mel_old = get_log_mel_madmom(wav_file, fs=fs_wav, hopsize_t=hopsize_t, channel=1)
    log_mel = scaler_joint.transform(log_mel_old)
    log_mel = feature_reshape(log_mel, nlen=7)
    log_mel = np.expand_dims(log_mel, axis=1)

    # The model has two outputs; only the syllable one is used here.
    obs_syllable, _ = model_joint.predict(log_mel, batch_size=128, verbose=2)
    obs_syllable = np.squeeze(obs_syllable)
    obs_syllable = smooth_obs(obs_syllable)
    # Pin the first and last values of the observation curve.
    obs_syllable[0] = 1.0
    obs_syllable[-1] = 0.0

    score_note, pauseLoc = parse_musescore(input_json)
    resultOnset = findPeak(obs_syllable, pitches, score_note)
    Note_and_onset = pitch_Note(pitches, resultOnset['onset_frame'], score_note)
    score_note = np.array(score_note)
    result_loc_info = sw_alignment(score_note, Note_and_onset['notes'])

    post_proprocess(output_json, pitches, resultOnset['onset_frame'], zeroAmploc,
                    score_note, pauseLoc, result_loc_info, mode)
def _main(wav_file, input_json, output_json):
    """Run pitch detection, onset detection, alignment and evaluation.

    Args:
        wav_file: recording handed to ``MFSHS`` and ``detector_onset``.
        input_json: score file handed to ``parse_musescore``.
        output_json: first argument given to ``Evaluator``.

    Returns:
        None.
    """
    # Step 1: pitch detection.
    pitch_tracker = MFSHS(wav_file)
    pitch_tracker.pitch_detector()
    pitches = pitch_tracker.pitches
    zero_amp_frame = pitch_tracker.zeroAmploc

    score_note, pauseLoc = parse_musescore(input_json)

    # Step 2: onset detection.
    onset_frame = detector_onset(wav_file, pitches, score_note)

    # Step 3: alignment of the detected material with the score.
    match_loc_info = sw_alignment(pitches, onset_frame, score_note)

    # Step 4: post-process and save the result.
    onset_offset_pitches = trans_onset_and_offset(
        match_loc_info, onset_frame, pitches)
    # The instance is not used afterwards.
    # NOTE(review): presumably construction itself performs the evaluation
    # and writes ``output_json`` -- confirm against Evaluator.
    evaluator = Evaluator(
        output_json,
        onset_offset_pitches,
        zero_amp_frame,
        score_note,
        pauseLoc,
    )
def main(wav_file, score_file): ''' first detect pitches ''' mfshs = MFSHS(wav_file) mfshs.pitch_detector() pitches = mfshs.pitches zero_amp_frame = mfshs.zeroAmploc energes = mfshs.energes score_note, note_type, pauseLoc = parse_musescore( score_file) ## parse musescore ''' second detect onset ''' onset_frame = detector_onset(wav_file, pitches, score_note) ''' sw alignment ''' match_loc_info = sw_alignment(pitches, onset_frame, score_note) ''' post process and save result ''' onset_offset_pitches = trans_onset_and_offset(match_loc_info, onset_frame, pitches) filename_json = os.path.splitext(wav_file)[0] + ".json" evaluator = Evaluator(filename_json, onset_offset_pitches, zero_amp_frame, score_note, pauseLoc, note_type) save_files(wav_file, onset_frame, pitches, evaluator.det_note, score_note) '''
def cal_system_acc(wav_files,
                   json_root='/home/data/lj/onset_detect/MUS/evaluate/'):
    """Compute the note accuracy of the system for every recording.

    For each wav file the score JSON is looked up in
    ``<json_root>/<file id>/`` (first entry ending in ``json``), the full
    pipeline is run and the detected notes are compared with the score.

    Args:
        wav_files: iterable of wav paths. The file id is the base name with
            its last four characters (e.g. ``.wav``) removed.
        json_root: directory that holds one sub-directory per file id.
            Defaults to the location that used to be hard-coded.

    Returns:
        dict mapping file id to a dict with the keys ``acc`` (fraction of
        notes whose absolute difference from the score is <= 0.8),
        ``dnote`` (detected notes), ``snote`` (score notes) and
        ``diff_note`` (absolute differences).

    Raises:
        IndexError: if a score directory contains no ``json`` file.
    """

    def cal_acc(det_note, score_note):
        """Return (accuracy, abs difference) of detected vs. score notes."""
        det_note = np.array(det_note)
        score_note = np.array(score_note)
        det_note = is_octive(det_note, score_note)
        diff_note = abs(det_note - score_note)
        if len(diff_note) == 0:
            # Nothing to compare: avoid a ZeroDivisionError.
            return 0.0, diff_note
        acc_index = np.where(diff_note <= 0.8)[0]
        # float() keeps this a true division under Python 2 as well.
        accuracy = float(len(acc_index)) / len(diff_note)
        return accuracy, diff_note

    sys_acc_res = dict()
    for wav_file in wav_files:
        fid = os.path.basename(wav_file)[0:-4]
        score_dir = os.path.join(json_root, fid)
        score_files = [
            os.path.join(score_dir, x) for x in os.listdir(score_dir)
            if x.endswith('json')
        ]
        if not score_files:
            # Same exception type as the former bare ``[...][0]``, but with
            # a message that says what is missing.
            raise IndexError('no score json found in {}'.format(score_dir))
        score_file = score_files[0]

        # Pitch detection.
        mfshs = MFSHS(wav_file)
        mfshs.pitch_detector()
        pitches = mfshs.pitches
        zero_amp_frame = mfshs.zeroAmploc

        # Parse the MuseScore file.
        score_note, note_types, pauseLoc = parse_musescore(score_file)

        # Onset detection; predict() is called before onset_frame is read,
        # its own return value is not needed here.
        predictor = predictor_onset()
        predictor.predict(wav_file)
        onset_frame = predictor.onset_frame
        onset_frame = post_cnn_onset(pitches, onset_frame)

        match_loc_info = sw_alignment(pitches, onset_frame, score_note)
        onset_offset_pitches = trans_onset_and_offset(match_loc_info,
                                                      onset_frame, pitches)
        filename_json = os.path.splitext(wav_file)[0] + ".json"
        evaluator = Evaluator(filename_json, onset_offset_pitches,
                              zero_amp_frame, score_note, pauseLoc,
                              note_types)

        accuracy, diff_note = cal_acc(evaluator.det_note, score_note)
        sys_acc_res[fid] = {
            'acc': accuracy,
            'dnote': evaluator.det_note,
            'snote': score_note,
            'diff_note': diff_note,
        }
    return sys_acc_res
def main(wav_file, score_file):
    """Score one recording against its sheet music using pYIN pitches.

    Args:
        wav_file: recording to analyse; the output JSON name is derived from
            it by swapping the extension for ``.json``.
        score_file: score file handed to ``parse_musescore``.

    Returns:
        The ``score`` attribute of the ``Evaluator`` built for this recording.
    """
    # Pitches come from pYIN. Earlier variants of this function took them
    # from MFSHS or from a pre-computed text file; those paths are disabled.
    # NOTE(review): the constant offset of 20 is unexplained in this file.
    shifted = np.array(demo.pYIN(wav_file)) - 20
    # Clamp everything that fell below zero after the shift to 0.
    pitches = np.where(shifted < 0.0, 0, shifted)
    # Indices of the frames whose pitch is exactly 0.
    zero_amp_frame = np.where(pitches == 0)[0]

    # Parse the JSON score. Judging by the variable names it yields the note
    # values, note types and rest positions -- TODO confirm.
    score_note, note_types, pauseLoc = parse_musescore(score_file)

    # CNN onset prediction followed by pitch-based post-processing;
    # predict() is called before onset_frame is read.
    predictor = predictor_onset()
    onset_time = predictor.predict(wav_file)
    onset_frame = post_cnn_onset(pitches, predictor.onset_frame)

    match_loc_info = sw_alignment(pitches, onset_frame, score_note)
    onset_offset_pitches = trans_onset_and_offset(
        match_loc_info, onset_frame, pitches)

    wav_stem = os.path.splitext(wav_file)[0]
    filename_json = wav_stem + ".json"
    evaluator = Evaluator(
        filename_json,
        onset_offset_pitches,
        zero_amp_frame,
        score_note,
        pauseLoc,
        note_types,
    )

    save_files(
        wav_file,
        onset_frame,
        pitches,
        evaluator.det_note,
        score_note,
        onset_offset_pitches['onset_frame'],
    )
    return evaluator.score
def print_save_acc(wav_files, ground_notes, score_notes):
    """Print and log system / ground-truth note accuracies per recording.

    For every wav file the pipeline is run, three accuracies are computed
    (ground truth vs. score, system vs. score, system vs. ground truth),
    printed, written note by note to ``log/<file id>.txt`` and summarised in
    ``log/acc.txt``.

    Args:
        wav_files: iterable of wav paths. The file id is the base name with
            its last four characters removed.
        ground_notes: mapping from file id to the ground-truth notes.
        score_notes: unused; the score notes are parsed from the score JSON.

    Returns:
        None.

    NOTE(review): this function calls ``cal_acc`` as a module-level name;
    no such definition is visible in this part of the file -- confirm that
    it exists.
    """
    json_root = '/home/data/lj/onset_detect/MUS/evaluate/'
    # ``with`` guarantees the summary log is closed even if one file fails.
    with open('log/acc.txt', 'w') as fwt:
        for wav_file in wav_files:
            fid = os.path.basename(wav_file)[0:-4]
            json_path = os.path.join(json_root, fid)
            score_file = [
                os.path.join(json_path, x) for x in os.listdir(json_path)
                if x.endswith('json')
            ][0]

            # Pitch detection.
            mfshs = MFSHS(wav_file)
            mfshs.pitch_detector()
            pitches = mfshs.pitches
            zero_amp_frame = mfshs.zeroAmploc

            # Parse the MuseScore file.
            score_note, note_types, pauseLoc = parse_musescore(score_file)

            # Onset detection; predict() is called before onset_frame is
            # read, its own return value is not needed here.
            predictor = predictor_onset()
            predictor.predict(wav_file)
            onset_frame = predictor.onset_frame
            onset_frame = post_cnn_onset(pitches, onset_frame)

            match_loc_info = sw_alignment(pitches, onset_frame, score_note)
            onset_offset_pitches = trans_onset_and_offset(
                match_loc_info, onset_frame, pitches)
            filename_json = os.path.splitext(wav_file)[0] + ".json"
            evaluator = Evaluator(filename_json, onset_offset_pitches,
                                  zero_amp_frame, score_note, pauseLoc,
                                  note_types)
            print(wav_file)

            sys_acc, sys_diff_note = cal_acc(evaluator.det_note, score_note)
            gnote = ground_notes[fid]
            snote = score_note
            ground_acc, ground_diff_note = cal_acc(gnote, snote)
            ground_sys_acc, ground_sys_diff_note = cal_acc(
                evaluator.det_note, gnote)

            strs = '{}\t{}\t{}'.format('ground', 'sys', 'ground_sys')
            print(strs)
            strs = '{:.3f}\t{:.3f}\t{:.3f}\n'.format(ground_acc, sys_acc,
                                                     ground_sys_acc)
            print(strs)

            file_name = os.path.join('log', fid + '.txt')
            with open(file_name, 'w') as fw:
                fw.write('gnote\t\tsnote\t\tdnote\n')
                # ``range`` instead of ``xrange`` (removed in Python 3); a
                # dedicated loop variable avoids shadowing the outer loop.
                for row in range(len(gnote)):
                    fw.write('{:.3f}\t\t{:.3f}\t\t{:.3f}\t\t'.format(
                        gnote[row], snote[row], evaluator.det_note[row]))
                    fw.write('{:.3f}\t\t{:.3f}\t\t{:.3f}\n'.format(
                        ground_diff_note[row], sys_diff_note[row],
                        ground_sys_diff_note[row]))
                fw.write('{:.3f}\t\t{:.3f}\t\t{:.3f}\n'.format(
                    ground_acc, sys_acc, ground_sys_acc))

            fwt.write(wav_file + '\n')
            fwt.write(strs)
            fwt.write('\n')
            # Flush per file so partial results survive a later crash.
            fwt.flush()
def _main(wav_file, score_file, est_file=None):
    """Evaluate a recording against its score and dump the intermediate arrays.

    Steps, in order: load the audio at 44.1 kHz, run MFSHS pitch detection,
    load the joint CNN model and its feature scaler from ``cnnModels/joint``
    next to this file, predict the syllable observation curve from scaled
    log-mel features, pick onsets, derive notes, align them with the score
    via ``sw_alignment`` and call ``post_proprocess``.

    Besides the JSON path handed to ``post_proprocess`` (wav name with a
    ``.json`` extension), four text files are written next to the wav file
    through ``mfshs.saveArray``: ``_pitch.txt``, ``_onset.txt``,
    ``_score.txt`` and ``_detnote.txt``.

    Args:
        wav_file: path of the recording to analyse.
        score_file: score file passed to ``parse_musescore``.
        est_file: optional value forwarded to ``findPeak``.

    Returns:
        ``result_info['score']`` as returned by ``post_proprocess``.
    """
    print(wav_file)
    data_wav, fs_wav = librosa.load(wav_file, sr=44100)

    # Frame-level pitch detection.
    mfshs = MFSHS(data_wav)
    mfshs.frame()
    pitches = mfshs.pitches
    zeroAmploc = mfshs.zeroAmploc

    root_path = os.path.dirname(__file__)
    joint_cnn_model_path = os.path.join(root_path, 'cnnModels', 'joint')

    # Keras joint CNN model.
    model_joint = load_model(os.path.join(joint_cnn_model_path, 'jan_joint0.h5'))

    # Log-mel feature scaler. Opened in a ``with`` block so the handle is
    # closed deterministically instead of being left to the garbage collector.
    # NOTE(review): pickle.load can execute arbitrary code; this is only safe
    # while the scaler file is shipped with the package and never user-supplied.
    scaler_path = os.path.join(joint_cnn_model_path, 'scaler_joint.pkl')
    with open(scaler_path, 'rb') as scaler_file:
        scaler_joint = pickle.load(scaler_file)

    # ``hopsize_t`` is not defined in this function; it is expected to exist
    # at module level.
    log_mel_old = get_log_mel_madmom(wav_file, fs=fs_wav, hopsize_t=hopsize_t, channel=1)
    log_mel = scaler_joint.transform(log_mel_old)
    log_mel = feature_reshape(log_mel, nlen=7)
    log_mel = np.expand_dims(log_mel, axis=1)

    # The model has two outputs; only the syllable one is used here.
    obs_syllable, _ = model_joint.predict(log_mel, batch_size=128, verbose=2)
    obs_syllable = np.squeeze(obs_syllable)
    obs_syllable = smooth_obs(obs_syllable)
    # Pin the first and last values of the observation curve.
    obs_syllable[0] = 1.0
    obs_syllable[-1] = 0.0

    score_note, pauseLoc = parse_musescore(score_file)
    resultOnset = findPeak(obs_syllable, pitches, score_note, est_file)

    # All output files share the wav path without its extension.
    stem = os.path.splitext(wav_file)[0]
    filename_json = stem + ".json"

    Note_and_onset = pitch_Note(pitches, resultOnset['onset_frame'], score_note)
    score_note = np.array(score_note)
    result_loc_info = sw_alignment(score_note, Note_and_onset['notes'])
    result_info, det_Note = post_proprocess(
        filename_json, pitches, resultOnset['onset_frame'], zeroAmploc,
        score_note, pauseLoc, result_loc_info, 0)

    filename_pitch = stem + "_pitch.txt"
    mfshs.saveArray(filename_pitch, pitches)
    filename_onset = stem + "_onset.txt"
    mfshs.saveArray(filename_onset, resultOnset['onset_time'])
    filename_score = stem + "_score.txt"
    mfshs.saveArray(filename_score, score_note)
    filename_detnote = stem + "_detnote.txt"
    # Detected notes are rounded to two decimals before saving.
    mfshs.saveArray(filename_detnote, np.round(np.array(det_Note), 2))
    return result_info['score']
47, 47, 49, 49, 47, 47, 44, 42, 45, 44, 42, 40 ] onset_frame = [ 11, 34, 74, 93, 128, 159, 192, 226, 261, 303, 343, 366, 439, 640, 642, 763, 797, 844, 873, 906, 943, 977, 1016, 1206, 1354, 1356, 1400, 1474, 1510, 1542, 1577, 1617, 1768, 1801, 1836, 1877, 1911, 1947, 2052, 2062, 2101, 2138, 2177, 2218, 2239 ] def load(f0_file): f0_array = [] with open(f0_file, 'r+') as f: f0_list = f.readlines() for f0 in f0_list: try: f0 = float(f0.strip()) except BaseException as e: print(e) f0 = (69 + 12 * math.log(f0 / 440) / math.log(2)) if f0 > 0 else 0 f0_array.append(f0) f.close() pitches = np.array(f0_array) return pitches if __name__ == "__main__": f0_file = os.path.join(dirpath, "1011_f0.txt") pitches = load(f0_file) match_loc_info = sw_alignment(pitches, onset_frame, score_note) print(match_loc_info)