def __init__(self, WindowSize=5, FeaturesDimension=41):
    """Load normalization statistics and restore the VAD_DNN checkpoint.

    Builds a private TensorFlow graph holding a single-row feature
    placeholder and the ``VAD_DNN.Model`` network, opens a session on it and
    restores the weights from the checkpoint shipped under ``data/``.

    Args:
        WindowSize: first argument handed to ``Configuration``.
        FeaturesDimension: second argument handed to ``Configuration``.
    """
    # All bundled resources live next to this source file.
    module_dir = os.path.dirname(os.path.abspath(__file__))

    # features normalization values
    self.mean_vect = np.load(
        os.path.join(module_dir, 'data/TrainingSetMean.npy'))
    self.stdev_vect = np.load(
        os.path.join(module_dir, 'data/TrainingSetStDev.npy'))

    # TF graph initialization
    self.config = Configuration(WindowSize, FeaturesDimension)
    self.graph = tf.Graph()
    with self.graph.as_default():
        self.feat = tf.placeholder(
            dtype=tf.float32,
            shape=[1, self.config.audio_feat_dimension])

        with tf.variable_scope('model'):
            network = VAD_DNN.Model(self.feat, self.config)
            softmax_out = network.softmax
            # The probability of speech is given by the first dimension in
            # the softmax, so only that column of the output is kept.
            self.speech_prob = tf.slice(softmax_out, [0, 0], [-1, 1])

        # Both ops must be created while self.graph is the default graph.
        local_init = tf.local_variables_initializer()
        checkpoint_loader = tf.train.Saver()

    # Let the session grow its GPU memory on demand.
    session_options = tf.ConfigProto()
    session_options.gpu_options.allow_growth = True

    self.session = tf.Session(graph=self.graph, config=session_options)
    self.session.run(local_init)
    checkpoint_loader.restore(
        self.session,
        os.path.join(module_dir, "data/datamean_nodeltas_model_epoch13.ckpt"))
Vb.main(save_dir, prj_dir, 'bDNN', 'train', dev='/gpu:' + gpu_no) gs.freeze_graph(prj_dir + '/logs/bDNN', prj_dir + '/saved_model/graph/bDNN', 'model_1/logits,model_1/labels') if mode == 2: set_path = ps.PathSetting(prj_dir, 'DNN', save_dir) logs_dir = set_path.logs_dir os.system("rm -rf " + logs_dir + '/train') os.system("rm -rf " + logs_dir + '/valid') os.system("mkdir " + logs_dir + '/train') os.system("mkdir " + logs_dir + '/valid') Vd.main(save_dir, prj_dir, 'DNN', 'train', dev='/gpu:' + gpu_no) gs.freeze_graph(prj_dir + '/logs/DNN', prj_dir + '/saved_model/graph/DNN', 'model_1/soft_pred,model_1/raw_labels') if mode == 3: set_path = ps.PathSetting(prj_dir, 'LSTM', save_dir) logs_dir = set_path.logs_dir os.system("rm -rf " + logs_dir + '/train') os.system("rm -rf " + logs_dir + '/valid') os.system("mkdir " + logs_dir + '/train') os.system("mkdir " + logs_dir + '/valid') Vl.main(save_dir, prj_dir, 'LSTM', 'train', dev='/gpu:' + gpu_no)
pred, label = Vp.main() elif mode == 1: Vb.test_config(c_test_dir=data_dir, c_norm_dir=norm_dir, c_initial_logs_dir=model_dir, c_batch_size_eval=batch_size, c_data_len=data_len) pred, label = Vb.main() elif mode == 2: Vd.test_config(c_test_dir=data_dir, c_norm_dir=norm_dir, c_initial_logs_dir=model_dir, c_batch_size_eval=batch_size, c_data_len=data_len) pred, label = Vd.main() elif mode == 3: Vl.test_config(c_test_dir=data_dir, c_norm_dir=norm_dir, c_initial_logs_dir=model_dir, c_batch_size_eval=batch_size, c_data_len=data_len) pred, label = Vl.main()
Vb.main(prj_dir, 'bDNN', 'train') gs.freeze_graph(prj_dir + '/logs/bDNN', prj_dir + '/saved_model/graph/bDNN', 'model_1/logits,model_1/labels') # gs.freeze_graph(prj_dir + '/saved_model/temp', prj_dir + '/saved_model/temp', 'model_1/soft_pred,model_1/raw_labels') if mode == 2: set_path = ps.PathSetting(prj_dir, 'DNN') logs_dir = set_path.logs_dir os.system("rm -rf " + logs_dir + '/train') os.system("rm -rf " + logs_dir + '/valid') os.system("mkdir " + logs_dir + '/train') os.system("mkdir " + logs_dir + '/valid') Vd.main(prj_dir, 'DNN', 'train') gs.freeze_graph(prj_dir + '/logs/DNN', prj_dir + '/saved_model/graph/DNN', 'model_1/soft_pred,model_1/raw_labels') # gs.freeze_graph(prj_dir + '/saved_model/temp', prj_dir + '/saved_model/temp', 'model_1/soft_pred,model_1/raw_labels') if mode == 3: set_path = ps.PathSetting(prj_dir, 'LSTM') logs_dir = set_path.logs_dir os.system("rm -rf " + logs_dir + '/train') os.system("rm -rf " + logs_dir + '/valid') os.system("mkdir " + logs_dir + '/train') os.system("mkdir " + logs_dir + '/valid') Vl.main(prj_dir, 'LSTM', 'train')
def _expand_pitch_posterior(coarse, ratio, target_shape):
    """Stretch a coarse-resolution posterior onto the finest pitch grid.

    Column ``j`` of ``coarse`` (every column but the last) is copied into
    columns ``[j * ratio, (j + 1) * ratio)`` of the result, and the last
    column of ``coarse`` is written into the last column of the result.

    Args:
        coarse: 2-D array of per-frame predictions at a coarse resolution.
        ratio: integer number of fine columns covered by one coarse column.
        target_shape: shape of the finest-resolution prediction array.

    Returns:
        A new float array of shape ``target_shape``.
    """
    expanded = np.zeros(target_shape)
    n_frames = target_shape[0]
    for j in range(coarse.shape[1] - 1):
        # A (n_frames, 1) column broadcasts over the `ratio` target columns,
        # which replaces the former per-frame Python loop.
        expanded[:, j * ratio:(j + 1) * ratio] = coarse[:n_frames, j:j + 1]
    expanded[:, -1] = coarse[:n_frames, -1]
    return expanded


def main(
        param=0.2,
        PATH_LOAD_FILE='/home/keums/Melody/dataset/adc2004_full_set/file/pop4.wav',
        PATH_SAVE_FILE='./SAVE_RESULTS/pop4.txt'):
    """Extract the melody pitch track of one audio file and save it as text.

    Pipeline: feature extraction, three ``MelodyExtraction_SCDNN`` passes at
    resolutions 1, 2 and 4, merging of the three predictions, singing voice
    detection, Viterbi decoding, and writing of the result file.

    Args:
        param: value forwarded to ``VAD_DNN`` as its ``param`` argument
            (presumably the voicing decision threshold; confirm against
            ``VAD_DNN``).
        PATH_LOAD_FILE: path of the input audio file.
        PATH_SAVE_FILE: path of the text file to write. Each line holds
            ``"<time>\\t<pitch>"`` where time is ``0.01 * frame_index`` and
            pitch is the estimated frequency multiplied by the voice
            detection output.

    Returns:
        None. The result is written to ``PATH_SAVE_FILE`` and that path is
        printed.
    """
    # ==================================
    # Feature extraction: .wav --> spectrogram
    # ==================================
    x_test_log = myFeatureExtraction(PATH_LOAD_FILE)

    # ==================================
    # Multi-column spectrograms (single frame for the voice detector,
    # 11 frames for the melody networks)
    # ==================================
    x_test_SF = making_multi_frame(x_test_log, num_frames=1)
    x_test_MF = making_multi_frame(x_test_log, num_frames=11)

    select_res_1st = 1
    select_res_2nd = 2
    select_res_3rd = 4

    pitch_range = np.arange(min_pitch, max_pitch + 1.0 / select_res_3rd,
                            1.0 / select_res_3rd)

    # ==================================
    # Melody extraction using DNN
    # ==================================
    y_predict_1st = MelodyExtraction_SCDNN(x_test_MF, select_res_1st)
    y_predict_2nd = MelodyExtraction_SCDNN(x_test_MF, select_res_2nd)
    y_predict_3rd = MelodyExtraction_SCDNN(x_test_MF, select_res_3rd)

    # ==================================
    # Merge SCDNN
    # ==================================
    # Floor division keeps the ratios integral on Python 3 as well; they are
    # used as slice bounds.
    ratio_res_1_3 = select_res_3rd // select_res_1st
    ratio_res_2_3 = select_res_3rd // select_res_2nd

    y_predict_tmp_1_3 = _expand_pitch_posterior(
        y_predict_1st, ratio_res_1_3, y_predict_3rd.shape)
    y_predict_tmp_2_3 = _expand_pitch_posterior(
        y_predict_2nd, ratio_res_2_3, y_predict_3rd.shape)

    # Product of the three predictions, evaluated in the log10 domain.
    y_predict = 10**(np.log10(y_predict_tmp_1_3) +
                     np.log10(y_predict_tmp_2_3) +
                     np.log10(y_predict_3rd))
    del y_predict_tmp_1_3
    del y_predict_tmp_2_3

    # ==================================
    # Singing voice detection
    # ==================================
    # Honour the caller-supplied `param` instead of a hard-coded 0.2.
    voice_frame_vad = VAD_DNN(x_test_SF, y_predict_1st, param=param)

    # ==================================
    # Viterbi algorithm
    # ==================================
    path_viterbi = './viterbi/'
    path_prior_matrix_file = path_viterbi + 'prior_' + str(
        select_res_3rd) + '.npy'
    path_transition_matrix_file = path_viterbi + 'transition_matrix_' + str(
        select_res_3rd) + '.npy'

    prior = np.load(path_prior_matrix_file)
    transition_matrix = np.load(path_transition_matrix_file)
    viterbi_path = viterbi(y_predict,
                           transition_matrix=transition_matrix,
                           prior=prior,
                           penalty=0,
                           scaled=True)

    pitch_MIDI = np.zeros([y_predict.shape[0], 1])
    pitch_freq = np.zeros([y_predict.shape[0], 1])
    for i in range(y_predict.shape[0]):
        pitch_MIDI[i] = pitch_range[viterbi_path[i]]
        # MIDI note number --> frequency, with note 69 mapped to 440.
        pitch_freq[i] = 2**((pitch_MIDI[i] - 69) / 12.) * 440

    est_pitch = np.multiply(pitch_freq, voice_frame_vad)

    # ==================================
    # Adjust frame: delay the track by `idx_shift` frames, zero-padded
    # ==================================
    idx_shift = 2
    shift_array = np.zeros(idx_shift)
    est_pitch = np.append(shift_array, est_pitch[:-idx_shift])

    # ==================================
    # Save result
    # ==================================
    PATH_est_pitch = PATH_SAVE_FILE
    save_dir = os.path.dirname(PATH_est_pitch)
    # A bare file name has an empty dirname, which os.makedirs rejects.
    if save_dir and not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # The context manager closes the file even if a write fails.
    with open(PATH_est_pitch, 'w') as f:
        for j, pitch in enumerate(est_pitch):
            f.write("%f\t%f\n" % (0.01 * j, pitch))

    print(PATH_est_pitch)
os.system("rm -rf " + save_dir) os.system("mkdir " + save_dir) os.system("mkdir " + save_dir + '/train') os.system("mkdir " + save_dir + '/valid') os.system( "matlab -r \"try acoustic_feat_ex(\'%s\',\'%s\'); catch; end; quit\"" % (train_data_dir, train_save_dir)) os.system( "matlab -r \"try acoustic_feat_ex(\'%s\',\'%s\'); catch; end; quit\"" % (valid_data_dir, valid_save_dir)) train_norm_dir = save_dir + '/train/global_normalize_factor.mat' test_norm_dir = prj_dir + '/norm_data/global_normalize_factor.mat' os.system("cp %s %s" % (train_norm_dir, test_norm_dir)) if mode == 0: logs_dir = prj_dir + '/logs' os.system("rm -rf " + logs_dir + '/train') os.system("rm -rf " + logs_dir + '/valid') os.system("mkdir " + logs_dir + '/train') os.system("mkdir " + logs_dir + '/valid') Vd.train_config(save_dir + '/train', save_dir + '/valid', prj_dir + '/logs', batch_size, train_step, 'train') Vd.main() # os.system("rm -rf") print("done")