def load_data(self):
    """Load (or compute and cache) the features, then build train/val/test splits.

    If both cache files ``features.pckl`` and ``targets.pckl`` exist under
    ``cfg.DATASET_PATH`` they are unpickled; otherwise the features are
    computed with ``extract_melgrams(cfg.LABELS_PATH)`` and written to those
    two files (pickle protocol 4).

    The data is split in two stratified stages with a fixed random state:
    33% is held out first, then that hold-out is halved into validation and
    test sets.

    Sets ``self.X_train``, ``self.X_val``, ``self.X_test`` and the
    corresponding ``self.Y_train``, ``self.Y_val``, ``self.Y_test`` (targets
    converted with ``np_utils.to_categorical`` using ``self.nb_classes``).
    """
    features_path = cfg.DATASET_PATH + 'features.pckl'
    targets_path = cfg.DATASET_PATH + 'targets.pckl'

    print('Extracting features for train set:')
    # Require BOTH cache files: a half-written cache (features present,
    # targets missing) is regenerated instead of crashing on load.
    if os.path.exists(features_path) and os.path.exists(targets_path):
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # these cache files must only ever come from a trusted location.
        with open(features_path, 'rb') as f:
            X = pickle.load(f)
        with open(targets_path, 'rb') as f:
            y = pickle.load(f)
    else:
        X, y = extract_melgrams(cfg.LABELS_PATH)
        # Context managers guarantee the cache files are flushed and closed.
        with open(features_path, 'wb') as f:
            pickle.dump(X, f, protocol=4)
        with open(targets_path, 'wb') as f:
            pickle.dump(y, f, protocol=4)

    # Stage 1: keep 67% for training, hold out 33%.
    self.X_train, X_holdout, y_train, y_holdout = train_test_split(
        X, y, stratify=y, test_size=0.33, random_state=23)
    print('X_train shape:', self.X_train.shape)

    # Stage 2: split the hold-out evenly into validation and test sets.
    self.X_val, self.X_test, y_val, y_test = train_test_split(
        X_holdout, y_holdout, stratify=y_holdout, test_size=0.5,
        random_state=23)
    print('X_val shape:', self.X_val.shape)
    print('X_test shape:', self.X_test.shape)

    self.Y_train = np_utils.to_categorical(y_train, self.nb_classes)
    self.Y_val = np_utils.to_categorical(y_val, self.nb_classes)
    self.Y_test = np_utils.to_categorical(y_test, self.nb_classes)
# Text file listing the songs to classify, one path per line.
test_songs_list = 'list_example.txt'

# Initialize model
model = MusicTaggerCRNN(weights=None, input_tensor=(1, 96, 1366))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

if LOAD_WEIGHTS:
    model.load_weights(weights_path + 'custom_model_best.h5')

# model.summary()

# X_test holds the frames of every song; num_frames_test holds one frame
# count per song.
X_test, num_frames_test = extract_melgrams(test_songs_list,
                                           MULTIFRAMES,
                                           process_all_song=False,
                                           num_songs_genre='')
num_frames_test = np.array(num_frames_test)

t0 = time.time()

print('\n--------- Predicting ---------', '\n')

# One row of per-tag scores for every frame.
results = np.zeros((X_test.shape[0], tags.shape[0]))
# One predicted label per song (mean over its frames) and one per frame.
predicted_labels_mean = np.zeros((num_frames_test.shape[0], 1))
predicted_labels_frames = np.zeros((X_test.shape[0], 1))

# Read the song paths with a context manager so the file handle is closed.
with open(test_songs_list, 'r') as songs_file:
    song_paths = songs_file.read().splitlines()

# Index of the first frame of the song currently being processed.
previous_numFrames = 0
train_gt_list = 'lists/train_gt_list.txt'
test_gt_list = 'lists/test_gt_list.txt'

# Data Loading
if LOAD_DB:
    if MULTIFRAMES:
        # Single-argument print() call: prints the same text on Python 2 and
        # Python 3 (the bare print statement is a SyntaxError on Python 3).
        print('Loading dataset multiframe...')
        # NOTE(review): both calls load from the same empty path -- these
        # look like placeholders; confirm the intended dataset files.
        X_train, y_train, num_frames_train = load_dataset('')
        X_test, y_test, num_frames_test = load_dataset('')
    else:
        X_train, X_test, y_train, y_test = load_dataset('')

# Compute mel-spectogram for all the frames
else:
    X_train, y_train, num_frames_train = extract_melgrams(
        train_songs_list, MULTIFRAMES,
        process_all_song=False, num_songs_genre=20)
    print('X_train shape:', X_train.shape)
    X_test, y_test, num_frames_test = extract_melgrams(
        test_songs_list, MULTIFRAMES,
        process_all_song=False, num_songs_genre=10)

print(X_train.shape, 'train samples')
print(X_test.shape, 'test samples')

y_train = np.array(y_train)
y_test = np.array(y_test)

# Optionally persist the multiframe dataset (train and test separately).
if SAVE_DB:
    if MULTIFRAMES:
        save_dataset('music_dataset/music_dataset_multiframe_train.h5',
                     X_train, y_train, num_frames_train)
        save_dataset('music_dataset/music_dataset_multiframe_test.h5',
                     X_test, y_test, num_frames_test)
def run(path):
    """Predict the music genre of the songs found under directory *path*.

    Builds a ``MusicTaggerCRNN`` model, loads the trained weights, extracts
    the mel-spectrogram frames with ``extract_melgrams`` and predicts a score
    per GTZAN genre for every frame. Per-frame and per-song (mean over the
    song's frames) rankings are printed through ``sort_result``.

    Args:
        path: Directory containing the songs; it is passed to
            ``extract_melgrams`` and listed with ``os.listdir``.

    Returns:
        A tuple ``(genre, song_path, mean_scores)`` where ``genre`` is the
        predicted tag and ``mean_scores`` the list of mean per-genre scores
        of the LAST processed song, and ``song_path`` is the first entry of
        ``os.listdir(path)``.

    Raises:
        ValueError: If no song was extracted from *path*.
    """
    # Parameters to set
    LOAD_WEIGHTS = 1
    MULTIFRAMES = 1

    # GTZAN Dataset Tags
    tags = np.array([
        'blues', 'classical', 'country', 'disco', 'hiphop',
        'jazz', 'metal', 'pop', 'reggae', 'rock',
    ])

    # Paths to set
    model_name = "example_model"
    weights_path = "models_trained/" + model_name + "/weights/"
    test_songs_list = path

    # Initialize model
    model = MusicTaggerCRNN(weights=None, input_tensor=(1, 96, 1366))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    if LOAD_WEIGHTS:
        model.load_weights(weights_path + 'crnn_net_gru_adam_ours_epoch_40.h5')

    X_test, num_frames_test = extract_melgrams(test_songs_list,
                                               MULTIFRAMES,
                                               process_all_song=False,
                                               num_songs_genre='')
    num_frames_test = np.array(num_frames_test)
    if num_frames_test.shape[0] == 0:
        # Without this guard the function fails at the return statement with
        # an unhelpful UnboundLocalError.
        raise ValueError('No songs to classify in: ' + str(path))

    # Every print below is a single-string print() call, so the output is
    # byte-identical on Python 2 and Python 3.
    print('\n--------- Predicting --------- \n')

    # One row of per-genre scores for every frame.
    results = np.zeros((X_test.shape[0], tags.shape[0]))
    # Per-song and per-frame predicted labels (filled in, not returned).
    predicted_labels_mean = np.zeros((num_frames_test.shape[0], 1))
    predicted_labels_frames = np.zeros((X_test.shape[0], 1))

    # NOTE(review): os.listdir returns entries in arbitrary order; this
    # assumes it matches the song order used by extract_melgrams -- confirm.
    song_paths = os.listdir(test_songs_list)

    previous_numFrames = 0
    for i, num_frames in enumerate(num_frames_test):
        print('Song number' + str(i) + ': ' + song_paths[i])
        print('Num_frames of 30s:  ' + str(num_frames) + ' \n')

        frames_end = previous_numFrames + num_frames
        results[previous_numFrames:frames_end] = model.predict(
            X_test[previous_numFrames:frames_end, :, :, :])

        for s_counter, j in enumerate(range(previous_numFrames, frames_end)):
            # Normalize the scores of the frame so they sum to 1; an all-zero
            # row is left untouched instead of being turned into NaNs.
            total = results[j, :].sum()
            if total != 0:
                results[j, :] = results[j, :] / total

            print('Percentage of genre prediction for seconds '
                  + str(20 + s_counter * 30) + ' to '
                  + str(20 + (s_counter + 1) * 30) + ': ')
            sort_result(tags, results[j, :].tolist())

            # j is the global frame index, so it addresses the per-frame
            # label array directly.
            predicted_labels_frames[j] = predict_label(results[j, :])

        print('\nMean genre of the song: ')
        results_song = results[previous_numFrames:frames_end]
        mean = results_song.mean(0)
        sort_result(tags, mean.tolist())

        predicted_label_mean = predict_label(mean)
        predicted_labels_mean[i] = predicted_label_mean
        print('\nThe predicted music genre for the song is '
              + str(tags[predicted_label_mean]) + ' !\n')

        previous_numFrames = frames_end

        print('************************************************************************************************')

    # NOTE(review): the genre and mean scores belong to the last processed
    # song while the path is the first directory entry; this is consistent
    # only when the directory holds a single song -- confirm intended usage.
    return tags[predicted_label_mean], song_paths[0], mean.tolist()
train_songs_list = 'lists/train_songs_list.txt'
test_songs_list = 'lists/test_songs_list.txt'

# Indicate the name of train and test songs
train_gt_list = 'lists/train_gt_list.txt'
test_gt_list = 'lists/test_gt_list.txt'

# Data Loading or computing the Mel-spectogram for each song
if LOAD:
    X_train, y_train, num_frames_train = load_dataset(
        'music_dataset/music_dataset_train.h5')
    X_test, y_test, num_frames_test = load_dataset(
        'music_dataset/music_dataset_test.h5')
else:
    print('Computing melgrams for training dataset')
    X_train, y_train, num_frames_train = extract_melgrams(
        train_songs_list, process_all_song=False, num_songs_genre=70)
    print('X_train shape:', X_train.shape)

    print('Computing melgrams for testing dataset')
    X_test, y_test, num_frames_test = extract_melgrams(
        test_songs_list, process_all_song=False, num_songs_genre=30)
    # Report the shape of the test set that was just computed (the original
    # printed the training shape a second time here).
    print('X_test shape:', X_test.shape)

print(X_train.shape, 'train samples')
print(X_test.shape, 'test samples')

y_train = np.array(y_train)
y_test = np.array(y_test)

# Convert the integer labels into 10-class categorical targets.
Y_train = np_utils.to_categorical(y_train, 10)
Y_test = np_utils.to_categorical(y_test, 10)