Example #1
0
 def load_data(self):
     """Load (or build and cache) the dataset, then split it three ways.

     Features and targets are read from ``features.pckl`` / ``targets.pckl``
     under ``cfg.DATASET_PATH`` when both files exist; otherwise they are
     computed with ``extract_melgrams(cfg.LABELS_PATH)`` and written to
     those two files.

     The data is split (stratified, ``random_state=23``) into 67% train and
     a 33% hold-out, and the hold-out is split again 50/50 into validation
     and test sets.

     Sets ``self.X_train``, ``self.X_val``, ``self.X_test`` and the one-hot
     encoded ``self.Y_train``, ``self.Y_val``, ``self.Y_test`` (encoded with
     ``self.nb_classes`` classes). Returns nothing.
     """
     print('Extracting features for train set:')

     features_file = cfg.DATASET_PATH + 'features.pckl'
     targets_file = cfg.DATASET_PATH + 'targets.pckl'

     # Use the cache only when BOTH files are present: a features file
     # without its targets file would otherwise fail on open() below.
     if os.path.exists(features_file) and os.path.exists(targets_file):
         # NOTE: pickle.load can execute arbitrary code; these files must
         # only ever be the cache written by the else-branch below.
         with open(features_file, 'rb') as f:
             X = pickle.load(f)
         with open(targets_file, 'rb') as f:
             y = pickle.load(f)
     else:
         X, y = extract_melgrams(cfg.LABELS_PATH)
         # Context managers guarantee the cache files are flushed and closed.
         with open(features_file, 'wb') as f:
             pickle.dump(X, f, protocol=4)
         with open(targets_file, 'wb') as f:
             pickle.dump(y, f, protocol=4)

     # First split: 67% train / 33% hold-out (temporarily kept in X_test).
     self.X_train, self.X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.33, random_state=23)

     print('X_train shape:', self.X_train.shape)

     # Second split: divide the hold-out evenly into validation and test.
     self.X_val, self.X_test, y_val, y_test = train_test_split(self.X_test, y_test, stratify=y_test, test_size=0.5, random_state=23)
     print('X_val shape:', self.X_val.shape)
     print('X_test shape:', self.X_test.shape)

     # One-hot encode the class labels.
     self.Y_train = np_utils.to_categorical(y_train, self.nb_classes)
     self.Y_val = np_utils.to_categorical(y_val, self.nb_classes)
     self.Y_test = np_utils.to_categorical(y_test, self.nb_classes)
Example #2
0
# Text file listing the songs to classify, one path per line.
test_songs_list = 'list_example.txt'

# Initialize model
model = MusicTaggerCRNN(weights=None, input_tensor=(1, 96, 1366))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# LOAD_WEIGHTS and weights_path are defined elsewhere in the script.
if LOAD_WEIGHTS:
    model.load_weights(weights_path + 'custom_model_best.h5')

#model.summary()

X_test, num_frames_test = extract_melgrams(test_songs_list,
                                           MULTIFRAMES,
                                           process_all_song=False,
                                           num_songs_genre='')

num_frames_test = np.array(num_frames_test)

t0 = time.time()

print('\n--------- Predicting ---------', '\n')

# One row of per-tag scores for each entry along X_test's first axis.
results = np.zeros((X_test.shape[0], tags.shape[0]))
# One slot per entry of num_frames_test.
predicted_labels_mean = np.zeros((num_frames_test.shape[0], 1))
# One slot per entry along X_test's first axis.
predicted_labels_frames = np.zeros((X_test.shape[0], 1))

# Read the song paths with a context manager so the file is closed promptly.
with open(test_songs_list, 'r') as songs_file:
    song_paths = songs_file.read().splitlines()

previous_numFrames = 0
Example #3
0
    train_gt_list = 'lists/train_gt_list.txt'
    test_gt_list = 'lists/test_gt_list.txt'

# Data Loading

if LOAD_DB:
    # NOTE(review): every load_dataset call below receives an empty path;
    # confirm which dataset files are meant to be loaded.
    if MULTIFRAMES:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3 (the bare print statement is a SyntaxError
        # on Python 3).
        print('Loading dataset multiframe...')
        X_train,  y_train, num_frames_train  = load_dataset('')
        X_test, y_test, num_frames_test = load_dataset('')
    else:
        X_train, X_test, y_train, y_test = load_dataset('')

# Compute mel-spectrogram for all the frames
else:
    X_train, y_train, num_frames_train = extract_melgrams(train_songs_list, MULTIFRAMES, process_all_song=False, num_songs_genre=20)
    print('X_train shape:', X_train.shape)
    X_test, y_test, num_frames_test = extract_melgrams(test_songs_list, MULTIFRAMES, process_all_song=False, num_songs_genre=10)


print(X_train.shape, 'train samples')
print(X_test.shape, 'test samples')


# Convert the label sequences to numpy arrays.
y_train = np.array(y_train)
y_test = np.array(y_test)

# Persist the train and test sets (with their frame counts) when requested.
if SAVE_DB:
    if MULTIFRAMES:
        save_dataset('music_dataset/music_dataset_multiframe_train.h5', X_train, y_train,num_frames_train)
        save_dataset('music_dataset/music_dataset_multiframe_test.h5', X_test,y_test,num_frames_test)
Example #4
0
def run(path):
    """Predict the music genre of the songs found in directory *path*.

    Builds a ``MusicTaggerCRNN``, loads the trained weights, extracts the
    frames of every song with ``extract_melgrams`` and prints, for each
    song, the normalised per-frame genre scores and the mean scores over
    all of its frames.

    All messages are printed as a single pre-built string so the output is
    the same on Python 2 and Python 3.

    Args:
        path: Directory passed to ``extract_melgrams`` and listed with
            ``os.listdir`` to obtain the song names.

    Returns:
        A 3-tuple ``(genre, song, scores)``: ``genre`` is the tag predicted
        for the LAST processed song, ``song`` is the FIRST entry returned by
        ``os.listdir(path)`` and ``scores`` is the list of mean per-genre
        scores of the last processed song.
        NOTE(review): the three values only describe the same song when
        *path* holds a single song - confirm this is the intended use.

    Raises:
        ValueError: If ``extract_melgrams`` reports no songs (previously
            this surfaced as an UnboundLocalError at the return statement).
    """
    # Parameters to set
    LOAD_WEIGHTS = 1
    MULTIFRAMES = 1

    # GTZAN Dataset Tags
    tags = np.array([
        'blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal',
        'pop', 'reggae', 'rock'
    ])

    # Paths to set
    model_name = "example_model"
    weights_path = "models_trained/" + model_name + "/weights/"

    test_songs_list = path

    # Initialize model
    model = MusicTaggerCRNN(weights=None, input_tensor=(1, 96, 1366))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    if LOAD_WEIGHTS:
        model.load_weights(weights_path + 'crnn_net_gru_adam_ours_epoch_40.h5')

    X_test, num_frames_test = extract_melgrams(test_songs_list,
                                               MULTIFRAMES,
                                               process_all_song=False,
                                               num_songs_genre='')

    num_frames_test = np.array(num_frames_test)
    if num_frames_test.shape[0] == 0:
        # Without any song the loop below never binds the values returned
        # at the end of the function, so fail early with a clear message.
        raise ValueError('No songs to predict in ' + repr(path))

    print('\n--------- Predicting --------- \n')

    results = np.zeros((X_test.shape[0], tags.shape[0]))
    predicted_labels_mean = np.zeros((num_frames_test.shape[0], 1))
    predicted_labels_frames = np.zeros((X_test.shape[0], 1))

    # NOTE(review): song names are paired with frame counts by position;
    # this presumes os.listdir returns the songs in the same order that
    # extract_melgrams processed them - verify.
    song_paths = os.listdir(test_songs_list)

    # Index of the first row of `results` belonging to the current song.
    previous_numFrames = 0
    for i, num_frames in enumerate(num_frames_test):
        print('Song number' + str(i) + ': ' + song_paths[i])
        print('Num_frames of 30s:  ' + str(num_frames) + ' \n')

        song_end = previous_numFrames + num_frames
        results[previous_numFrames:song_end] = model.predict(
            X_test[previous_numFrames:song_end, :, :, :])

        for s_counter, j in enumerate(range(previous_numFrames, song_end)):
            # Normalize the frame's scores so that they sum to 1.
            results[j, :] = results[j, :] / results[j, :].sum()
            print('Percentage of genre prediction for seconds '
                  + str(20 + s_counter * 30) + ' to '
                  + str(20 + (s_counter + 1) * 30) + ': ')
            sort_result(tags, results[j, :].tolist())

            predicted_labels_frames[j] = predict_label(results[j, :])

        print('\nMean genre of the song: ')
        results_song = results[previous_numFrames:song_end]

        # Average the per-frame scores over the whole song.
        mean = results_song.mean(0)
        sort_result(tags, mean.tolist())

        predicted_label_mean = predict_label(mean)

        predicted_labels_mean[i] = predicted_label_mean
        print('\nThe predicted music genre for the song is '
              + str(tags[predicted_label_mean]) + ' !\n')

        previous_numFrames = song_end

        print('************************************************************************************************')

    return tags[predicted_label_mean], song_paths[0], mean.tolist()
Example #5
0
# Lists of the train and test songs
train_songs_list = 'lists/train_songs_list.txt'
test_songs_list = 'lists/test_songs_list.txt'

# Ground-truth lists for the train and test songs
train_gt_list = 'lists/train_gt_list.txt'
test_gt_list = 'lists/test_gt_list.txt'

# Data Loading or computing the Mel-spectrogram for each song
if LOAD:
    X_train, y_train, num_frames_train = load_dataset(
        'music_dataset/music_dataset_train.h5')
    X_test, y_test, num_frames_test = load_dataset(
        'music_dataset/music_dataset_test.h5')
else:
    print('Computing melgrams for training dataset')
    X_train, y_train, num_frames_train = extract_melgrams(
        train_songs_list, process_all_song=False, num_songs_genre=70)
    print('X_train shape:', X_train.shape)
    print('Computing melgrams for testing dataset')
    X_test, y_test, num_frames_test = extract_melgrams(test_songs_list,
                                                       process_all_song=False,
                                                       num_songs_genre=30)
    # Report the test set that was just computed (the original repeated the
    # X_train line here).
    print('X_test shape:', X_test.shape)

print(X_train.shape, 'train samples')
print(X_test.shape, 'test samples')

y_train = np.array(y_train)
y_test = np.array(y_test)

# One-hot encode the labels over 10 classes.
Y_train = np_utils.to_categorical(y_train, 10)
Y_test = np_utils.to_categorical(y_test, 10)