def collect_data(dataset_path):
    """Collect the GTZAN dataset into feature/label matrices.

    Computes a Mel-scaled power spectrogram for each track via ``load_track``.

    :param dataset_path: path to the GTZAN dataset directory
    :returns: triple ``(x, y, track_paths)`` where ``x`` holds the extracted
        features, ``y`` is a one-hot matrix of genre labels and
        ``track_paths`` maps row indices of ``x``/``y`` to absolute paths
    """
    default_shape = get_default_shape(dataset_path)
    x = np.zeros((TRACK_COUNT,) + default_shape, dtype=np.float32)
    y = np.zeros((TRACK_COUNT, len(GENRES)), dtype=np.float32)
    track_paths = {}

    tracks_per_genre = TRACK_COUNT // len(GENRES)
    for genre_index, genre_name in enumerate(GENRES):
        for i in range(tracks_per_genre):
            # GTZAN names tracks with a five-digit zero-padded index
            # (e.g. blues.00042.au). '{:05d}' produces the same strings as
            # the old "'000' + zfill(2)" splice for i < 100, but stays
            # correct if a genre ever holds 100+ tracks.
            file_name = '{}/{}.{:05d}.au'.format(genre_name, genre_name, i)
            print('Processing', file_name)
            path = os.path.join(dataset_path, file_name)
            track_index = genre_index * tracks_per_genre + i
            x[track_index], _ = load_track(path, default_shape)
            y[track_index, genre_index] = 1
            track_paths[track_index] = os.path.abspath(path)

    return (x, y, track_paths)
def collect_data(dataset_path):
    """Collect the GTZAN dataset into feature/label matrices.

    Computes a Mel-scaled power spectrogram for each track via ``load_track``.

    :param dataset_path: path to the GTZAN dataset directory
    :returns: triple ``(x, y, track_paths)`` where ``x`` holds the extracted
        features, ``y`` is a one-hot matrix of genre labels and
        ``track_paths`` maps row indices of ``x``/``y`` to absolute paths
    """
    default_shape = get_default_shape(dataset_path)
    x = np.zeros((TRACK_COUNT,) + default_shape, dtype=np.float32)
    y = np.zeros((TRACK_COUNT, len(GENRES)), dtype=np.float32)
    track_paths = {}

    counter = 0
    for genre_index, genre_name in enumerate(GENRES):
        # Build absolute-ish paths instead of os.chdir(): mutating the
        # process CWD is a global side effect and breaks a *relative*
        # dataset_path on the second genre iteration. sorted() makes the
        # row order deterministic across filesystems.
        genre_dir = os.path.join(dataset_path, genre_name)
        for file in sorted(os.listdir(genre_dir)):
            print('Processing', file)
            path = os.path.join(genre_dir, file)
            x[counter], _ = load_track(path, default_shape)
            y[counter, genre_index] = 1
            track_paths[counter] = os.path.abspath(path)
            counter += 1

    return (x, y, track_paths)
def collect_data(dataset_path, metadata_path):
    """Collect the FMA dataset (medium subset) into feature/label matrices.

    Computes a Mel-scaled power spectrogram for each track via ``load_track``.

    :param dataset_path: path to the FMA dataset directory
    :param metadata_path: path to the pickled FMA metadata file
    :returns: triple ``(x, y, track_paths)`` where ``x`` holds the extracted
        features, ``y`` is a one-hot matrix of genre labels and
        ``track_paths`` maps row indices of ``x``/``y`` to absolute paths
    """
    default_shape = get_default_shape(dataset_path)

    # NOTE(review): unpickling is only safe for trusted local metadata —
    # pickle.load can execute arbitrary code on a malicious file.
    with open(metadata_path, 'rb') as metadata_file:
        tracks = pickle.load(metadata_file)
    tracks = tracks[tracks['set', 'subset'] <= 'medium']

    # Known zero-length/broken MP3s in the FMA dump; drop them up front.
    empty_files = ['001486', '005574', '065753', '080391', '098558',
                   '098559', '098560', '098571', '099134', '105247',
                   '108925', '127336', '133297', '143992']
    for tid in empty_files:  # renamed from 'x', which shadowed the feature matrix below
        tracks = tracks.drop(int(tid))
    tracks.reset_index(inplace=True)

    # Undersample so every genre contributes the same number of tracks.
    rus = RandomUnderSampler(random_state=1212)
    X_resampled, y_resampled = rus.fit_sample(
        np.array(tracks['track_id']).reshape(-1, 1),
        tracks['track', 'genre_top'])
    # reshape(-1) instead of the hard-coded 336 so the code keeps working
    # if the subset or the empty-file list ever changes.
    X_resampled = X_resampled.reshape(-1)
    y_resampled = y_resampled.reshape(-1)

    track_count = X_resampled.shape[0]
    x = np.zeros((track_count,) + default_shape, dtype=np.float32)
    y = np.zeros((track_count, len(GENRES)), dtype=np.float32)
    track_paths = {}

    for i in range(track_count):
        # FMA layout: tracks live under a directory named after the first
        # three digits of the zero-padded six-digit track id.
        tid_str = '{:06d}'.format(X_resampled[i])
        file_name = os.path.join(dataset_path, tid_str[:3], tid_str + '.mp3')
        print(f"Processing {file_name} - {i}")
        x[i], _ = load_track(file_name, default_shape)
        y[i, GENRES.index(y_resampled[i])] = 1
        track_paths[i] = os.path.abspath(file_name)

    return (x, y, track_paths)
def create_data_pickle(self):
    """Walk the dataset tree and stack per-track Mel features into one array.

    Matches files by ``self.extension`` under ``self.data_set_path``. As a
    side effect, appends each matched path to ``self.file_list`` and writes
    an ``id|path`` line to ``self.file_meta``.

    :returns: float array of shape ``(n_tracks,) + DEFAULT_SHAPE`` (an empty
        array when nothing matched)
    """
    chunks = []
    track_id = 1  # renamed from 'id' (shadowed the builtin)
    for root, dirnames, filenames in os.walk(self.data_set_path):
        for filename in fnmatch.filter(filenames, '*.' + self.extension):
            full_file_path = os.path.join(root, filename)
            print('Processing ' + full_file_path)
            self.file_list.append(full_file_path)
            self.file_meta.write(str(track_id) + '|' + full_file_path + '\n')
            # Bug fix: the original never incremented the id, so every
            # metadata line was written as "1|...".
            track_id += 1
            mel_output, _ = load_track(full_file_path, DEFAULT_SHAPE)
            chunks.append(np.expand_dims(mel_output, axis=0))
    # Stack once at the end: repeated np.vstack inside the loop copies the
    # whole accumulated array each time (O(n^2)).
    output = np.vstack(chunks) if chunks else np.array([])
    print(output.shape)
    return output
def collect_data(dataset_path):
    """Build the training data structures from the GTZAN dataset.

    :param dataset_path: root directory of the GTZAN dataset
    :returns: tuple ``(x, y, track_paths)`` — feature matrix, one-hot genre
        label matrix, and a dict mapping row index to absolute track path
    """
    default_shape = get_default_shape(dataset_path)
    per_genre = TRACK_COUNT // len(GENRES)
    x = np.zeros((TRACK_COUNT,) + default_shape, dtype=np.float32)
    y = np.zeros((TRACK_COUNT, len(GENRES)), dtype=np.float32)
    track_paths = {}

    for genre_index, genre_name in enumerate(GENRES):
        base = genre_index * per_genre
        for offset in range(per_genre):
            file_name = '{}/{}.000{}.au'.format(
                genre_name, genre_name, str(offset).zfill(2))
            print('Processing', file_name)
            path = os.path.join(dataset_path, file_name)
            row = base + offset
            x[row], _ = load_track(path, default_shape)
            y[row, genre_index] = 1
            track_paths[row] = os.path.abspath(path)

    return (x, y, track_paths)
def data_generator(data, targets, batch_size):
    """Endlessly yield ``(features, labels)`` training batches.

    Each track in ``data`` is loaded on demand with ``load_track`` at a
    fixed (934, 128) spectrogram shape; ``targets`` supplies the matching
    one-hot labels. A trailing partial batch is discarded on every pass.
    """
    while True:
        batch_x, batch_y = [], []
        for i, track_path in enumerate(data):
            features, _ = load_track(track_path, (934, 128), True)
            batch_x.append(features)
            batch_y.append(targets[i])
            if len(batch_x) >= batch_size:
                batch = (np.array(batch_x).reshape([-1, 934, 128]),
                         np.array(batch_y).reshape(-1, len(GENRES)))
                batch_x, batch_y = [], []
                yield batch
def predict_genre(model_path, songs_path, output_path):
    """Classify every song in a directory and save the genres to an Excel file.

    :param model_path: path to the trained Keras model
    :param songs_path: directory containing the audio files to classify
    :param output_path: destination path of the resulting .xlsx workbook
    """
    model = load_model(model_path)
    workbook = Workbook()
    sheet = workbook.active

    song_files = [f for f in listdir(songs_path) if isfile(join(songs_path, f))]
    for row, song in enumerate(song_files, start=1):
        features = load_track(os.path.join(songs_path, song))[0]
        # Add the channel and batch axes the model expects.
        features = np.expand_dims(np.expand_dims(features, axis=3), axis=0)
        prediction = model.predict(features)
        sheet.cell(row=row, column=1).value = song
        sheet.cell(row=row, column=2).value = "-"
        sheet.cell(row=row, column=3).value = GENRES[np.argmax(prediction)]
    workbook.save(output_path)
def predict():
    """Classify the uploaded audio file and return its predicted genre label.

    Reads the module-level ``myFile`` upload path, extracts features with
    ``load_track``, runs the globally loaded ``model`` inside ``graph``,
    deletes the temporary upload and returns the winning label string.

    :returns: predicted genre name from the module-level ``labels`` list
    """
    print('Model loaded')
    testset_path = myFile
    print("classifying audio...")

    # Feature shape the model was trained with — assumes (frames, mel
    # bands) = (647, 128); TODO confirm against the training pipeline.
    default_shape = (647, 128)
    print(testset_path)

    # The original preallocated a (TRACK_COUNT+200, ...) buffer, copied the
    # pickled training matrix into it and built an unused file name — all
    # dead code, since the result was immediately overwritten here.
    features, _ = load_track(testset_path, default_shape)

    with graph.as_default():
        scores = model.predict(np.array([features]))[0]
    index = np.argmax(np.round(scores))
    print("Prediction for the selected song is ", labels[index])

    # Clean up the uploaded temporary file once classified.
    os.remove(myFile)
    return labels[index]
def recognize(self, track_path):
    """Extract features for one track and run the prediction function on them.

    :param track_path: path to the audio file to classify
    :returns: tuple ``(predictions, duration)``
    """
    print('Loading song', track_path)
    features, duration = load_track(track_path)
    # Prepend a batch axis of size 1 before handing off to the model.
    batch = features[np.newaxis, ...]
    return (self.pred_fun(batch), duration)
def get_default_shape(dataset_path):
    """Return the feature shape produced for a reference GTZAN track."""
    reference = os.path.join(dataset_path, 'blues/blues.00000.au')
    features, _ = load_track(reference)
    return features.shape
def get_default_shape():
    """Return the feature shape produced for the configured reference file."""
    features, _ = load_track(DEFAULT_FILE)
    return features.shape
def get_default_shape(dataset_path):
    """Return the feature shape produced for a reference FMA track."""
    reference = os.path.join(dataset_path, '000/000002.mp3')
    features, _ = load_track(reference)
    return features.shape
def get_default_shape(dataset_path):
    """Return the feature shape produced for a reference track of the dataset."""
    reference = os.path.join(
        dataset_path, 'Bolero Son/Juramento - Rey Caney.mp3.wav')
    features, _ = load_track(reference)
    return features.shape