def LoadMetadata(self, metadata_dir="fma_metadata"):
    """Load the metadata tables from CSV files and keep them on ``self``.

    Args:
        metadata_dir: Directory containing ``tracks.csv``, ``genres.csv``,
            ``features.csv`` and ``echonest.csv``. Defaults to
            ``"fma_metadata"``, the location that was previously hard-coded.

    Returns:
        The tuple ``(tracks, genres, features, echonest)``. The same objects
        are also stored as attributes of the same names on ``self``.

    Raises:
        AssertionError: If the ``features`` index differs from the ``tracks``
            index, or if ``echonest`` has index entries missing from
            ``tracks``.
    """
    # Side effect kept from the original: sets the default figure size.
    plt.rcParams['figure.figsize'] = (17, 5)

    def table_path(filename):
        # Joined with "/" so the default yields exactly the original strings.
        return "{}/{}".format(metadata_dir, filename)

    # Load metadata and features.
    self.tracks = utils.load(table_path("tracks.csv"))
    self.genres = utils.load(table_path("genres.csv"))
    self.features = utils.load(table_path("features.csv"))
    self.echonest = utils.load(table_path("echonest.csv"))

    # The feature table must be indexed exactly like the track table.
    np.testing.assert_array_equal(self.features.index, self.tracks.index)

    # Explicit raise instead of a bare ``assert`` so this check is not
    # stripped under ``python -O``; the exception type is unchanged.
    if not self.echonest.index.isin(self.tracks.index).all():
        raise AssertionError(
            "echonest contains index entries that are missing from tracks")

    return self.tracks, self.genres, self.features, self.echonest
def filter_csv(tracks_path='tracks.csv', output_path='my_file.csv'):
    """Write each track's top genre next to its ``set`` columns as a CSV file.

    Args:
        tracks_path: Track metadata CSV passed to ``utils.load``. Defaults to
            ``'tracks.csv'``, the path that was previously hard-coded.
        output_path: Destination of the generated CSV. Defaults to
            ``'my_file.csv'``, the path that was previously hard-coded.
    """
    # Load metadata.
    tracks = utils.load(tracks_path)

    # Explicit [] selection instead of attribute access (tracks.track /
    # tracks.set): same result, but it cannot be shadowed by a DataFrame
    # attribute of the same name.
    top_genre = tracks['track']['genre_top']
    set_columns = tracks['set']

    # Place the genre column and the 'set' columns side by side.
    combined = pd.concat([top_genre, set_columns], axis=1)
    combined.to_csv(output_path)
import pandas as pd import matplotlib.pyplot as plt import seaborn as sns import sklearn as skl import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm import librosa import librosa.display from collections import defaultdict import pickle from fma import utils AUDIO_DIR = os.environ.get('AUDIO_DIR') # Load metadata and features. tracks = utils.load('fma/data/fma_metadata/tracks.csv') genres = utils.load('fma/data/fma_metadata/genres.csv') features = utils.load('fma/data/fma_metadata/features.csv') echonest = utils.load('fma/data/fma_metadata/echonest.csv') small = tracks['set', 'subset'] <= 'small' sub = tracks.loc[small, ('track', 'genre_top')] small_subset = echonest.loc[small] filtered_cols = [('echonest', 'audio_features', 'acousticness'), ('echonest', 'audio_features', 'danceability'), ('echonest', 'audio_features', 'energy'), ('echonest', 'audio_features', 'instrumentalness'), ('echonest', 'audio_features', 'liveness'), ('echonest', 'audio_features', 'speechiness'),
('artist', 'active_year_end')] for column in COLUMNS: tracks[column] = pd.to_datetime(tracks[column]) SUBSETS = ('small', 'medium', 'large') tracks['set', 'subset'] = tracks['set', 'subset'].astype('category', categories=SUBSETS, ordered=True) COLUMNS = [('track', 'license'), ('artist', 'bio'), ('album', 'type'), ('album', 'information')] for column in COLUMNS: tracks[column] = tracks[column].astype('category') return tracks # Load metadata and features. tracks = load(METADATA_PATH + 'tracks.csv') #tracks = pd.read_csv(METADATA_PATH + 'tracks.csv', index_col=0, header=[0, 1]) genres = utils.load(METADATA_PATH + 'genres.csv') features = utils.load(METADATA_PATH + 'features.csv') echonest = utils.load(METADATA_PATH + 'echonest.csv') # np.testing.assert_array_equal(features.index, tracks.index) # assert echonest.index.isin(tracks.index).all() # print('Tracks shape: ', tracks.shape, 'Generes shape: ', genres.shape, # 'Features shape: ', features.shape, 'Echonest shape: ', echonest.shape) print(echonest.head())
import matplotlib
# The backend has to be chosen before pyplot is imported.
matplotlib.use('TkAgg')
from matplotlib import pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils
import sklearn.preprocessing
import sklearn.decomposition
import sklearn.svm
import librosa
import librosa.display

import utils
from config import rawDataPath

# Default size for every figure created afterwards.
plt.rcParams['figure.figsize'] = (17, 5)

# Directory where mp3 are stored.
# NOTE(review): rawDataPath is used as the NAME of an environment variable
# here; if config stores a directory path instead, this lookup yields None.
# Confirm against config.
AUDIO_DIR = os.environ.get(rawDataPath)

# Load metadata and features, in the same order as before.
tracks, genres, features, echonest = (
    utils.load(csv_name)
    for csv_name in ('tracks.csv', 'genres.csv', 'features.csv', 'echonest.csv')
)

# Sanity checks: features must share the tracks index, and every echonest
# row must belong to a known track.
np.testing.assert_array_equal(features.index, tracks.index)
assert echonest.index.isin(tracks.index).all()

# Bare expression: has no visible effect when run as a plain script.
tracks.shape, genres.shape, features.shape, echonest.shape
import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as skl
import sklearn.utils
import sklearn.preprocessing
import sklearn.decomposition
import sklearn.svm
from pandas import CategoricalDtype
from sklearn.metrics import recall_score

# Local modules
import utils

if __name__ == '__main__':
    # Read the metadata and feature tables, in the same order as before.
    tracks, genres, features = (
        utils.load(csv_path)
        for csv_path in (
            'src/fma_metadata/tracks.csv',
            'src/fma_metadata/genres.csv',
            'src/fma_metadata/features.csv',
        )
    )

    # The feature table must be indexed exactly like the track table.
    np.testing.assert_array_equal(features.index, tracks.index)

    # Report the size of each table.
    print(tracks.shape, genres.shape, features.shape)

    # Disabled previews of the first rows of each column group:
    # ipd.display(tracks['track'].head())
    # ipd.display(tracks['album'].head())
    # ipd.display(tracks['artist'].head())
    # ipd.display(tracks['set'].head())
import csv

import librosa
import librosa.display

import utils

# https://nbviewer.jupyter.org/github/mdeff/fma/blob/outputs/usage.ipynb
plt.rcParams['figure.figsize'] = (17, 5)

# Directory where mp3 are stored.
# os.environ.get() expects the NAME of an environment variable. The original
# passed the directory path itself as that name, so AUDIO_DIR was always
# None. Look up the AUDIO_DIR variable and fall back to the local path.
AUDIO_DIR = os.environ.get('AUDIO_DIR', '/home/dan/fma_small/')

# Load metadata and features.
tracks = utils.load('tracks.csv')
features = utils.load('features.csv')

# https://www.analyticsvidhya.com/blog/2019/01/build-image-classification-model-10-minutes/
# https://colab.research.google.com/notebooks/gpu.ipynb#scrollTo=BlmQIFSLZDdc

# Boolean row mask built by comparing the subset column against 'small'.
small = tracks['set', 'subset'] <= 'small'

# NOTE(review): every other lookup on `tracks` uses a two-level
# (group, field) key; confirm a top-level 'track_id' column exists,
# otherwise this raises KeyError.
print(tracks['track_id'])

# Boolean row masks for the predefined splits.
train = tracks['set', 'split'] == 'training'
val = tracks['set', 'split'] == 'validation'
test = tracks['set', 'split'] == 'test'

# Top-genre labels of the rows selected by the subset and split masks.
y_train = tracks.loc[small & train, ('track', 'genre_top')]
y_test = tracks.loc[small & test, ('track', 'genre_top')]
import numpy as np
import pandas as pd
import sklearn as skl
import sklearn.utils
import sklearn.preprocessing
import sklearn.decomposition
import sklearn.svm

import utils

# https://github.com/mdeff/fma/issues/9
# look at comment to get branch with scripts that work with dataset if wanted

# Read the track metadata and the feature table, in the same order as before.
tracks, features = (
    utils.load(csv_path)
    for csv_path in ('fma_metadata/tracks.csv', 'fma_metadata/features.csv')
)

# Currently disabled loads and consistency checks:
# genres = utils.load('fma_metadata/genres.csv')
# echonest = utils.load('fma_metadata/echonest.csv')
# np.testing.assert_array_equal(features.index, tracks.index)
# assert echonest.index.isin(tracks.index).all()

# Boolean row mask built by comparing the subset column against 'medium'.
medium = tracks['set', 'subset'] <= 'medium'
print(medium.shape)

# Labels (top genre) and feature rows selected by the mask.
Y = tracks.loc[medium, ('track', 'genre_top')]
X = features.loc[medium]

print(X.shape)
print('{} features, {} classes'.format(X.shape[1], np.unique(Y).size))

# Write both selections to CSV files under data/.
X.to_csv('data/dataSetMedium.csv')
Y.to_csv('data/labelsMedium.csv')
import os

import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils
import sklearn.preprocessing
import sklearn.decomposition
import sklearn.svm
import librosa
import librosa.display

import python_utils as utils

plt.rcParams['figure.figsize'] = (17, 5)

# Directory where mp3 are stored.
# os.environ.get() expects the NAME of an environment variable. The original
# passed the directory path itself as that name, so AUDIO_DIR was always
# None. Look up the AUDIO_DIR variable and fall back to the local path.
AUDIO_DIR = os.environ.get(
    'AUDIO_DIR',
    '/Volumes/SAMSUNG/James/Physics and Machine Learning/Data/Music')

# Load metadata and features.
meta_data_path = '/Volumes/SAMSUNG/James/Physics and Machine Learning/Data/Music/fma_metadata'
tracks = utils.load(meta_data_path + '/tracks.csv')
genres = utils.load(meta_data_path + '/genres.csv')
features = utils.load(meta_data_path + '/features.csv')
echonest = utils.load(meta_data_path + '/echonest.csv')

# Sanity checks: features must share the tracks index, and every echonest
# row must belong to a known track.
np.testing.assert_array_equal(features.index, tracks.index)
assert echonest.index.isin(tracks.index).all()

# Bare expression: has no visible effect when run as a plain script.
tracks.shape, genres.shape, features.shape, echonest.shape