Ejemplo n.º 1
0
    def LoadMetadata(self):
        """Read the FMA metadata tables and store them on the instance.

        Sets ``plt.rcParams['figure.figsize']`` to ``(17, 5)``, loads the
        tracks, genres, features and echonest tables through ``utils.load``
        from the relative ``fma_metadata`` directory, and checks that the
        loaded indexes agree before returning.

        Returns:
            The tuple ``(tracks, genres, features, echonest)``; the same
            objects are also stored as attributes of ``self``.

        Raises:
            AssertionError: if the features and tracks indexes differ, or if
                some echonest index label is absent from the tracks index.
        """
        plt.rcParams['figure.figsize'] = (17, 5)

        # Attribute name -> CSV path, loaded in exactly this order.
        sources = (
            ('tracks', "fma_metadata/tracks.csv"),
            ('genres', "fma_metadata/genres.csv"),
            ('features', "fma_metadata/features.csv"),
            ('echonest', "fma_metadata/echonest.csv"),
        )
        for attribute, csv_path in sources:
            setattr(self, attribute, utils.load(csv_path))

        # features must carry exactly the same index as tracks ...
        np.testing.assert_array_equal(self.features.index, self.tracks.index)
        # ... while echonest only has to be contained in it.
        echonest_in_tracks = self.echonest.index.isin(self.tracks.index)
        assert echonest_in_tracks.all()

        return self.tracks, self.genres, self.features, self.echonest
Ejemplo n.º 2
0
def filter_csv():
    """Export the top genre together with the 'set' columns to my_file.csv.

    Loads 'tracks.csv' via ``utils.load``, puts the 'genre_top' column of the
    'track' group next to everything under the 'set' group, and writes the
    combined table to the relative path 'my_file.csv'. Returns nothing.
    """
    metadata = utils.load('tracks.csv')

    # NOTE(review): this mask is evaluated but never applied, so despite the
    # function name no rows are filtered out -- confirm whether that is intended.
    small_mask = metadata['set', 'subset'] <= 'small'

    top_genre = metadata['track']['genre_top']
    set_columns = metadata['set']

    combined = pd.concat([top_genre, set_columns], axis=1)
    combined.to_csv('my_file.csv')
Ejemplo n.º 3
0
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
import librosa
import librosa.display
from collections import defaultdict
import pickle

from fma import utils

# Audio location, read from the AUDIO_DIR environment variable
# (None when that variable is not set).
AUDIO_DIR = os.environ.get('AUDIO_DIR')

# Load metadata and features.
tracks = utils.load('fma/data/fma_metadata/tracks.csv')
genres = utils.load('fma/data/fma_metadata/genres.csv')
features = utils.load('fma/data/fma_metadata/features.csv')
echonest = utils.load('fma/data/fma_metadata/echonest.csv')

# Boolean mask over the rows of `tracks` whose ('set', 'subset') value
# compares <= 'small'.
# NOTE(review): presumably relies on utils.load returning that column as an
# ordered categorical -- confirm.
small = tracks['set', 'subset'] <= 'small'
# The ('track', 'genre_top') column restricted to the masked rows.
sub = tracks.loc[small, ('track', 'genre_top')]

# Rows of `echonest` selected by the same mask.
# NOTE(review): `small` is indexed like `tracks`; this assumes the mask can be
# aligned to echonest's index -- confirm.
small_subset = echonest.loc[small]

filtered_cols = [('echonest', 'audio_features', 'acousticness'),
                 ('echonest', 'audio_features', 'danceability'),
                 ('echonest', 'audio_features', 'energy'),
                 ('echonest', 'audio_features', 'instrumentalness'),
                 ('echonest', 'audio_features', 'liveness'),
                 ('echonest', 'audio_features', 'speechiness'),
Ejemplo n.º 4
0
                   ('artist', 'active_year_end')]
        for column in COLUMNS:
            tracks[column] = pd.to_datetime(tracks[column])

        SUBSETS = ('small', 'medium', 'large')
        tracks['set', 'subset'] = tracks['set',
                                         'subset'].astype('category',
                                                          categories=SUBSETS,
                                                          ordered=True)

        COLUMNS = [('track', 'license'), ('artist', 'bio'), ('album', 'type'),
                   ('album', 'information')]
        for column in COLUMNS:
            tracks[column] = tracks[column].astype('category')

        return tracks


# Load metadata and features.
# Paths are built by plain concatenation, so METADATA_PATH has to end with a
# path separator.
# NOTE(review): tracks goes through the unqualified `load` while the other
# tables use `utils.load` -- presumably the function defined in this file; confirm.
tracks = load(METADATA_PATH + 'tracks.csv')
#tracks = pd.read_csv(METADATA_PATH + 'tracks.csv', index_col=0, header=[0, 1])
genres = utils.load(METADATA_PATH + 'genres.csv')
features = utils.load(METADATA_PATH + 'features.csv')
echonest = utils.load(METADATA_PATH + 'echonest.csv')

# Index consistency checks, currently disabled.
# np.testing.assert_array_equal(features.index, tracks.index)
# assert echonest.index.isin(tracks.index).all()

# print('Tracks shape: ', tracks.shape, 'Genres shape: ', genres.shape,
#       'Features shape: ', features.shape, 'Echonest shape: ',  echonest.shape)
# Print the result of echonest.head() as a quick look at the loaded table.
print(echonest.head())
Ejemplo n.º 5
0
import matplotlib
matplotlib.use('TkAgg')
from matplotlib import pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
import librosa
import librosa.display

import utils

from config import rawDataPath


# Figure size setting applied through matplotlib's rcParams.
plt.rcParams['figure.figsize'] = (17, 5)

# Directory where mp3 are stored.
# NOTE(review): the value of `rawDataPath` is used here as the NAME of an
# environment variable; if it actually holds a filesystem path this lookup
# will most likely return None -- confirm against config.
AUDIO_DIR = os.environ.get(rawDataPath)

# Load metadata and features.
tracks = utils.load('tracks.csv')
genres = utils.load('genres.csv')
features = utils.load('features.csv')
echonest = utils.load('echonest.csv')

# features must share tracks' index exactly; every echonest index label must
# appear in tracks' index. Either check raises AssertionError on failure.
np.testing.assert_array_equal(features.index, tracks.index)
assert echonest.index.isin(tracks.index).all()

# Bare expression: its value is discarded when this runs as a plain script.
# NOTE(review): looks like a notebook-cell leftover meant to display the
# shapes -- confirm.
tracks.shape, genres.shape, features.shape, echonest.shape

Ejemplo n.º 6
0
import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
from pandas import CategoricalDtype
from sklearn.metrics import recall_score

# Files
import utils

if __name__ == '__main__':
    # Load metadata and features.
    tracks = utils.load('src/fma_metadata/tracks.csv')
    genres = utils.load('src/fma_metadata/genres.csv')
    features = utils.load('src/fma_metadata/features.csv')

    # features and tracks must have identical indexes; raises AssertionError
    # otherwise.
    np.testing.assert_array_equal(features.index, tracks.index)

    # Print the shape of each loaded table on one line.
    print(tracks.shape, genres.shape, features.shape)

    # Disabled previews of the top-level column groups of `tracks`.
    # ipd.display(tracks['track'].head())
    # ipd.display(tracks['album'].head())
    # ipd.display(tracks['artist'].head())
    # ipd.display(tracks['set'].head())
Ejemplo n.º 7
0
import librosa
import librosa.display

import utils

import csv

# https://nbviewer.jupyter.org/github/mdeff/fma/blob/outputs/usage.ipynb

plt.rcParams['figure.figsize'] = (17, 5)

# Directory where mp3 are stored.
# os.environ.get() takes the NAME of an environment variable, so passing the
# directory path itself practically always produced None. Read the AUDIO_DIR
# variable instead and fall back to the local dataset path when it is not set.
AUDIO_DIR = os.environ.get('AUDIO_DIR', '/home/dan/fma_small/')

# Load metadata and features.
tracks = utils.load('tracks.csv')
features = utils.load('features.csv')

# https://www.analyticsvidhya.com/blog/2019/01/build-image-classification-model-10-minutes/
# https://colab.research.google.com/notebooks/gpu.ipynb#scrollTo=BlmQIFSLZDdc

# Boolean mask of the tracks whose ('set', 'subset') value compares <= 'small'.
small = tracks['set', 'subset'] <= 'small'

# NOTE(review): every other lookup on `tracks` uses a two-level column key; if
# 'track_id' is the index name rather than a column this raises KeyError and
# `tracks.index` is what was meant -- confirm.
print(tracks['track_id'])

# Row masks for the values of the ('set', 'split') column.
train = tracks['set', 'split'] == 'training'
val = tracks['set', 'split'] == 'validation'
test = tracks['set', 'split'] == 'test'

# ('track', 'genre_top') labels for the training and test rows of the small subset.
y_train = tracks.loc[small & train, ('track', 'genre_top')]
y_test = tracks.loc[small & test, ('track', 'genre_top')]
Ejemplo n.º 8
0
import numpy as np
import pandas as pd
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
import utils

# https://github.com/mdeff/fma/issues/9
# look at comment to get branch with scripts that work with dataset if wanted

# Load metadata and features.
tracks = utils.load('fma_metadata/tracks.csv')
# genres = utils.load('fma_metadata/genres.csv')
features = utils.load('fma_metadata/features.csv')
# echonest = utils.load('fma_metadata/echonest.csv')

# Index consistency checks, currently disabled.
# np.testing.assert_array_equal(features.index, tracks.index)
# assert echonest.index.isin(tracks.index).all()

# Boolean mask of the tracks whose ('set', 'subset') value compares <= 'medium'.
# NOTE(review): presumably depends on that column being an ordered
# categorical -- confirm.
medium = tracks['set', 'subset'] <= 'medium'
print(medium.shape)

# Labels: the ('track', 'genre_top') column of the selected tracks.
Y = tracks.loc[medium, ('track', 'genre_top')]
# Inputs: the rows of `features` selected by the same mask.
# NOTE(review): assumes features shares tracks' index; the check above that
# would verify this is commented out.
X = features.loc[medium]
print(X.shape)

# Report the number of columns in X and the number of distinct values in Y.
print('{} features, {} classes'.format(X.shape[1], np.unique(Y).size))

# Write both tables as CSV under data/.
# NOTE(review): nothing here creates that directory -- presumably it must
# already exist.
X.to_csv('data/dataSetMedium.csv')
Y.to_csv('data/labelsMedium.csv')
Ejemplo n.º 9
0
import os

import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
import librosa
import librosa.display
import python_utils as utils

plt.rcParams['figure.figsize'] = (17, 5)

# Directory where mp3 are stored.
# os.environ.get() takes the NAME of an environment variable, so passing the
# directory path itself practically always produced None. Read the AUDIO_DIR
# variable instead and fall back to the local data directory when it is not set.
AUDIO_DIR = os.environ.get(
    'AUDIO_DIR',
    '/Volumes/SAMSUNG/James/Physics and Machine Learning/Data/Music')

# Load metadata and features.
meta_data_path = '/Volumes/SAMSUNG/James/Physics and Machine Learning/Data/Music/fma_metadata'
tracks = utils.load(meta_data_path + '/tracks.csv')
genres = utils.load(meta_data_path + '/genres.csv')
features = utils.load(meta_data_path + '/features.csv')
echonest = utils.load(meta_data_path + '/echonest.csv')

# features must share tracks' index exactly; every echonest index label must
# appear in tracks' index. Either check raises AssertionError on failure.
np.testing.assert_array_equal(features.index, tracks.index)
assert echonest.index.isin(tracks.index).all()

# Bare expression: its value is discarded when this runs as a plain script.
# NOTE(review): looks like a notebook-cell leftover meant to display the
# shapes -- confirm.
tracks.shape, genres.shape, features.shape, echonest.shape