def LoadMetadata(self):
    """Load the track, genre and feature tables from their CSV files.

    Sets the default matplotlib figure size to ``(17, 5)``, stores the loaded
    tables on ``self.tracks``, ``self.genres`` and ``self.features``, and then
    asserts that the feature table and the track table share the same index.

    Raises:
        AssertionError: if ``self.features.index`` and ``self.tracks.index``
            are not equal.
    """
    plt.rcParams['figure.figsize'] = (17, 5)

    # Attribute name -> CSV file it is populated from, loaded in this order.
    # The echonest table ("fma_metadata/echonest.csv") is not loaded here;
    # its load call is disabled.
    metadata_sources = (
        ("tracks", "../fma/data/tracks.csv"),
        ("genres", "../fma/data/genres.csv"),
        ("features", "../fma/data/features.csv"),
    )
    for attribute, csv_path in metadata_sources:
        setattr(self, attribute, utils.load(csv_path))

    # Features and tracks must describe exactly the same rows.
    np.testing.assert_array_equal(self.features.index, self.tracks.index)
def main_orig():
    """Compute features for every track in ``tracks.csv`` and save them.

    Tracks are processed in batches selected by a decreasing minimum
    duration. Each batch uses its own worker pool, with fewer processes for
    the longest tracks to limit memory use. Results are written into a
    ``features`` frame indexed like the track table; the frame is saved
    every 1000 processed rows of a batch (including the first row) and once
    more at the end, after which ``test`` is run on it.
    """
    tracks = utils.load('tracks.csv')
    features = pd.DataFrame(index=tracks.index,
                            columns=columns(), dtype=np.float32)

    # More than usable CPUs to be CPU bound, not I/O bound. Beware memory.
    # os.sched_getaffinity is not available on every platform, so fall back
    # to the total CPU count when it is missing.
    if hasattr(os, 'sched_getaffinity'):
        usable_cpus = len(os.sched_getaffinity(0))
    else:
        usable_cpus = os.cpu_count() or 1
    max_workers = int(1.5 * usable_cpus)

    # Longest is ~11,000 seconds. Limit processes to avoid memory errors.
    # Each entry is (minimum duration, number of worker processes).
    table = ((5000, 1), (3000, 3), (2000, 5), (1000, 10), (0, max_workers))

    for duration, nb_workers in table:
        print('Working with {} processes.'.format(nb_workers))

        # Select the tracks at least this long, then remove them from the
        # table so later (shorter-duration) batches do not process them again.
        tids = tracks[tracks['track', 'duration'] >= duration].index
        tracks.drop(tids, axis=0, inplace=True)

        # The context manager shuts the pool down when the batch is finished
        # (or fails), so worker processes do not accumulate across batches.
        with multiprocessing.Pool(nb_workers) as pool:
            it = pool.imap_unordered(compute_features, tids)

            for i, row in enumerate(tqdm(it, total=len(tids))):
                features.loc[row.name] = row

                # Periodic checkpoint of the partial results.
                if i % 1000 == 0:
                    save(features, 10)

    save(features, 10)
    test(features, 10)
def test(features, ndigits):
    """Report rows with missing features and check them against the saved file.

    Prints the index labels of every row of ``features`` that contains at
    least one null value. Then reloads ``features.csv`` through
    ``utils.load`` and asserts that its values match ``features`` within a
    relative tolerance of ``10**-ndigits``.

    Raises:
        AssertionError: if the reloaded values differ from ``features``
            beyond the tolerance.
    """
    has_missing = features.isnull().any(axis=1)
    indices = features.loc[has_missing].index
    if len(indices) > 0:
        failed = ', '.join(map(str, indices))
        print('Failed tracks: {}'.format(failed))

    reloaded = utils.load('features.csv')
    np.testing.assert_allclose(reloaded.values, features.values,
                               rtol=10**-ndigits)
def LoadQuery(self, csv, feature="mfcc"):
    """Load a query file and return the values of one feature.

    The file ``"featureFiles/" + csv`` is loaded through ``utils.load`` and
    stored on ``self.query``. ``self.query`` is then narrowed to the rows
    whose column ``0`` equals ``feature``, and the values of column ``3`` of
    those rows are returned.
    """
    self.query = utils.load("featureFiles/" + csv)

    # Keep only the rows that belong to the requested feature.
    matches_feature = self.query[0] == feature
    self.query = self.query.loc[matches_feature]

    selected_column = self.query[3]
    return selected_column.values
from fma import utils
from utils.load_transform_data import (
    VALID_GENRES,
    get_tracks_locations,
    get_filename_and_ids,
)

# Locations of the audio files and the track metadata table.
music_files_locations = get_tracks_locations()
tracks = utils.load('fma/tracks.csv')

# Boolean row masks over the track table.
small = tracks[('set', 'subset')] <= 'small'
train = tracks[('set', 'split')] == 'training'
val = tracks[('set', 'split')] == 'validation'
test = tracks[('set', 'split')] == 'test'
selected_genres = tracks[('track', 'genre_top')].isin(VALID_GENRES)

# Track ids per split, restricted to the small subset and the valid genres.
x_train_ids = tracks.loc[small & train & selected_genres].index.values
x_val_ids = tracks.loc[small & val & selected_genres].index.values
x_test_ids = tracks.loc[small & test & selected_genres].index.values

# Resolve each split's track ids against the known file locations.
train_valid_tracks = get_filename_and_ids(
    x_train_ids, music_files_locations)
validation_valid_tracks = get_filename_and_ids(
    x_val_ids, music_files_locations)
test_valid_tracks = get_filename_and_ids(
    x_test_ids, music_files_locations)
# Take the wavelet from args only when args.wavelet is truthy; otherwise
# requested_wavelet keeps whatever value it was given before this point
# (not visible in this excerpt).
if args.wavelet:
    requested_wavelet = args.wavelet

# Translate the split name into the string stored in requested_split_path.
# NOTE(review): there is no else branch, so any other value of
# requested_split leaves requested_split_path unassigned by this chain --
# confirm the value is validated or defaulted upstream.
if requested_split == "training":
    requested_split_path = "train"
elif requested_split == "validation":
    requested_split_path = "validation"
elif requested_split == "test":
    requested_split_path = "test"

# Load the metadata files
tracks = fma_utils.load(input_dir + 'tracks.csv')
features = fma_utils.load(input_dir + 'features.csv')

# Make sure everything in features is in tracks and vice versa: the two
# indices must be equal, otherwise this raises AssertionError.
np.testing.assert_array_equal(features.index, tracks.index)

# Use the specified data subset:
# NOTE(review): the `<=` comparison presumably relies on ('set', 'subset')
# being an ordered categorical, so smaller subsets are included -- confirm
# against fma_utils.load.
subset = tracks['set', 'subset'] <= requested_subset
split = tracks['set', 'split'] == requested_split

# Ids and top-level genre labels of the tracks in the requested subset/split.
rel_track_ids = tracks.loc[subset & split].index
y_values = tracks.loc[subset & split, ('track', 'genre_top')]

# Categories of the distinct genre labels in the selection.
# NOTE(review): whether unique() keeps unused categories may depend on the
# pandas version -- verify if the exact category list matters.
unique_genres = y_values.unique().categories
def __load_data(self, path_prefix):
    """Load the track, genre and feature tables found under ``path_prefix``.

    Each table is read through ``utils.load`` from ``path_prefix`` plus the
    file suffix and stored on ``self.tracks``, ``self.genres`` and
    ``self.features`` respectively.
    """
    # Attribute name -> file suffix appended to path_prefix, in load order.
    table_files = (
        ("tracks", "/tracks.csv"),
        ("genres", "/genres.csv"),
        ("features", "/features.csv"),
    )
    for attribute, suffix in table_files:
        setattr(self, attribute, utils.load(path_prefix + suffix))