예제 #1
0
    def LoadMetadata(self, data_dir="../fma/data"):
        """Load the tracks, genres and features tables from csv files.

        The loaded tables are stored on the instance as ``self.tracks``,
        ``self.genres`` and ``self.features``.

        Args:
            data_dir: Directory containing ``tracks.csv``, ``genres.csv`` and
                ``features.csv``. Defaults to the location that used to be
                hard-coded, so existing callers are unaffected.

        Raises:
            AssertionError: If the index of the features table differs from
                the index of the tracks table.
        """

        # Side effect: changes the global default figure size for plots
        # (presumably ``plt`` is matplotlib.pyplot -- confirm against imports).
        plt.rcParams['figure.figsize'] = (17, 5)

        # Load metadata and features.
        self.tracks = utils.load(data_dir + "/tracks.csv")
        self.genres = utils.load(data_dir + "/genres.csv")
        self.features = utils.load(data_dir + "/features.csv")
        # NOTE: echonest.csv is not loaded here; that call was left disabled.

        # Both tables must describe exactly the same rows, in the same order.
        np.testing.assert_array_equal(self.features.index, self.tracks.index)
예제 #2
0
def main_orig():
    """Compute features for every track listed in ``tracks.csv``.

    Tracks are handled in batches selected by a minimum duration threshold,
    going from the highest threshold to the lowest. Batches with higher
    thresholds are given fewer worker processes to limit memory use. The
    partially filled feature table is saved periodically, then saved and
    checked once at the end.
    """
    tracks = utils.load('tracks.csv')
    features = pd.DataFrame(index=tracks.index,
                            columns=columns(), dtype=np.float32)

    # More than usable CPUs to be CPU bound, not I/O bound. Beware memory.
    try:
        usable_cpus = len(os.sched_getaffinity(0))
    except AttributeError:
        # sched_getaffinity is only available on some platforms.
        usable_cpus = os.cpu_count() or 1
    max_workers = int(1.5 * usable_cpus)

    # Longest is ~11,000 seconds. Limit processes to avoid memory errors.
    # Each entry is (minimum duration, number of worker processes).
    table = ((5000, 1), (3000, 3), (2000, 5), (1000, 10), (0, max_workers))
    for duration, nb_workers in table:
        print('Working with {} processes.'.format(nb_workers))

        # Take every remaining track at least this long, and remove it from
        # the table so later (lower) thresholds do not select it again.
        tids = tracks[tracks['track', 'duration'] >= duration].index
        tracks.drop(tids, axis=0, inplace=True)

        # The context manager shuts the workers down once the batch is done
        # (or if it fails), instead of leaking one pool per batch.
        with multiprocessing.Pool(nb_workers) as pool:
            it = pool.imap_unordered(compute_features, tids)

            for i, row in enumerate(tqdm(it, total=len(tids))):
                # Results arrive in arbitrary order; the row's name is used
                # as the label of the row to fill in.
                features.loc[row.name] = row

                # Periodic checkpoint of the partial results.
                if i % 1000 == 0:
                    save(features, 10)

    save(features, 10)
    test(features, 10)
예제 #3
0
def test(features, ndigits):
    """Report rows with missing values and compare against ``features.csv``.

    Args:
        features: Table of computed features (a pandas DataFrame, judging by
            the calls made on it).
        ndigits: The relative tolerance of the comparison is ``10**-ndigits``.

    Raises:
        AssertionError: If the values loaded from ``features.csv`` are not
            close to ``features`` within the relative tolerance.
    """
    # Rows containing at least one null value are reported as failed tracks.
    has_missing = features.isnull().any(axis=1)
    failed = features.loc[has_missing].index
    if len(failed):
        failed_ids = ', '.join(map(str, failed))
        print('Failed tracks: {}'.format(failed_ids))

    saved = utils.load('features.csv')
    tolerance = 10**-ndigits
    np.testing.assert_allclose(saved.values, features.values, rtol=tolerance)
예제 #4
0
    def LoadQuery(self, csv, feature="mfcc"):
        """Load a query file and return the values of one feature.

        The table loaded from ``"featureFiles/" + csv`` is filtered down to
        the rows whose column ``0`` equals ``feature``; the filtered table is
        kept in ``self.query``.

        Args:
            csv: File name, relative to the ``featureFiles/`` directory.
            feature: Value to match in column ``0``.

        Returns:
            The ``.values`` of column ``3`` for the matching rows.
        """
        self.query = utils.load("featureFiles/" + csv)

        matches_feature = self.query[0] == feature
        self.query = self.query.loc[matches_feature]

        selected_values = self.query[3].values
        return selected_values
from fma import utils
from utils.load_transform_data import VALID_GENRES, get_tracks_locations,\
    get_filename_and_ids


music_files_locations = get_tracks_locations()

tracks = utils.load('fma/tracks.csv')

# Boolean row masks over ``tracks``.
# NOTE(review): ``<= 'small'`` presumably relies on the subset column being
# an ordered categorical -- confirm against utils.load.
small = tracks['set', 'subset'] <= 'small'
train = tracks['set', 'split'] == 'training'
val = tracks['set', 'split'] == 'validation'
test = tracks['set', 'split'] == 'test'
selected_genres = tracks['track', 'genre_top'].isin(VALID_GENRES)


def _track_ids_for(split_mask):
    """Return the index values of small-subset, valid-genre rows in a split."""
    return tracks.loc[small & split_mask & selected_genres].index.values


x_train_ids = _track_ids_for(train)
x_val_ids = _track_ids_for(val)
x_test_ids = _track_ids_for(test)

# Pair the selected ids of each split with the located audio files.
train_valid_tracks = get_filename_and_ids(x_train_ids, music_files_locations)
validation_valid_tracks = get_filename_and_ids(
    x_val_ids, music_files_locations)

test_valid_tracks = get_filename_and_ids(x_test_ids, music_files_locations)
예제 #6
0
# An explicitly supplied wavelet overrides the current setting.
if args.wavelet:
    requested_wavelet = args.wavelet

# Translate the split name into its path-style name. An unrecognised split
# leaves requested_split_path untouched.
_SPLIT_PATH_NAMES = {
    "training": "train",
    "validation": "validation",
    "test": "test",
}
if requested_split in _SPLIT_PATH_NAMES:
    requested_split_path = _SPLIT_PATH_NAMES[requested_split]


# Load the metadata files
tracks = fma_utils.load(input_dir + 'tracks.csv')
features = fma_utils.load(input_dir + 'features.csv')

# Make sure everything in features is in tracks and vice versa
np.testing.assert_array_equal(features.index, tracks.index)

# Use the specified data subset:
subset = tracks['set', 'subset'] <= requested_subset
split = tracks['set', 'split'] == requested_split
_selected_rows = subset & split

# Index of the selected rows, and their top-level genre labels.
rel_track_ids = tracks.loc[_selected_rows].index
y_values = tracks.loc[_selected_rows, ('track', 'genre_top')]

# Distinct genre labels present in the selection.
unique_genres = y_values.unique().categories


예제 #7
0
 def __load_data(self, path_prefix):
     """Load the tracks, genres and features tables under ``path_prefix``.

     The results are stored on the instance as ``self.tracks``,
     ``self.genres`` and ``self.features``.

     Args:
         path_prefix: Directory prefix; ``"/<name>.csv"`` is appended to it.
     """
     tracks_csv = path_prefix + "/tracks.csv"
     genres_csv = path_prefix + "/genres.csv"
     features_csv = path_prefix + "/features.csv"

     self.tracks = utils.load(tracks_csv)
     self.genres = utils.load(genres_csv)
     self.features = utils.load(features_csv)