Esempio n. 1
0
def download_metadata():
    """Download track, album, artist and genre metadata and save it to disk.

    The API key is read from the ``FMA_KEY`` environment variable. Track
    metadata is requested for every ID produced by ``trange(max_tid)``, where
    ``max_tid`` is taken from the first entry of ``get_recent_tracks()``.
    Albums and artists are then requested for the unique ``album_id`` and
    ``artist_id`` values found in the collected tracks.

    Side effects (all paths are relative to the current working directory):
      * writes ``raw_tracks.csv``, ``raw_albums.csv``, ``raw_artists.csv``
        and ``raw_genres.csv``, each sorted along both axes;
      * writes ``not_found.pickle``, a dict with the keys ``'tracks'``,
        ``'albums'`` and ``'artists'`` holding whatever ``get_all`` returned
        as its second value for each kind.
    """
    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))

    max_tid = int(fma.get_recent_tracks()[0][0])
    print('Largest track id: {}'.format(max_tid))

    not_found = {}

    # NOTE(review): if trange behaves like range, the largest track id
    # itself (max_tid) is never requested -- confirm this is intended.
    id_range = trange(max_tid, desc='tracks')
    tracks, not_found['tracks'] = fma.get_all('track', id_range)

    id_range = tqdm(tracks['album_id'].unique(), desc='albums')
    albums, not_found['albums'] = fma.get_all('album', id_range)

    id_range = tqdm(tracks['artist_id'].unique(), desc='artists')
    artists, not_found['artists'] = fma.get_all('artist', id_range)

    genres = fma.get_all_genres()

    # Explicit name -> table pairs instead of eval() on variable names:
    # same iteration order, no dynamic code evaluation.
    datasets = (
        ('tracks', tracks),
        ('albums', albums),
        ('artists', artists),
        ('genres', genres),
    )
    for name, table in datasets:
        table.sort_index(axis=0, inplace=True)
        table.sort_index(axis=1, inplace=True)
        table.to_csv('raw_' + name + '.csv')

    # Context manager guarantees the pickle file is flushed and closed.
    with open('not_found.pickle', 'wb') as f:
        pickle.dump(not_found, f)
Esempio n. 2
0
def download_data(args):
    """Download the audio of every track listed in ``raw_tracks.csv``.

    Files are stored under ``<args.path>/fma_full``; a track whose destination
    file already exists is skipped. The API key is read from the ``FMA_KEY``
    environment variable.

    Args:
        args: object exposing a ``path`` attribute, the base directory in
            which the ``fma_full`` directory is located.

    Side effects:
        Reads ``raw_tracks.csv`` and ``not_found.pickle`` from the current
        working directory, then rewrites ``not_found.pickle`` with its
        ``'audio'`` entry replaced by the list of track IDs whose download
        failed during this run. Prints a summary of collected, existing and
        not-found counts.
    """
    dst_dir = os.path.join(os.path.abspath(args.path), 'fma_full')
    tracks = pd.read_csv('raw_tracks.csv', index_col=0)
    _create_subdirs(dst_dir, tracks)

    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))
    with open('not_found.pickle', 'rb') as f:
        not_found = pickle.load(f)
    # The audio failure list is rebuilt from scratch on every run.
    not_found['audio'] = []

    # Download missing tracks.
    collected = 0
    for tid in tqdm(tracks.index):
        dst = utils.get_audio_path(dst_dir, tid)
        if os.path.exists(dst):
            continue
        try:
            fma.download_track(tracks.at[tid, 'track_file'], dst)
        except Exception:
            # Expected failure is presumably requests.HTTPError (per the
            # original comment). Catching Exception rather than using a bare
            # except lets KeyboardInterrupt/SystemExit abort the run instead
            # of being recorded as a missing track.
            not_found['audio'].append(tid)
        else:
            collected += 1

    with open('not_found.pickle', 'wb') as f:
        pickle.dump(not_found, f)

    # Tracks neither downloaded nor failed were already present on disk.
    existing = len(tracks) - collected - len(not_found['audio'])
    print('audio: {} collected, {} existing, {} not found'.format(
        collected, existing, len(not_found['audio'])))
Esempio n. 3
0
def download_metadata(args):
    """Download track, album, artist and genre metadata and save it to disk.

    The API key is read from the ``FMA_KEY`` environment variable. Track
    metadata is requested for the IDs produced by
    ``trange(args.tid_min, args.tid_max)``. Albums and artists are then
    requested for the unique ``album_id`` and ``artist_id`` values found in
    the collected tracks.

    Args:
        args: object exposing ``tid_min`` and ``tid_max`` attributes. When
            ``tid_max`` is ``None`` it is overwritten in place with the ID
            taken from the first entry of ``get_recent_tracks()``.

    Side effects (all paths are relative to the current working directory):
      * writes ``raw_tracks.csv``, ``raw_albums.csv``, ``raw_artists.csv``
        and ``raw_genres.csv``, each sorted along both axes;
      * writes ``not_found.pickle``, a dict with the keys ``'tracks'``,
        ``'albums'`` and ``'artists'``;
      * prints the ID range and, for every dataset except genres, the number
        of collected and not-found entries.
    """
    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))

    if args.tid_max is None:
        args.tid_max = int(fma.get_recent_tracks()[0][0])

    message = 'Collecting metadata from track ID {} to {}.'
    print(message.format(args.tid_min, args.tid_max))

    not_found = {}

    id_range = trange(args.tid_min, args.tid_max, desc='tracks')
    tracks, not_found['tracks'] = fma.get_all('track', id_range)

    id_range = tqdm(tracks['album_id'].unique(), desc='albums')
    albums, not_found['albums'] = fma.get_all('album', id_range)

    id_range = tqdm(tracks['artist_id'].unique(), desc='artists')
    artists, not_found['artists'] = fma.get_all('artist', id_range)

    genres = fma.get_all_genres()

    # Explicit name -> table pairs instead of eval() on variable names:
    # same iteration order, no dynamic code evaluation.
    datasets = (
        ('tracks', tracks),
        ('albums', albums),
        ('artists', artists),
        ('genres', genres),
    )
    for name, table in datasets:
        table.sort_index(axis=0, inplace=True)
        table.sort_index(axis=1, inplace=True)
        table.to_csv('raw_' + name + '.csv')
        # Genres have no not_found entry, so no statistics are printed.
        if name != 'genres':
            print('{}: {} collected, {} not found'.format(
                name, len(table), len(not_found[name])))

    # Context manager guarantees the pickle file is flushed and closed.
    with open('not_found.pickle', 'wb') as f:
        pickle.dump(not_found, f)
def download_data(args):
    """Download the audio of every track listed in ``raw_tracks.csv``.

    Files are stored under ``<args.path>/fma_full``; a track whose destination
    file already exists is skipped. The API key is read from the ``FMA_KEY``
    environment variable.

    Args:
        args: object exposing a ``path`` attribute, the base directory in
            which the ``fma_full`` directory is located.

    Side effects:
        Reads ``raw_tracks.csv`` and ``not_found.pickle`` from the current
        working directory, then rewrites ``not_found.pickle`` with its
        ``'audio'`` entry replaced by the list of track IDs whose download
        failed during this run. Prints a summary of collected, existing and
        not-found counts.
    """
    # Resolve the given path to an absolute one, then append 'fma_full'.
    dst_dir = os.path.join(os.path.abspath(args.path), 'fma_full')
    tracks = pd.read_csv('raw_tracks.csv', index_col=0)
    _create_subdirs(dst_dir, tracks)

    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))
    with open('not_found.pickle', 'rb') as f:
        not_found = pickle.load(f)
    # The audio failure list is rebuilt from scratch on every run.
    not_found['audio'] = []

    # Download missing tracks.
    collected = 0
    for tid in tqdm(tracks.index):
        # Get the audio file path from the track's index.
        dst = utils.get_audio_path(dst_dir, tid)
        # Only download tracks missing from the directory (on a first run the
        # directory is empty, so everything is downloaded).
        if os.path.exists(dst):
            continue
        try:
            fma.download_track(tracks.at[tid, 'track_file'], dst)
        except Exception:
            # Expected failure is presumably requests.HTTPError (per the
            # original comment). Catching Exception rather than using a bare
            # except lets KeyboardInterrupt/SystemExit abort the run instead
            # of being recorded as a missing track.
            not_found['audio'].append(tid)
        else:
            collected += 1

    # Serialize the result to record the audio files that were not found.
    with open('not_found.pickle', 'wb') as f:
        pickle.dump(not_found, f)

    # Tracks neither downloaded nor failed were already present on disk.
    existing = len(tracks) - collected - len(not_found['audio'])
    print('audio: {} collected, {} existing, {} not found'.format(
        collected, existing, len(not_found['audio'])))
Esempio n. 5
0
def download_data(dst_dir):
    """Download the audio of every track listed in ``raw_tracks.csv``.

    A track whose destination file already exists is skipped. The API key is
    read from the ``FMA_KEY`` environment variable.

    Args:
        dst_dir: directory in which the audio files are stored; it is
            converted to an absolute path before use.

    Side effects:
        Reads ``raw_tracks.csv`` and ``not_found.pickle`` from the current
        working directory, then rewrites ``not_found.pickle`` with its
        ``'audio'`` entry replaced by the list of track IDs whose download
        failed during this run.
    """
    dst_dir = os.path.abspath(dst_dir)
    tracks = pd.read_csv('raw_tracks.csv', index_col=0)
    _create_subdirs(dst_dir, tracks)

    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))
    with open('not_found.pickle', 'rb') as f:
        not_found = pickle.load(f)
    # The audio failure list is rebuilt from scratch on every run.
    not_found['audio'] = []

    # Download missing tracks.
    for tid in tqdm(tracks.index):
        dst = utils.get_audio_path(dst_dir, tid)
        if os.path.exists(dst):
            continue
        try:
            fma.download_track(tracks.at[tid, 'track_file'], dst)
        except Exception:
            # Expected failure is presumably requests.HTTPError (per the
            # original comment). Catching Exception rather than using a bare
            # except lets KeyboardInterrupt/SystemExit abort the run instead
            # of being recorded as a missing track.
            not_found['audio'].append(tid)

    with open('not_found.pickle', 'wb') as f:
        pickle.dump(not_found, f)
def download_metadata(args):
    """Download track, album, artist and genre metadata and save it to disk.

    The API key is read from the ``FMA_KEY`` environment variable. Track
    metadata is requested for the IDs produced by
    ``trange(args.tid_min, args.tid_max)``. Albums and artists are then
    requested for the unique ``album_id`` and ``artist_id`` values found in
    the collected tracks.

    Args:
        args: object exposing ``tid_min`` and ``tid_max`` attributes. When
            ``tid_max`` is ``None`` it is overwritten in place with the ID
            taken from the first entry of ``get_recent_tracks()``.

    Side effects (all paths are relative to the current working directory):
      * writes ``raw_tracks.csv``, ``raw_albums.csv``, ``raw_artists.csv``
        and ``raw_genres.csv``, each sorted along both axes;
      * writes ``not_found.pickle``, a dict with the keys ``'tracks'``,
        ``'albums'`` and ``'artists'``;
      * prints the ID range and, for every dataset except genres, the number
        of collected and not-found entries.
    """
    # Create the FreeMusicArchive client (from the utils module) used to
    # talk to the FMA API.
    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))

    # Default the upper bound to the most recently added track.
    if args.tid_max is None:
        args.tid_max = int(fma.get_recent_tracks()[0][0])

    message = 'Collecting metadata from track ID {} to {}.'
    print(message.format(args.tid_min, args.tid_max))

    not_found = {}

    # Progress-reporting range over the track IDs to query.
    id_range = trange(args.tid_min, args.tid_max, desc='tracks')
    # get_all returns the collected track table plus the IDs not found.
    tracks, not_found['tracks'] = fma.get_all('track', id_range)

    # Albums and artists to query are derived from the collected tracks.
    id_range = tqdm(tracks['album_id'].unique(), desc='albums')
    albums, not_found['albums'] = fma.get_all('album', id_range)

    id_range = tqdm(tracks['artist_id'].unique(), desc='artists')
    artists, not_found['artists'] = fma.get_all('artist', id_range)

    genres = fma.get_all_genres()

    # Explicit name -> table pairs instead of eval() on variable names:
    # same iteration order, no dynamic code evaluation.
    datasets = (
        ('tracks', tracks),
        ('albums', albums),
        ('artists', artists),
        ('genres', genres),
    )
    # Sort the rows and columns of each dataset, then export it.
    for name, table in datasets:
        table.sort_index(axis=0, inplace=True)
        table.sort_index(axis=1, inplace=True)
        # Written as raw_tracks.csv, raw_albums.csv, raw_artists.csv and
        # raw_genres.csv.
        table.to_csv('raw_' + name + '.csv')
        if name != 'genres':
            # Report how many entries were collected and how many were not
            # found (genres have no not_found entry).
            print('{}: {} collected, {} not found'.format(
                name, len(table), len(not_found[name])))

    # Context manager guarantees the pickle file is flushed and closed.
    with open('not_found.pickle', 'wb') as f:
        pickle.dump(not_found, f)