Beispiel #1
0
def create_track_file(maindir, trackid, track, song, artist, mbconnect=None):
    """
    Main function to create an HDF5 song file.
    You got to have the track, song and artist already.
    If you pass an open connection to the musicbrainz database, we also use it.
    The file is first written to '<path>_tmp' and moved to its final path
    only once it has been completely filled.
    Returns True if song was created, False otherwise.
    False can mean the file already exists, another thread holds the lock
    for that track, or we gave up after 100 failed tries.
    INPUT
       maindir      - main directory of the Million Song Dataset
       trackid      - Echo Nest track id of the track object
       track        - pyechonest track object
       song         - pyechonest song object
       artist       - pyechonest artist object
       mbconnect    - open musicbrainz pg connection
    RETURN
       True if a track file was created, False otherwise
    RAISES
       KeyboardInterrupt is re-raised after the tmp / final files are removed
    """
    hdf5_path = os.path.join(maindir, path_from_trackid(trackid))
    if os.path.exists(hdf5_path):
        return False  # file already exists, no stress
    hdf5_path_tmp = hdf5_path + '_tmp'
    # lock the file
    if not get_lock_track(trackid):
        return False  # someone is taking care of that file
    if os.path.exists(hdf5_path):
        release_lock_track(trackid)
        return False  # got the lock too late, file exists
    # count errors (=tries), stop after 100 tries
    try_cnt = 0
    # handle on the tmp file; None means "nothing to close"
    h5 = None
    # create file and fill it
    try:
        while True:  # try until we make it work!
            try:
                # we try one more time
                try_cnt += 1
                h5 = None  # forget the handle of a previous failed try
                target_dir = os.path.dirname(hdf5_path)
                if not os.path.isdir(target_dir):
                    os.makedirs(target_dir)
                # check / delete tmp file if exist
                if os.path.isfile(hdf5_path_tmp):
                    os.remove(hdf5_path_tmp)
                # create tmp file
                HDF5.create_song_file(hdf5_path_tmp)
                h5 = HDF5.open_h5_file_append(hdf5_path_tmp)
                HDF5.fill_hdf5_from_artist(h5, artist)
                HDF5.fill_hdf5_from_song(h5, song)
                HDF5.fill_hdf5_from_track(h5, track)
                if mbconnect is not None:
                    HDF5.fill_hdf5_from_musicbrainz(h5, mbconnect)
                # TODO billboard? lastfm? ...?
                h5.close()
            except KeyboardInterrupt:
                close_creation()
                raise
            # we dont panic, delete file, wait and retry
            except Exception as e:
                # close hdf5 (best effort)
                if h5 is not None:
                    try:
                        h5.close()
                    except ValueError:
                        pass
                # delete path; os.remove raises OSError when the tmp file
                # was never created, which must not abort the retry loop
                try:
                    os.remove(hdf5_path_tmp)
                except (IOError, OSError):
                    pass
                # print and wait
                print('ERROR creating track: %s on %s (pid=%s)'
                      % (trackid, time.ctime(), os.getpid()))
                print(e)
                if try_cnt < 100:
                    print('(try again in %s seconds)' % (SLEEPTIME,))
                    time.sleep(SLEEPTIME)
                    continue
                # give up
                print('we give up after %s tries' % (try_cnt,))
                release_lock_track(trackid)
                return False
            # move tmp file to real file
            shutil.move(hdf5_path_tmp, hdf5_path)
            # release lock
            release_lock_track(trackid)
            return True
    # KeyboardInterrupt, we delete file, clean things up
    except KeyboardInterrupt:
        # close hdf5 (best effort)
        if h5 is not None:
            try:
                h5.close()
            except ValueError:
                pass
        # delete path
        try:
            if os.path.isfile(hdf5_path_tmp):
                os.remove(hdf5_path_tmp)
            if os.path.isfile(hdf5_path):
                os.remove(hdf5_path)
        except (IOError, OSError):
            pass
        raise
Beispiel #2
0
def convert_one_song(audiofile,output,mbconnect=None,verbose=0,DESTROYAUDIO=False):
    """
    PRINCIPAL FUNCTION
    Converts one given audio file to hdf5 format (saved in 'output')
    by uploading it to The Echo Nest API
    INPUT
         audiofile   - path to a typical audio file (wav, mp3, ...)
            output   - nonexisting hdf5 path
         mbconnect   - if not None, open connection to musicbrainz server
           verbose   - if >0 display more information
      DESTROYAUDIO   - Careful! deletes audio file if everything went well
    RETURN
       1 if we think a song is created, 0 otherwise
       (0 when 'audiofile' is missing or 'output' already exists)
    """
    # inputs + sanity checks
    if not os.path.exists(audiofile):
        print('ERROR: song file does not exist:',audiofile)
        return 0
    if os.path.exists(output):
        print('ERROR: hdf5 output file already exist:',output,', delete or choose new path')
        return 0
    # get EN track / song / artist for that song
    if verbose>0: print('get analysis for file:',audiofile)
    track = trackEN.track_from_filename(audiofile)
    song_id = track.song_id
    song = songEN.Song(song_id)
    if verbose>0: print('found song:',song.title,'(',song_id,')')
    artist_id = song.artist_id
    artist = artistEN.Artist(artist_id)
    if verbose>0: print('found artist:',artist.name,'(',artist_id,')')
    # hack to fill missing values: give the track an empty string for each
    # attribute it does not have
    for missing_attr in ('foreign_id','foreign_release_id'):
        try:
            getattr(track,missing_attr)
        except AttributeError:
            setattr(track,missing_attr,'')
            if verbose>0: print('no track %s found' % missing_attr)
    # create HDF5 file
    if verbose>0: print('create HDF5 file:',output)
    HDF5.create_song_file(output,force=False)
    # fill hdf5 file from track
    if verbose>0:
        if mbconnect is None:
            print('fill HDF5 file with info from track/song/artist')
        else:
            print('fill HDF5 file with info from track/song/artist/musicbrainz')
    h5 = HDF5.open_h5_file_append(output)
    # always close the handle, even if one of the fill steps raises
    try:
        HDF5.fill_hdf5_from_artist(h5,artist)
        HDF5.fill_hdf5_from_song(h5,song)
        HDF5.fill_hdf5_from_track(h5,track)
        if mbconnect is not None:
            HDF5.fill_hdf5_from_musicbrainz(h5,mbconnect)
    finally:
        h5.close()
    # done
    if DESTROYAUDIO:
        if verbose>0: print('We remove audio file:',audiofile)
        os.remove(audiofile)
    return 1
def create_track_file(maindir,trackid,track,song,artist,mbconnect=None):
    """
    Main function to create an HDF5 song file.
    You got to have the track, song and artist already.
    If you pass an open connection to the musicbrainz database, we also use it.
    The file is first written to '<path>_tmp' and moved to its final path
    only once it has been completely filled.
    Returns True if song was created, False otherwise.
    False can mean the file already exists, another thread holds the lock
    for that track, or we gave up after 100 failed tries.
    INPUT
       maindir      - main directory of the Million Song Dataset
       trackid      - Echo Nest track id of the track object
       track        - pyechonest track object
       song         - pyechonest song object
       artist       - pyechonest artist object
       mbconnect    - open musicbrainz pg connection
    RETURN
       True if a track file was created, False otherwise
    RAISES
       KeyboardInterrupt is re-raised after the tmp / final files are removed
    """
    hdf5_path = os.path.join(maindir,path_from_trackid(trackid))
    if os.path.exists(hdf5_path):
        return False # file already exists, no stress
    hdf5_path_tmp = hdf5_path + '_tmp'
    # lock the file
    if not get_lock_track(trackid):
        return False # someone is taking care of that file
    if os.path.exists(hdf5_path):
        release_lock_track(trackid)
        return False # got the lock too late, file exists
    # count errors (=tries), stop after 100 tries
    try_cnt = 0
    # handle on the tmp file; None means "nothing to close"
    h5 = None
    # create file and fill it
    try:
        while True: # try until we make it work!
            try:
                # we try one more time
                try_cnt += 1
                h5 = None # forget the handle of a previous failed try
                target_dir = os.path.dirname(hdf5_path)
                if not os.path.isdir(target_dir):
                    os.makedirs(target_dir)
                # check / delete tmp file if exist
                if os.path.isfile(hdf5_path_tmp):
                    os.remove(hdf5_path_tmp)
                # create tmp file
                HDF5.create_song_file(hdf5_path_tmp)
                h5 = HDF5.open_h5_file_append(hdf5_path_tmp)
                HDF5.fill_hdf5_from_artist(h5,artist)
                HDF5.fill_hdf5_from_song(h5,song)
                HDF5.fill_hdf5_from_track(h5,track)
                if mbconnect is not None:
                    HDF5.fill_hdf5_from_musicbrainz(h5,mbconnect)
                # TODO billboard? lastfm? ...?
                h5.close()
            except KeyboardInterrupt:
                close_creation()
                raise
            # we dont panic, delete file, wait and retry
            except Exception as e:
                # close hdf5 (best effort)
                if h5 is not None:
                    try:
                        h5.close()
                    except ValueError:
                        pass
                # delete path; os.remove raises OSError when the tmp file
                # was never created, which must not abort the retry loop
                try:
                    os.remove(hdf5_path_tmp)
                except (IOError,OSError):
                    pass
                # print and wait
                print('ERROR creating track: %s on %s (pid=%s)'
                      % (trackid,time.ctime(),os.getpid()))
                print(e)
                if try_cnt < 100:
                    print('(try again in %s seconds)' % (SLEEPTIME,))
                    time.sleep(SLEEPTIME)
                    continue
                # give up
                print('we give up after %s tries' % (try_cnt,))
                release_lock_track(trackid)
                return False
            # move tmp file to real file
            shutil.move(hdf5_path_tmp, hdf5_path)
            # release lock
            release_lock_track(trackid)
            return True
    # KeyboardInterrupt, we delete file, clean things up
    except KeyboardInterrupt:
        # close hdf5 (best effort)
        if h5 is not None:
            try:
                h5.close()
            except ValueError:
                pass
        # delete path
        try:
            if os.path.isfile(hdf5_path_tmp):
                os.remove(hdf5_path_tmp)
            if os.path.isfile(hdf5_path):
                os.remove(hdf5_path)
        except (IOError,OSError):
            pass
        raise
def convert_one_song(audiofile,
                     output,
                     mbconnect=None,
                     verbose=0,
                     DESTROYAUDIO=False):
    """
    PRINCIPAL FUNCTION
    Converts one given audio file to hdf5 format (saved in 'output')
    by uploading it to The Echo Nest API
    INPUT
         audiofile   - path to a typical audio file (wav, mp3, ...)
            output   - nonexisting hdf5 path
         mbconnect   - if not None, open connection to musicbrainz server
           verbose   - if >0 display more information
      DESTROYAUDIO   - Careful! deletes audio file if everything went well
    RETURN
       1 if we think a song is created, 0 otherwise
       (0 when 'audiofile' is missing or 'output' already exists)
    """
    # inputs + sanity checks
    if not os.path.exists(audiofile):
        print(('ERROR: song file does not exist:', audiofile))
        return 0
    if os.path.exists(output):
        print(('ERROR: hdf5 output file already exist:', output,
               ', delete or choose new path'))
        return 0
    # get EN track / song / artist for that song
    if verbose > 0:
        print(('get analysis for file:', audiofile))
    track = trackEN.track_from_filename(audiofile)
    song_id = track.song_id
    song = songEN.Song(song_id)
    if verbose > 0:
        print(('found song:', song.title, '(', song_id, ')'))
    artist_id = song.artist_id
    artist = artistEN.Artist(artist_id)
    if verbose > 0:
        print(('found artist:', artist.name, '(', artist_id, ')'))
    # hack to fill missing values: give the track an empty string for each
    # attribute it does not have
    for missing_attr in ('foreign_id', 'foreign_release_id'):
        try:
            getattr(track, missing_attr)
        except AttributeError:
            setattr(track, missing_attr, '')
            if verbose > 0:
                print('no track %s found' % missing_attr)
    # create HDF5 file
    if verbose > 0:
        print(('create HDF5 file:', output))
    HDF5.create_song_file(output, force=False)
    # fill hdf5 file from track
    if verbose > 0:
        if mbconnect is None:
            print('fill HDF5 file with info from track/song/artist')
        else:
            print(
                'fill HDF5 file with info from track/song/artist/musicbrainz')
    h5 = HDF5.open_h5_file_append(output)
    # always close the handle, even if one of the fill steps raises
    try:
        HDF5.fill_hdf5_from_artist(h5, artist)
        HDF5.fill_hdf5_from_song(h5, song)
        HDF5.fill_hdf5_from_track(h5, track)
        if mbconnect is not None:
            HDF5.fill_hdf5_from_musicbrainz(h5, mbconnect)
    finally:
        h5.close()
    # done
    if DESTROYAUDIO:
        if verbose > 0:
            print(('We remove audio file:', audiofile))
        os.remove(audiofile)
    return 1
# The goal is to get the feature information for the learning subset of the
# rwc audio samples: every '.au' file found under input_dir is uploaded for
# analysis and the resulting track is stored as '<basename>.h5' in output_dir.
input_dir = '/home/manu/workspace/databases/genres/jazz/'
output_dir = '/home/manu/workspace/databases/genres/jazz/hdf5/'

# Converting a single file works; now loop over all files of the learning
# directory.
# NOTE(review): `track` is presumably pyechonest's track module (see the
# commented-out import below) - confirm it is imported elsewhere in the file.
#from pyechonest import track
for audiofile in features.get_filepaths(input_dir, ext='.au'):
    print("Starting work on  %s" % (audiofile,))
    output = output_dir + os.path.splitext(os.path.split(audiofile)[-1])[0] + '.h5'
    # skip files that were already converted
    if os.path.exists(output):
        continue
    # audio is binary data: open in 'rb' and make sure the file gets closed
    with open(audiofile, 'rb') as file_object:
        curtrack = track.track_from_file(file_object, 'au', force_upload=True)
    HDF5.create_song_file(output,force=False)
    h5 = HDF5.open_h5_file_append(output)
    # always close the handle, even if filling the file raises
    try:
        # HACK we need to fill missing values
        setattr(curtrack, 'foreign_id', '')
        setattr(curtrack, 'foreign_release_id', '')
        setattr(curtrack, 'audio_md5', '')
        HDF5.fill_hdf5_from_track(h5,curtrack)
    finally:
        h5.close()
    del h5
    
# first testing on a single song
#audiofile = input_dir + 'rwc-g-m01_1.wav'
#output = output_dir + 'rwc-g-m01_1.h5'

# MSdB API not convenient need to identify the song
#from enpyapi_to_hdf5 import convert_one_song