def create_track_file(maindir, trackid, track, song, artist, mbconnect=None):
    """
    Main function to create an HDF5 song file.
    You got to have the track, song and artist already.
    If you pass an open connection to the musicbrainz database, we also use it.
    Returns True if song was created, False otherwise.
    False can mean another thread is already doing that song.
    We also check whether the path exists.

    The file is first written to '<final path>_tmp' and only moved to its
    final path once every fill step succeeded. A failed attempt is retried
    after SLEEPTIME seconds, up to 100 attempts in total.

    INPUT
       maindir    - main directory of the Million Song Dataset
       trackid    - Echo Nest track id of the track object
       track      - pyechonest track object
       song       - pyechonest song object
       artist     - pyechonest artist object
       mbconnect  - open musicbrainz pg connection
    RETURN
       True if a track file was created, False otherwise
    RAISE
       KeyboardInterrupt - re-raised after the tmp file and the final file
                           have been removed
    """
    hdf5_path = os.path.join(maindir, path_from_trackid(trackid))
    if os.path.exists(hdf5_path):
        return False  # file already exists, no stress
    hdf5_path_tmp = hdf5_path + '_tmp'
    hdf5_dir = os.path.dirname(hdf5_path)
    # lock the file
    if not get_lock_track(trackid):
        return False  # someone is taking care of that file
    if os.path.exists(hdf5_path):
        release_lock_track(trackid)
        return False  # got the lock too late, file exists
    # count errors (=tries), stop after 100 tries
    try_cnt = 0
    h5 = None
    # create file and fill it
    try:
        while True:  # try until we make it work!
            # forget the handle of a previous attempt so that the error
            # path never closes a stale file object
            h5 = None
            try:
                # we try one more time
                try_cnt += 1
                if not os.path.isdir(hdf5_dir):
                    os.makedirs(hdf5_dir)
                # check / delete tmp file if exist
                if os.path.isfile(hdf5_path_tmp):
                    os.remove(hdf5_path_tmp)
                # create tmp file
                HDF5.create_song_file(hdf5_path_tmp)
                h5 = HDF5.open_h5_file_append(hdf5_path_tmp)
                HDF5.fill_hdf5_from_artist(h5, artist)
                HDF5.fill_hdf5_from_song(h5, song)
                HDF5.fill_hdf5_from_track(h5, track)
                if mbconnect is not None:
                    HDF5.fill_hdf5_from_musicbrainz(h5, mbconnect)
                # TODO billboard? lastfm? ...?
                h5.close()
            except KeyboardInterrupt:
                close_creation()
                raise
            # we dont panic, delete file, wait and retry
            except Exception as e:
                # close hdf5
                if h5 is not None:
                    try:
                        h5.close()
                    except ValueError:
                        pass
                # delete path; os.remove reports failures as OSError, and
                # the tmp file may not exist if creation itself failed
                try:
                    os.remove(hdf5_path_tmp)
                except (IOError, OSError):
                    pass
                # print and wait
                print('ERROR creating track: %s on %s (pid=%s)'
                      % (trackid, time.ctime(), os.getpid()))
                print(e)
                if try_cnt < 100:
                    print('(try again in %s seconds)' % (SLEEPTIME,))
                    time.sleep(SLEEPTIME)
                    continue
                # give up
                print('we give up after %s tries' % (try_cnt,))
                release_lock_track(trackid)
                return False
            # move tmp file to real file
            shutil.move(hdf5_path_tmp, hdf5_path)
            # release lock
            release_lock_track(trackid)
            break
    # KeyboardInterrupt, we delete file, clean things up
    except KeyboardInterrupt:
        # close hdf5
        if h5 is not None:
            try:
                h5.close()
            except ValueError:
                pass
        # delete path
        try:
            if os.path.isfile(hdf5_path_tmp):
                os.remove(hdf5_path_tmp)
            if os.path.isfile(hdf5_path):
                os.remove(hdf5_path)
        except (IOError, OSError):
            pass
        raise
    # the file was written and moved into place
    return True
def convert_one_song(audiofile, output, mbconnect=None, verbose=0, DESTROYAUDIO=False):
    """
    PRINCIPAL FUNCTION
    Converts one given audio file to hdf5 format (saved in 'output')
    by uploading it to The Echo Nest API
    INPUT
         audiofile  - path to a typical audio file (wav, mp3, ...)
            output  - nonexisting hdf5 path
         mbconnect  - if not None, open connection to musicbrainz server
           verbose  - if >0 display more information
      DESTROYAUDIO  - Careful! deletes audio file if everything went well
    RETURN
       1 if we think a song is created, 0 otherwise
       (0 is returned when 'audiofile' does not exist or 'output' already
       exists; errors raised by the fill steps are propagated)
    """
    # inputs + sanity checks
    if not os.path.exists(audiofile):
        print('ERROR: song file does not exist: %s' % (audiofile,))
        return 0
    if os.path.exists(output):
        print('ERROR: hdf5 output file already exist: %s , delete or choose new path'
              % (output,))
        return 0
    # get EN track / song / artist for that song
    if verbose > 0:
        print('get analysis for file: %s' % (audiofile,))
    track = trackEN.track_from_filename(audiofile)
    song_id = track.song_id
    song = songEN.Song(song_id)
    if verbose > 0:
        print('found song: %s ( %s )' % (song.title, song_id))
    artist_id = song.artist_id
    artist = artistEN.Artist(artist_id)
    if verbose > 0:
        print('found artist: %s ( %s )' % (artist.name, artist_id))
    # hack to fill missing values
    for missing in ('foreign_id', 'foreign_release_id'):
        try:
            getattr(track, missing)
        except AttributeError:
            setattr(track, missing, '')
            if verbose > 0:
                print('no track %s found' % (missing,))
    # create HDF5 file
    if verbose > 0:
        print('create HDF5 file: %s' % (output,))
    HDF5.create_song_file(output, force=False)
    # fill hdf5 file from track
    if verbose > 0:
        if mbconnect is None:
            print('fill HDF5 file with info from track/song/artist')
        else:
            print('fill HDF5 file with info from track/song/artist/musicbrainz')
    h5 = HDF5.open_h5_file_append(output)
    try:
        HDF5.fill_hdf5_from_artist(h5, artist)
        HDF5.fill_hdf5_from_song(h5, song)
        HDF5.fill_hdf5_from_track(h5, track)
        if mbconnect is not None:
            HDF5.fill_hdf5_from_musicbrainz(h5, mbconnect)
    finally:
        # always release the file handle, even when a fill step raises
        h5.close()
    # done
    if DESTROYAUDIO:
        if verbose > 0:
            print('We remove audio file: %s' % (audiofile,))
        os.remove(audiofile)
    return 1
def create_track_file(maindir, trackid, track, song, artist, mbconnect=None):
    """
    Main function to create an HDF5 song file.
    You got to have the track, song and artist already.
    If you pass an open connection to the musicbrainz database, we also use it.
    Returns True if song was created, False otherwise.
    False can mean another thread is already doing that song.
    We also check whether the path exists.

    The file is first written to '<final path>_tmp' and only moved to its
    final path once every fill step succeeded. A failed attempt is retried
    after SLEEPTIME seconds, up to 100 attempts in total.

    INPUT
       maindir    - main directory of the Million Song Dataset
       trackid    - Echo Nest track id of the track object
       track      - pyechonest track object
       song       - pyechonest song object
       artist     - pyechonest artist object
       mbconnect  - open musicbrainz pg connection
    RETURN
       True if a track file was created, False otherwise
    RAISE
       KeyboardInterrupt - re-raised after the tmp file and the final file
                           have been removed
    """
    hdf5_path = os.path.join(maindir, path_from_trackid(trackid))
    if os.path.exists(hdf5_path):
        return False  # file already exists, no stress
    hdf5_path_tmp = hdf5_path + '_tmp'
    hdf5_dir = os.path.dirname(hdf5_path)
    # lock the file
    if not get_lock_track(trackid):
        return False  # someone is taking care of that file
    if os.path.exists(hdf5_path):
        release_lock_track(trackid)
        return False  # got the lock too late, file exists
    # count errors (=tries), stop after 100 tries
    try_cnt = 0
    h5 = None
    # create file and fill it
    try:
        while True:  # try until we make it work!
            # forget the handle of a previous attempt so that the error
            # path never closes a stale file object
            h5 = None
            try:
                # we try one more time
                try_cnt += 1
                if not os.path.isdir(hdf5_dir):
                    os.makedirs(hdf5_dir)
                # check / delete tmp file if exist
                if os.path.isfile(hdf5_path_tmp):
                    os.remove(hdf5_path_tmp)
                # create tmp file
                HDF5.create_song_file(hdf5_path_tmp)
                h5 = HDF5.open_h5_file_append(hdf5_path_tmp)
                HDF5.fill_hdf5_from_artist(h5, artist)
                HDF5.fill_hdf5_from_song(h5, song)
                HDF5.fill_hdf5_from_track(h5, track)
                if mbconnect is not None:
                    HDF5.fill_hdf5_from_musicbrainz(h5, mbconnect)
                # TODO billboard? lastfm? ...?
                h5.close()
            except KeyboardInterrupt:
                close_creation()
                raise
            # we dont panic, delete file, wait and retry
            except Exception as e:
                # close hdf5
                if h5 is not None:
                    try:
                        h5.close()
                    except ValueError:
                        pass
                # delete path; os.remove reports failures as OSError, and
                # the tmp file may not exist if creation itself failed
                try:
                    os.remove(hdf5_path_tmp)
                except (IOError, OSError):
                    pass
                # print and wait
                print('ERROR creating track: %s on %s (pid=%s)'
                      % (trackid, time.ctime(), os.getpid()))
                print(e)
                if try_cnt < 100:
                    print('(try again in %s seconds)' % (SLEEPTIME,))
                    time.sleep(SLEEPTIME)
                    continue
                # give up
                print('we give up after %s tries' % (try_cnt,))
                release_lock_track(trackid)
                return False
            # move tmp file to real file
            shutil.move(hdf5_path_tmp, hdf5_path)
            # release lock
            release_lock_track(trackid)
            break
    # KeyboardInterrupt, we delete file, clean things up
    except KeyboardInterrupt:
        # close hdf5
        if h5 is not None:
            try:
                h5.close()
            except ValueError:
                pass
        # delete path
        try:
            if os.path.isfile(hdf5_path_tmp):
                os.remove(hdf5_path_tmp)
            if os.path.isfile(hdf5_path):
                os.remove(hdf5_path)
        except (IOError, OSError):
            pass
        raise
    # the file was written and moved into place
    return True
def convert_one_song(audiofile, output, mbconnect=None, verbose=0, DESTROYAUDIO=False):
    """
    PRINCIPAL FUNCTION
    Converts one given audio file to hdf5 format (saved in 'output')
    by uploading it to The Echo Nest API
    INPUT
         audiofile  - path to a typical audio file (wav, mp3, ...)
            output  - nonexisting hdf5 path
         mbconnect  - if not None, open connection to musicbrainz server
           verbose  - if >0 display more information
      DESTROYAUDIO  - Careful! deletes audio file if everything went well
    RETURN
       1 if we think a song is created, 0 otherwise
       (0 is returned when 'audiofile' does not exist or 'output' already
       exists; errors raised by the fill steps are propagated)
    """
    # Messages are built as one string each: printing a parenthesised
    # tuple would show its repr instead of a readable line.
    # inputs + sanity checks
    if not os.path.exists(audiofile):
        print('ERROR: song file does not exist: %s' % (audiofile,))
        return 0
    if os.path.exists(output):
        print('ERROR: hdf5 output file already exist: %s , delete or choose new path'
              % (output,))
        return 0
    # get EN track / song / artist for that song
    if verbose > 0:
        print('get analysis for file: %s' % (audiofile,))
    track = trackEN.track_from_filename(audiofile)
    song_id = track.song_id
    song = songEN.Song(song_id)
    if verbose > 0:
        print('found song: %s ( %s )' % (song.title, song_id))
    artist_id = song.artist_id
    artist = artistEN.Artist(artist_id)
    if verbose > 0:
        print('found artist: %s ( %s )' % (artist.name, artist_id))
    # hack to fill missing values
    for missing in ('foreign_id', 'foreign_release_id'):
        try:
            getattr(track, missing)
        except AttributeError:
            setattr(track, missing, '')
            if verbose > 0:
                print('no track %s found' % (missing,))
    # create HDF5 file
    if verbose > 0:
        print('create HDF5 file: %s' % (output,))
    HDF5.create_song_file(output, force=False)
    # fill hdf5 file from track
    if verbose > 0:
        if mbconnect is None:
            print('fill HDF5 file with info from track/song/artist')
        else:
            print('fill HDF5 file with info from track/song/artist/musicbrainz')
    h5 = HDF5.open_h5_file_append(output)
    try:
        HDF5.fill_hdf5_from_artist(h5, artist)
        HDF5.fill_hdf5_from_song(h5, song)
        HDF5.fill_hdf5_from_track(h5, track)
        if mbconnect is not None:
            HDF5.fill_hdf5_from_musicbrainz(h5, mbconnect)
    finally:
        # always release the file handle, even when a fill step raises
        h5.close()
    # done
    if DESTROYAUDIO:
        if verbose > 0:
            print('We remove audio file: %s' % (audiofile,))
        os.remove(audiofile)
    return 1
# The goal is to get the feature information for the learn subbase of the
# RWC audio samples.
input_dir = '/home/manu/workspace/databases/genres/jazz/'
output_dir = '/home/manu/workspace/databases/genres/jazz/hdf5/'

# Single file is working, now loop on all files from the learning directory.
# from pyechonest import track
for audiofile in features.get_filepaths(input_dir, ext='.au'):
    print("Starting work on  %s" % (audiofile,))
    output = output_dir + os.path.splitext(os.path.basename(audiofile))[0] + '.h5'
    # skip files that already have an HDF5 output
    if os.path.exists(output):
        continue
    # audio data is binary: open it as such and make sure the file is
    # closed once the track object has been built
    with open(audiofile, 'rb') as file_object:
        curtrack = track.track_from_file(file_object, 'au', force_upload=True)
    # NOTE(review): creation of the song file is disabled below, so this
    # relies on open_h5_file_append coping with a not-yet-existing path
    # -- confirm.
    # HDF5.create_song_file(output,force=False)
    h5 = HDF5.open_h5_file_append(output)
    try:
        # HACK we need to fill missing values
        setattr(curtrack, 'foreign_id', '')
        setattr(curtrack, 'foreign_release_id', '')
        setattr(curtrack, 'audio_md5', '')
        HDF5.fill_hdf5_from_track(h5, curtrack)
    finally:
        # close the handle even if filling fails
        h5.close()
    # drop the reference to the closed handle
    del h5

# first testing on a single song
# audiofile = input_dir + 'rwc-g-m01_1.wav'
# output = output_dir + 'rwc-g-m01_1.h5'
# The MSD API is not convenient here: it needs to identify the song.
# from enpyapi_to_hdf5 import convert_one_song