def gen_song_tuples(root_path):
    """Yield one database row (tuple) for each usable song under root_path.

    Parameters
        root_path: string -- path to MillionSongSubset's data folder

    Yields
        tuple -- (title, artist, id, start, end, features). title, artist,
        id and the stringified feature vector are passed through
        ``unicode_if_str(..., 'utf-8')``; start and end are whatever
        ``get_start_end`` returns for the song's sections.

    A song is skipped (after printing 'skipping') when
    ``get_feature_vector`` returns None. Any exception raised while a song
    is being processed is reported on stderr and that song is skipped, so a
    single bad file does not abort the whole run.
    """
    # Example: TRBIJIA128F425F57D.h5
    # NOTE(review): the ',' inside [0-9,A-F] also matches a literal comma.
    # Probably unintended, but kept so the set of matched files is unchanged.
    filename_re = r"^[A-Z]{7}[0-9,A-F]{11}\.(h5|analysis)$"

    songs = iterate_songs.iterate_folder_songs_extracted(root_path,
                                                         filename_re)
    for song_rec in songs:
        timbre = song_rec.timbre
        sections_start = song_rec.sections_start
        sections_conf = song_rec.sections_conf
        segments_start = song_rec.segments_start
        song_end = song_rec.song_end
        artist = song_rec.artist
        title = song_rec.title
        song_id = song_rec.id  # not named `id`: that would shadow the builtin

        try:
            feature_vector = get_feature_vector(
                timbre, sections_start, sections_conf, segments_start,
                song_end)
            if feature_vector is None:
                print('skipping')
                continue

            start, end = get_start_end(sections_start, sections_conf,
                                       song_end)
            yield (
                unicode_if_str(title, 'utf-8'),
                unicode_if_str(artist, 'utf-8'),
                unicode_if_str(song_id, 'utf-8'),
                start,
                end,
                unicode_if_str(features_as_str(feature_vector), 'utf-8'),
            )
        except Exception as e:
            # Best-effort: log and move on to the next song. The trailing
            # newline keeps successive error reports on separate lines.
            sys.stderr.write(
                'exception for {0}, {1}: {2}\n'.format(title, song_id,
                                                       str(e)))
def save_feature_database(root_path, csvpath):
    """Build a CSV feature database from the songs found under root_path.

    Per the original author's description, each song is represented by one
    line in the CSV file as: artist/title , echonest id , feature_values.
    The actual row layout is decided by ``save_feature_vector``.

    For every matching .h5 file this computes a feature vector from the
    song's ``btchromas`` with ``create_feature_vector(btchromas, 'majmin')``,
    prints the song id, artist/title, the feature vector and the output of
    ``print_estimated_info``, then hands the row to ``save_feature_vector``.
    A progress line is printed every 1000 songs and a total at the end.

    Parameters
        root_path: string -- path to MillionSongSubset's data folder
        csvpath: string -- path where the .csv database will be created

    Returns: none
    """
    # Example: TRBIJIA128F425F57D.h5
    filename_re = r"^[A-Z]{7}[0-9,A-F]{11}\.h5$"
    time_start = time()

    # Initialised up front so the final summary still works when no file
    # matches (the loop variable would otherwise never be bound).
    songs_read = 0
    songs = iterate_songs.iterate_folder_songs_extracted(root_path,
                                                         filename_re)
    for songs_read, song_rec in enumerate(songs, 1):
        btchromas = song_rec.btchromas
        feature_vector, key = create_feature_vector(btchromas, 'majmin')
        artist = song_rec.artist
        title = song_rec.title
        song_id = song_rec.id  # not named `id`: that would shadow the builtin

        print(song_id)
        print('%s : %s' % (artist, title))
        print(feature_vector)
        # NOTE(review): the meaning of the literal 8 is defined by
        # print_estimated_info, which is not visible here.
        print_estimated_info(feature_vector, 8, key)

        save_feature_vector(feature_vector, artist, title, song_id, csvpath)

        if songs_read % 1000 == 0:
            print("{0} songs read in {1:.1f} seconds".format(
                songs_read, time() - time_start))

    elapsed = time() - time_start
    print("Total: {0} songs read in {1:.1f} seconds".format(songs_read,
                                                            elapsed))


# Example driver (kept disabled, as in the original):
#if __name__ == '__main__':
    # Main program
    #root_path = '/Users/victoriadennis/Documents/databases/MillionSongSubset/data'
    #csvpath = '/Users/victoriadennis/Documents/databases/MillionSongSubset/features/songs.csv'
    #filename_re = 'TRBIJIA128F425F57D.h5'
    #save_feature_database(root_path,csvpath)