def gen_song_tuples(root_path):
    # Generate one database row (tuple) per song found under root_path.
    #
    # Parameters
    # root_path: string -- path to MillionSongSubset's data folder
    #
    # Yields: one tuple per song, laid out as
    #   (title, artist, id, start, end, features)
    # where title, artist, id and the stringified feature vector are passed
    # through unicode_if_str(..., 'utf-8'), and start/end come from
    # get_start_end().
    #
    # Songs for which get_feature_vector() returns None are skipped (a
    # 'skipping' notice is printed). Songs whose processing raises are
    # reported on stderr and skipped, so one bad file does not stop the run.

    # Example: TRBIJIA128F425F57D.h5
    # (no comma inside the character class: it would match a literal ',')
    filename_re = r"^[A-Z]{7}[0-9A-F]{11}\.(h5|analysis)$"

    for song_rec in iterate_songs.iterate_folder_songs_extracted(root_path, filename_re):

        timbre = song_rec.timbre
        sections_start = song_rec.sections_start
        sections_conf = song_rec.sections_conf
        segments_start = song_rec.segments_start
        song_end = song_rec.song_end

        artist = song_rec.artist
        title = song_rec.title
        song_id = song_rec.id

        try:
            feature_vector = get_feature_vector(
                timbre, sections_start, sections_conf, segments_start, song_end)
            if feature_vector is None:
                print('skipping')
                continue

            start, end = get_start_end(sections_start, sections_conf, song_end)
            tup = (
                unicode_if_str(title, 'utf-8'),
                unicode_if_str(artist, 'utf-8'),
                unicode_if_str(song_id, 'utf-8'),
                start,
                end,
                unicode_if_str(features_as_str(feature_vector), 'utf-8'),
            )
        except Exception as e:
            # Best-effort: report the failing song and move on to the next one.
            # The trailing newline keeps successive reports on separate lines.
            sys.stderr.write(
                'exception for {0}, {1}: {2}\n'.format(title, song_id, str(e)))
            continue

        # Yield outside the try block so the handler above only covers the
        # construction of the row, not whatever the consumer does with it.
        yield tup
def save_feature_database(root_path,csvpath):
    # Create a database where each song is represented by a line in a CSV file
    # as such: artist/title , echonest id , feature_values
    #
    # Parameters
    # root_path: string -- path to MillionSongSubset's data folder
    # csvpath: string -- path where the .csv database will be created
    #
    # Returns: none
    
    filename_re = "^[A-Z]{7}[0-9,A-F]{11}\.h5$" # Example: TRBIJIA128F425F57D.h5
    time_start = time()
    for loop_nr, song_rec in enumerate(iterate_songs.iterate_folder_songs_extracted(root_path, filename_re)):
        
        btchromas = song_rec.btchromas
        feature_vector,key = create_feature_vector(btchromas,'majmin')
        artist = song_rec.artist
        title = song_rec.title
        id = song_rec.id
        print id
        print artist,':',title
        print feature_vector
        print_estimated_info(feature_vector,8,key)
        save_feature_vector(feature_vector,artist,title,id,csvpath)
        
        if ( (loop_nr + 1) % 1000) == 0:
            print "{0} songs read in {1:.1f} seconds" \
                .format(loop_nr + 1, time() - time_start)

    end_time = time() - time_start
    print "Total: {0} songs read in {1:.1f} seconds".format(loop_nr + 1, end_time)

#if __name__ == '__main__':
    # Main program
    
    #root_path = '/Users/victoriadennis/Documents/databases/MillionSongSubset/data'
    #csvpath = '/Users/victoriadennis/Documents/databases/MillionSongSubset/features/songs.csv'
    #filename_re = 'TRBIJIA128F425F57D.h5'
    #save_feature_database(root_path,csvpath)