Example #1
0
def debug_from_song_file(connect, h5path, verbose=0):
    """
    Slow debugging function that takes a h5 file, reads the info,
    checks the match with the musicbrainz db, and prints out the result.
    Only prints (and only when verbose > 0) for the pieces that are missing.

    INPUT
       connect  - passed unchanged to find_year_safemode and get_artist_tags
       h5path   - path of the song file to open
       verbose  - diagnostics are printed when > 0
    RETURN
       (gotmbid, gotyear, gottags): three 0/1 indicators telling whether an
       artist musicbrainz id, a year and at least one tag were found
    """
    import hdf5_utils as HDF5
    import hdf5_getters as GETTERS
    h5 = HDF5.open_h5_file_read(h5path)
    try:
        title = GETTERS.get_title(h5)
        release = GETTERS.get_release(h5)
        artist = GETTERS.get_artist_name(h5)
        ambid = GETTERS.get_artist_mbid(h5)
    finally:
        # release the file handle even if one of the getters fails
        h5.close()
    # mbid
    gotmbid = 1
    if ambid == '':
        gotmbid = 0
        if verbose > 0: print('no mb id for:', artist)
    # year
    year = find_year_safemode(connect, ambid, title, release, artist)
    gotyear = 1 if year > 0 else 0
    # only report a missing year when the lookup actually came back empty
    if gotyear == 0 and verbose > 0:
        print('no years for:', artist, '|', release, '|', title)
    # tags (the counts returned alongside the tags are not needed here)
    tags, counts = get_artist_tags(connect, ambid)
    gottags = 1 if len(tags) > 0 else 0
    if gottags == 0 and verbose > 0: print('no tags for:', artist)
    # return indicator for mbid, year, tag
    return gotmbid, gotyear, gottags
Example #2
0
def debug_from_song_file(connect,h5path,verbose=0):
    """
    Slow debugging function that takes a h5 file, reads the info,
    checks the match with the musicbrainz db, and prints out the result.
    Only prints (and only when verbose > 0) for the pieces that are missing.

    INPUT
       connect  - passed unchanged to find_year_safemode and get_artist_tags
       h5path   - path of the song file to open
       verbose  - diagnostics are printed when > 0
    RETURN
       (gotmbid, gotyear, gottags): three 0/1 indicators telling whether an
       artist musicbrainz id, a year and at least one tag were found
    """
    import hdf5_utils as HDF5
    import hdf5_getters as GETTERS
    h5 = HDF5.open_h5_file_read(h5path)
    try:
        title = GETTERS.get_title(h5)
        release = GETTERS.get_release(h5)
        artist = GETTERS.get_artist_name(h5)
        ambid = GETTERS.get_artist_mbid(h5)
    finally:
        # release the file handle even if one of the getters fails
        h5.close()
    # mbid
    gotmbid = 0 if ambid == '' else 1
    if gotmbid == 0 and verbose > 0:
        print('no mb id for:',artist)
    # year
    year = find_year_safemode(connect,ambid,title,release,artist)
    gotyear = 1 if year > 0 else 0
    # only report a missing year when the lookup actually came back empty
    if gotyear == 0 and verbose > 0:
        print('no years for:',artist,'|',release,'|',title)
    # tags (the counts returned alongside the tags are not needed here)
    tags,counts = get_artist_tags(connect,ambid)
    gottags = 1 if len(tags) > 0 else 0
    if gottags == 0 and verbose > 0:
        print('no tags for:',artist)
    # return indicator for mbid, year, tag
    return gotmbid,gotyear,gottags
def load_non_time_data():
    """
    Collect the year and ten scalar features of every song file under basedir.

    Walks the module-level ``basedir``, opens each file matching
    ``'*' + ext`` and, for songs whose year is not 0, builds a feature
    vector in this order: title length, number of artist mbtags, artist
    hotttnesss, duration, number of artist terms, loudness, mode, release
    length, tempo, artist name length.

    A running file counter is printed for every file; ``1`` is printed when
    reading a file fails, and that file is skipped.

    RETURN (years, ten_features), two lists of equal length where
    ten_features[i] belongs to years[i].
    """
    years = []
    ten_features = []
    num = 0
    for root, _dirs, _names in os.walk(basedir):
        for f in glob.glob(os.path.join(root, '*' + ext)):
            h5 = getter.open_h5_file_read(f)
            num += 1
            print(num)
            try:
                year = getter.get_year(h5)
                if year != 0:
                    title_length = len(getter.get_title(h5))
                    terms_length = len(getter.get_artist_terms(h5))
                    tags_length = len(getter.get_artist_mbtags(h5))
                    hotness = getter.get_artist_hotttnesss(h5)
                    duration = getter.get_duration(h5)
                    loudness = getter.get_loudness(h5)
                    mode = getter.get_mode(h5)
                    release_length = len(getter.get_release(h5))
                    tempo = getter.get_tempo(h5)
                    name_length = len(getter.get_artist_name(h5))
                    ten_feature = np.hstack([title_length, tags_length, hotness, duration,
                                             terms_length, loudness, mode, release_length, tempo, name_length])
                    # Append only once every getter succeeded, so that
                    # years and ten_features can never get out of step.
                    years.append(year)
                    ten_features.append(ten_feature)
            except Exception:
                # best effort: flag the unreadable song and carry on
                print(1)
            finally:
                h5.close()
    return years, ten_features
Example #4
0
def _extractSongData(file_path, filename):
    """
    Read the basic identifying fields of one song file.

    file_path is the h5 file to open; filename is its base name, whose last
    three characters are dropped to obtain the track id (presumably the
    '.h5' extension - confirm against the caller).

    Returns the tuple
    (track_id, song_id, dig7_id, title, release, artist_name, year),
    where the byte-string fields are decoded as UTF-8.
    """
    track_id = filename[:-3]
    h5 = hdf5_getters.open_h5_file_read(file_path)
    try:
        song_id = hdf5_getters.get_song_id(h5).decode('UTF-8')
        dig7_id = hdf5_getters.get_track_7digitalid(h5)
        title = hdf5_getters.get_title(h5).decode('UTF-8')
        release = hdf5_getters.get_release(h5).decode('UTF-8')
        artist_name = hdf5_getters.get_artist_name(h5).decode('UTF-8')
        year = hdf5_getters.get_year(h5)
    finally:
        # do not leak the handle when a getter or a decode fails
        h5.close()
    return track_id, song_id, dig7_id, title, release, artist_name, year
Example #5
0
def func_to_get_desired_values(filename, returnValue = False):
    """
    This function does 3 simple things:
    - open the song file
    - get the elements we want and put them in a record
    - close the file
    INPUT :
    filename    - The name of the h5 file to be loaded
    returnValue - When True the entry is returned to the caller; when False
                  (default) it is appended to the global all_desired_data
                  and None is returned
    OUTPUT :
    The entry has the form
    [[[song_id, title, artist_name, elementsRequested],
      artist_name, title, artist_mbtags, release], record]
    where record holds one value per name in the module-level
    elementsRequested (each name is looked up as a function on GETTERS).
    """
    global all_desired_data
    # Open file
    h5 = GETTERS.open_h5_file_read(filename)
    try:
        # Create and fill a record
        record = []
        for element in elementsRequested:
            result = getattr(GETTERS, element)(h5)
            try:
                # empty strings are replaced by a placeholder
                if result == '':
                    result = 'Adlen - void'
            except Exception:
                # the comparison / truth test may fail for array-valued
                # results; those are handled just below
                pass
            try:
                if isinstance(result, np.ndarray):
                    # arrays are summarised by their mean; arrays with at
                    # most one element become the empty string
                    if len(result) > 1:
                        result = float(np.mean(result))
                    else:
                        result = ''
            except Exception:
                # fall back to a plain float conversion when possible
                try:
                    result = float(result)
                except Exception:
                    pass
            record.append(result)

        song_id = GETTERS.get_track_id(h5)
        artist_name = GETTERS.get_artist_name(h5)
        title = GETTERS.get_title(h5)
        artist_mbtags = GETTERS.get_artist_mbtags(h5)
        release = GETTERS.get_release(h5)
    finally:
        # close the file even when a getter raised
        h5.close()

    song_id = unicode(song_id.decode('utf-8'))
    title = unicode(title.decode('utf-8'))
    artist_name = unicode(artist_name.decode('utf-8'))

    entry = [[[song_id, title, artist_name, elementsRequested], artist_name, title, artist_mbtags, release], record]
    if returnValue:
        return entry
    all_desired_data.append(entry)
def load_raw_data():
    """
    Collect year, segment timbres/pitches and ten scalar features per song.

    Walks the module-level ``basedir`` and opens each file matching
    ``'*' + ext``. A song is kept when its year is not 0 and its timbre
    array has at least 100 rows (``np.size(timbre, 0)``). The feature vector
    is built in this order: title length, artist hotttnesss, duration,
    number of artist mbtags, number of artist terms, loudness, mode, release
    length, tempo, artist name length.

    A running file counter is printed for every file; ``1`` is printed when
    reading a file fails, and that file is skipped.

    RETURN (years, timbres, pitches, min_length, ten_features) where the
    four lists are index-aligned and min_length is the smallest timbre row
    count among the kept songs (10000 if none was smaller).
    """
    years = []
    ten_features = []
    timbres = []
    pitches = []
    min_length = 10000
    num = 0
    for root, _dirs, _names in os.walk(basedir):
        for f in glob.glob(os.path.join(root, '*' + ext)):
            h5 = getter.open_h5_file_read(f)
            num += 1
            print(num)
            try:
                year = getter.get_year(h5)
                if year != 0:
                    timbre = getter.get_segments_timbre(h5)
                    s = np.size(timbre, 0)
                    if s >= 100:
                        pitch = getter.get_segments_pitches(h5)
                        title_length = len(getter.get_title(h5))
                        terms_length = len(getter.get_artist_terms(h5))
                        tags_length = len(getter.get_artist_mbtags(h5))
                        hotness = getter.get_artist_hotttnesss(h5)
                        duration = getter.get_duration(h5)
                        loudness = getter.get_loudness(h5)
                        mode = getter.get_mode(h5)
                        release_length = len(getter.get_release(h5))
                        tempo = getter.get_tempo(h5)
                        name_length = len(getter.get_artist_name(h5))
                        ten_feature = np.hstack([title_length, hotness, duration, tags_length,
                                                 terms_length, loudness, mode, release_length, tempo, name_length])
                        # Commit everything only after all getters succeeded,
                        # so the output lists stay index-aligned and
                        # min_length only reflects songs that were kept.
                        if s < min_length:
                            min_length = s
                        years.append(year)
                        timbres.append(timbre)
                        pitches.append(pitch)
                        ten_features.append(ten_feature)
            except Exception:
                # best effort: flag the unreadable song and carry on
                print(1)
            finally:
                h5.close()
    return years, timbres, pitches, min_length, ten_features
Example #7
0
def h5_to_csv_fields(h5,song):
    '''Serialise one song of an h5 file as a single csv line.
        Inputs: h5, an h5 file object, usable with the wrapper code MSongsDB
            song, an integer, the index of the song to read (h5 files can contain many songs)
        Output: a string holding all the fields of this song, comma separated and newline terminated
    '''
    # The list literal is evaluated top to bottom, so the column order below
    # is exactly the order of the fields in the produced line.
    columns = [
        # plain values straight from the wrapper getters
        gt.get_artist_name(h5, song),
        gt.get_title(h5, song),
        gt.get_release(h5, song),
        gt.get_year(h5, song),
        gt.get_duration(h5, song),
        gt.get_artist_familiarity(h5, song),
        gt.get_artist_hotttnesss(h5, song),
        gt.get_song_hotttnesss(h5, song),
        # array-valued getters are flattened into one string per field first
        array_to_csv_field(list(gt.get_artist_terms(h5, song))),
        array_to_csv_field(list(gt.get_artist_terms_freq(h5, song))),
        array_to_csv_field(list(gt.get_artist_terms_weight(h5, song))),
        gt.get_mode(h5, song),
        gt.get_key(h5, song),
        gt.get_tempo(h5, song),
        gt.get_loudness(h5, song),
        gt.get_danceability(h5, song),
        gt.get_energy(h5, song),
        gt.get_time_signature(h5, song),
        array_to_csv_field(list(gt.get_segments_start(h5, song))),
        # arrays of vectors use two separators:
        # [[1,2,3],[4,5,6]] is written as '1;2;3_4;5;6'
        double_Array_to_csv_field(list(gt.get_segments_timbre(h5, song)), '_', ';'),
        double_Array_to_csv_field(list(gt.get_segments_pitches(h5, song)), '_', ';'),
        array_to_csv_field(list(gt.get_segments_loudness_start(h5, song))),
        array_to_csv_field(list(gt.get_segments_loudness_max(h5, song))),
        array_to_csv_field(list(gt.get_segments_loudness_max_time(h5, song))),
        array_to_csv_field(list(gt.get_sections_start(h5, song))),
    ]
    # join the fields with commas and terminate the csv line
    csv_line = array_to_csv_field(columns, ",")
    csv_line += "\n"
    return csv_line
def get_all_data(target, basedir, ext='.h5') :
    """
    Write one tab-separated line per song file found under basedir.

    target   - object with a write() method; receives a header line
               followed by one line per song
    basedir  - directory that is walked recursively
    ext      - extension of the song files to read (default '.h5')

    A progress line 'count/10000' is printed after each song.
    """
    columns = (
        "track_id", "song_id", "title", "artist_name", "artist_location",
        "artist_hotttnesss", "release", "year", "song_hotttnesss",
        "danceability", "duration", "loudness", "sample_rate", "tempo"
    )
    row_format = "\t".join(["%s"] * len(columns)) + "\n"

    # header
    target.write(row_format % columns)

    count = 0
    for root, dirs, files in os.walk(basedir):
        for f in glob.glob(os.path.join(root, '*' + ext)):
            # f is the path of a song file: open it directly
            h5 = hdf5_getters.open_h5_file_read(f)
            try:
                target.write(row_format % (
                    hdf5_getters.get_track_id(h5),
                    hdf5_getters.get_song_id(h5),
                    hdf5_getters.get_title(h5),
                    hdf5_getters.get_artist_name(h5),
                    hdf5_getters.get_artist_location(h5),
                    hdf5_getters.get_artist_hotttnesss(h5),
                    hdf5_getters.get_release(h5),
                    hdf5_getters.get_year(h5),
                    hdf5_getters.get_song_hotttnesss(h5),
                    hdf5_getters.get_danceability(h5),
                    hdf5_getters.get_duration(h5),
                    hdf5_getters.get_loudness(h5),
                    hdf5_getters.get_analysis_sample_rate(h5),
                    hdf5_getters.get_tempo(h5)
                ))
            finally:
                h5.close()

            # show progress (single-argument form prints the same text on
            # Python 2 and Python 3)
            count += 1
            print("%d/10000" % count)
Example #9
0
def getURLFromH5(h5path):
    """
    Print and return the preview URL for the song stored in h5path.

    The 7digital track id is read from the file and handed to
    get_preview_from_trackid. The process exits (status 0) when h5path is
    not a file.

    Returns the preview when one is found; returns None when the file has
    no valid track id (< 0) or when the lookup yields an empty string.
    """
    if not os.path.isfile(h5path):
        print('invalid path (not a file): %s' % h5path)
        sys.exit(0)
    h5 = hdf5_utils.open_h5_file_read(h5path)
    try:
        track_7digitalid = GETTERS.get_track_7digitalid(h5)
    finally:
        # release the handle even if the getter fails
        h5.close()

    if track_7digitalid < 0:
        return None

    # we already have the 7digital track id? way too easy!
    preview = get_preview_from_trackid(track_7digitalid)
    if preview == '':
        print('something went wrong when looking by track id')
        return None
    print(preview)
    return preview
Example #10
0
def get_all_attributes(filename):
    """
    Append the attributes of one song file to 'attributes.csv'.

    - open the song file
    - get all required attributes
    - write them as one tab-separated row
    - close the files

    An AttributeError raised while reading or writing is ignored on purpose
    (best effort): the song is skipped and no row is written for it.
    """
    with open('attributes.csv', 'a') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter='\t')
        try:
            h5 = GETTERS.open_h5_file_read(filename)
            try:
                row = [
                    GETTERS.get_year(h5),
                    GETTERS.get_artist_id(h5),
                    GETTERS.get_artist_name(h5),
                    GETTERS.get_artist_mbid(h5),
                    convert_terms(GETTERS.get_artist_terms(h5)),
                    GETTERS.get_artist_hotttnesss(h5),
                    GETTERS.get_artist_latitude(h5),
                    GETTERS.get_artist_longitude(h5),
                    GETTERS.get_artist_familiarity(h5),
                    GETTERS.get_danceability(h5),
                    GETTERS.get_duration(h5),
                    GETTERS.get_energy(h5),
                    GETTERS.get_loudness(h5),
                    GETTERS.get_song_hotttnesss(h5),
                    GETTERS.get_song_id(h5),
                    GETTERS.get_tempo(h5),
                    GETTERS.get_time_signature(h5),
                    GETTERS.get_title(h5),
                    GETTERS.get_track_id(h5),
                    GETTERS.get_release(h5),
                ]
            finally:
                # the handle used to stay open when an error was swallowed
                h5.close()
            csvwriter.writerow(row)
        except AttributeError:
            pass
Example #11
0
def getURLFromH5(h5path):
    """
    Print and return the preview URL for the song stored in h5path.

    The 7digital track id is read from the file and handed to
    get_preview_from_trackid. A two-line banner is always printed before the
    lookup. The process exits (status 0) when h5path is not a file.

    Returns the preview when one is found; returns None when the file has
    no valid track id (< 0) or when the lookup yields an empty string.
    """
    if not os.path.isfile(h5path):
        print('invalid path (not a file): %s' % h5path)
        sys.exit(0)
    h5 = hdf5_utils.open_h5_file_read(h5path)
    try:
        track_7digitalid = GETTERS.get_track_7digitalid(h5)
    finally:
        # release the handle even if the getter fails
        h5.close()

    print("Suggested Song URLs For you")
    print("===========================")
    if track_7digitalid < 0:
        return None

    # we already have the 7digital track id? way too easy!
    preview = get_preview_from_trackid(track_7digitalid)
    if preview == '':
        print('something went wrong when looking by track id')
        return None
    print(preview)
    return preview
def hd5_single_random_file_parser(
        h5_path='/home/skalogerakis/Documents/MillionSong/MillionSongSubset/A/M/G/TRAMGDX12903CEF79F.h5'):
    """
    Exploration helper: dump every field of one song file to stdout.

    h5_path is the song file to inspect; it defaults to the sample file this
    helper was originally written against.

    Steps:
    - print the name of every attribute of hdf5_getters starting with 'get'
    - print one labelled line per field of the song
    - write the title and artist id to 'Tester2.csv' (';' separated,
      overwriting any existing file)
    """
    # (label, getter name) pairs, in the order they are printed
    labelled_getters = (
        ("Num of songs -- ", 'get_num_songs'),
        ("Title -- ", 'get_title'),
        ("Artist familiarity -- ", 'get_artist_familiarity'),
        ("Artist hotness -- ", 'get_artist_hotttnesss'),
        ("Artist ID -- ", 'get_artist_id'),
        ("Artist mbID -- ", 'get_artist_mbid'),
        ("Artist playmeid -- ", 'get_artist_playmeid'),
        ("Artist 7DigitalID -- ", 'get_artist_7digitalid'),
        ("Artist latitude -- ", 'get_artist_latitude'),
        ("Artist longitude -- ", 'get_artist_longitude'),
        ("Artist location -- ", 'get_artist_location'),
        ("Artist Name -- ", 'get_artist_name'),
        ("Release -- ", 'get_release'),
        ("Release 7DigitalID -- ", 'get_release_7digitalid'),
        ("Song ID -- ", 'get_song_id'),
        ("Song Hotness -- ", 'get_song_hotttnesss'),
        ("Track 7Digital -- ", 'get_track_7digitalid'),
        ("Similar artists -- ", 'get_similar_artists'),
        ("Artist terms -- ", 'get_artist_terms'),
        ("Artist terms freq -- ", 'get_artist_terms_freq'),
        ("Artist terms weight -- ", 'get_artist_terms_weight'),
        ("Analysis sample rate -- ", 'get_analysis_sample_rate'),
        ("Audio md5 -- ", 'get_audio_md5'),
        ("Danceability -- ", 'get_danceability'),
        ("Duration -- ", 'get_duration'),
        ("End of Fade -- ", 'get_end_of_fade_in'),
        ("Energy -- ", 'get_energy'),
        ("Key -- ", 'get_key'),
        ("Key Confidence -- ", 'get_key_confidence'),
        ("Loudness -- ", 'get_loudness'),
        ("Mode -- ", 'get_mode'),
        ("Mode Confidence -- ", 'get_mode_confidence'),
        ("Start of fade out -- ", 'get_start_of_fade_out'),
        ("Tempo -- ", 'get_tempo'),
        ("Time signature -- ", 'get_time_signature'),
        ("Time signature confidence -- ", 'get_time_signature_confidence'),
        ("Track ID -- ", 'get_track_id'),
        ("Segments Start -- ", 'get_segments_start'),
        ("Segments Confidence -- ", 'get_segments_confidence'),
        ("Segments Pitches -- ", 'get_segments_pitches'),
        ("Segments Timbre -- ", 'get_segments_timbre'),
        ("Segments Loudness max -- ", 'get_segments_loudness_max'),
        ("Segments Loudness max time-- ", 'get_segments_loudness_max_time'),
        ("Segments Loudness start -- ", 'get_segments_loudness_start'),
        ("Sections start -- ", 'get_sections_start'),
        ("Sections Confidence -- ", 'get_sections_confidence'),
        ("Beats start -- ", 'get_beats_start'),
        ("Beats confidence -- ", 'get_beats_confidence'),
        ("Bars start -- ", 'get_bars_start'),
        ("Bars confidence -- ", 'get_bars_confidence'),
        ("Tatums start -- ", 'get_tatums_start'),
        ("Tatums confidence -- ", 'get_tatums_confidence'),
        ("Artist mbtags -- ", 'get_artist_mbtags'),
        ("Artist mbtags count -- ", 'get_artist_mbtags_count'),
        ("Year -- ", 'get_year'),
    )

    # Open an h5 file in read mode
    h5 = hdf5_getters.open_h5_file_read(h5_path)
    try:
        # Detect and print all the getter functions
        for getter_name in filter(lambda x: x.startswith('get'),
                                  hdf5_getters.__dict__.keys()):
            print(getter_name)

        # First effort to check what each field contains.
        print()
        for label, getter_name in labelled_getters:
            # looked up lazily so a missing getter fails at its own line,
            # after the preceding fields have been printed
            print(label, getattr(hdf5_getters, getter_name)(h5))

        fields = ['Title', 'Artist ID']

        with open('Tester2.csv', 'w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=';')

            # writing the fields
            csv_writer.writerow(fields)

            # writing the data rows
            csv_writer.writerow(
                [hdf5_getters.get_title(h5),
                 hdf5_getters.get_artist_id(h5)])
    finally:
        # close h5 when completed, also when a getter or the csv write fails
        h5.close()
Example #13
0
 #artist_terms_weight = ','.join(str(e) for e in GETTERS.get_artist_terms_weight(h5, i)) # array
 #audio_md5 = GETTERS.get_audio_md5(h5, i)
 #bars_confidence = ','.join(str(e) for e in GETTERS.get_bars_confidence(h5, i)) # array
 #bars_start = ','.join(str(e) for e in GETTERS.get_bars_start(h5, i)) # array
 #beats_confidence = ','.join(str(e) for e in GETTERS.get_beats_confidence(h5, i)) # array
 #beats_start = ','.join(str(e) for e in GETTERS.get_beats_start(h5, i)) # array
 danceability = GETTERS.get_danceability(h5, i)
 duration = GETTERS.get_duration(h5, i)
 end_of_fade_in = GETTERS.get_end_of_fade_in(h5, i)
 energy = GETTERS.get_energy(h5, i)
 key = GETTERS.get_key(h5, i)
 key_confidence = GETTERS.get_key_confidence(h5, i)
 loudness = GETTERS.get_loudness(h5, i)
 mode = GETTERS.get_mode(h5, i)
 mode_confidence = GETTERS.get_mode_confidence(h5, i)
 release = GETTERS.get_release(h5, i)
 release_7digitalid = GETTERS.get_release_7digitalid(h5, i)
 #sections_confidence = ','.join(str(e) for e in GETTERS.get_sections_confidence(h5, i)) # array
 #sections_start = ','.join(str(e) for e in GETTERS.get_sections_start(h5, i)) # array
 #segments_confidence = ','.join(str(e) for e in GETTERS.get_segments_confidence(h5, i)) # array
 #segments_loudness_max = ','.join(str(e) for e in GETTERS.get_segments_loudness_max(h5, i)) # array
 #segments_loudness_max_time = ','.join(str(e) for e in GETTERS.get_segments_loudness_max_time(h5, i)) # array
 #segments_loudness_start = ','.join(str(e) for e in GETTERS.get_segments_loudness_start(h5, i)) # array
 #segments_pitches = ','.join(str(e) for e in GETTERS.get_segments_pitches(h5, i)) # array
 #segments_start = ','.join(str(e) for e in GETTERS.get_segments_start(h5, i)) # array
 #segments_timbre = ','.join(str(e) for e in GETTERS.get_segments_timbre(h5, i)) # array
 similar_artists = ','.join(
     str(e)
     for e in GETTERS.get_similar_artists(h5, i))  # array
 song_hotttnesss = GETTERS.get_song_hotttnesss(h5, i)
 song_id = GETTERS.get_song_id(h5, i)
 #artist_terms_weight = ','.join(str(e) for e in GETTERS.get_artist_terms_weight(h5, i)) # array
 #audio_md5 = GETTERS.get_audio_md5(h5, i)
 #bars_confidence = ','.join(str(e) for e in GETTERS.get_bars_confidence(h5, i)) # array
 #bars_start = ','.join(str(e) for e in GETTERS.get_bars_start(h5, i)) # array
 #beats_confidence = ','.join(str(e) for e in GETTERS.get_beats_confidence(h5, i)) # array
 #beats_start = ','.join(str(e) for e in GETTERS.get_beats_start(h5, i)) # array
 danceability = GETTERS.get_danceability(h5, i)
 duration = GETTERS.get_duration(h5, i)
 end_of_fade_in = GETTERS.get_end_of_fade_in(h5, i)
 energy = GETTERS.get_energy(h5, i)
 key = GETTERS.get_key(h5, i)
 key_confidence = GETTERS.get_key_confidence(h5, i)
 loudness = GETTERS.get_loudness(h5, i)
 mode = GETTERS.get_mode(h5, i)
 mode_confidence = GETTERS.get_mode_confidence(h5, i)
 release = GETTERS.get_release(h5, i)
 release_7digitalid = GETTERS.get_release_7digitalid(h5, i)
 #sections_confidence = ','.join(str(e) for e in GETTERS.get_sections_confidence(h5, i)) # array
 #sections_start = ','.join(str(e) for e in GETTERS.get_sections_start(h5, i)) # array
 #segments_confidence = ','.join(str(e) for e in GETTERS.get_segments_confidence(h5, i)) # array
 #segments_loudness_max = ','.join(str(e) for e in GETTERS.get_segments_loudness_max(h5, i)) # array
 #segments_loudness_max_time = ','.join(str(e) for e in GETTERS.get_segments_loudness_max_time(h5, i)) # array
 #segments_loudness_start = ','.join(str(e) for e in GETTERS.get_segments_loudness_start(h5, i)) # array
 #segments_pitches = ','.join(str(e) for e in GETTERS.get_segments_pitches(h5, i)) # array
 #segments_start = ','.join(str(e) for e in GETTERS.get_segments_start(h5, i)) # array
 #segments_timbre = ','.join(str(e) for e in GETTERS.get_segments_timbre(h5, i)) # array
 similar_artists = ','.join(str(e) for e in GETTERS.get_similar_artists(h5, i)) # array
 song_hotttnesss = GETTERS.get_song_hotttnesss(h5, i)
 song_id = GETTERS.get_song_id(h5, i)
 start_of_fade_out = GETTERS.get_start_of_fade_out(h5, i)
 #tatums_confidence = ','.join(str(e) for e in GETTERS.get_tatums_confidence(h5, i)) # array
Example #15
0
def getInfo(files):
    """
    Build a csv string with the requested fields of every song file.

    The whitespace-separated field names are read from the file named by
    sys.argv[1]. Each name must correspond to a ``get_<name>`` function of
    the ``getters`` module; otherwise 'error: unspecified field' is printed
    and the process exits with status 0.

    files - iterable of h5 file paths

    Returns a string whose first line is the comma-joined field names,
    followed by one comma-joined line per file. Values are rendered with
    str(), stripped of newlines, of the ``b`` prefix of byte-string reprs
    and of single and double quotes.

    NOTE(review): commas inside a value are not escaped, so such a value
    spills into the next column - same as before this change.
    """
    import re

    with open(sys.argv[1], 'r') as f:
        contents = f.read()
    field_names = contents.split()
    print("creating csv with following fields:" + contents)

    # Resolve only the getters that were asked for, instead of evaluating
    # every getter of the module for every file.
    field_getters = []
    for name in field_names:
        field_getter = getattr(getters, 'get_' + name, None)
        if field_getter is None:
            print('error: unspecified field')
            sys.exit(0)
        field_getters.append(field_getter)

    # Matches the ``b`` of a bytes repr (b'...' / b"...") only. Removing
    # every letter 'b' used to corrupt both the header and the data.
    bytes_prefix = re.compile(r"\bb(?=['\"])")

    lines = [','.join(field_names)]
    for fil in files:
        curFile = getters.open_h5_file_read(fil)
        try:
            cells = []
            for field_getter in field_getters:
                text = str(field_getter(curFile)).replace('\n', '')
                text = bytes_prefix.sub('', text)
                cells.append(text.replace("'", '').replace('"', ''))
        finally:
            curFile.close()
        lines.append(','.join(cells))
    return '\n'.join(lines) + '\n'
def _flag_if_comma(text):
    """Echo *text* and pause for one second when it contains a comma."""
    if ',' in text:
        print(text)
        time.sleep(1)


def data_to_flat_file(basedir, ext='.h5'):
    """Export basic per-song metadata from a tree of HDF5 song files to CSV.

    Walks ``basedir`` recursively and, for every file matching ``*<ext>``,
    writes one row to ``metadata_wholeA.csv`` in the current working
    directory (the file is truncated first).  The columns are, in order:

        title, album, artist_name, year, duration, seg_start_count, tempo

    * Double quotes are stripped from the three text columns.
    * A NaN tempo is written as -1.
    * ``seg_start_count`` is ``get_count()`` applied to the segment start
      array of the song.
    * Any text value containing a comma is printed, followed by a one-second
      pause, so it can be spotted while the export runs.  The path of every
      processed file and the running song count are printed as well.

    Only the values that end up in the row are read from each file; nothing
    else is extracted or aggregated.

    Parameters:
        basedir: root directory that is searched recursively.
        ext: file-name suffix of the song files (default ``'.h5'``).

    Returns:
        None.  The result is the CSV file on disk.
    """
    count = 0  # song counter

    # Binary mode is the csv module's convention on Python 2, which this
    # function targets; on Python 3 the file would have to be opened in
    # text mode with newline='' instead.
    with open("metadata_wholeA.csv", "wb") as csv_file:
        writer = csv.writer(csv_file)
        for root, dirs, files in os.walk(basedir):
            for f in glob.glob(os.path.join(root, '*' + ext)):
                print(f)  # the name of the file
                h5 = hdf5_getters.open_h5_file_read(f)
                try:
                    title = hdf5_getters.get_title(h5).replace('"', '')
                    _flag_if_comma(title)

                    album = hdf5_getters.get_release(h5).replace('"', '')
                    _flag_if_comma(album)

                    # The artist name is checked (and echoed) before its
                    # double quotes are removed.
                    artist_name = hdf5_getters.get_artist_name(h5)
                    _flag_if_comma(artist_name)
                    artist_name = artist_name.replace('"', '')

                    duration = hdf5_getters.get_duration(h5)

                    tempo = hdf5_getters.get_tempo(h5)
                    if numpy.isnan(tempo):
                        tempo = -1  # NaN is exported as -1

                    year = hdf5_getters.get_year(h5)
                    seg_start_count = get_count(
                        hdf5_getters.get_segments_start(h5))

                    # Writing to the flat file
                    writer.writerow([title, album, artist_name, year,
                                     duration, seg_start_count, tempo])
                finally:
                    # Release the HDF5 handle even when a getter fails.
                    h5.close()

                count += 1
                print(count)
Example #17
0
def main():
    """Dump scalar song metadata from HDF5 song files into ``songs.csv``.

    Searches ``basedir`` recursively for files whose name ends in ``ext``
    and writes one CSV row per file: a running ``song_number`` (starting at
    0) followed by one value per entry of ``fields``.  The path of every
    processed file is printed.  ``songs.csv`` is created in the current
    working directory and truncated if it already exists.

    Every column name doubles as the name of its getter: column ``x`` is
    read with ``hdf5_getters.get_x()``, so the header and the row contents
    are generated from the same list and cannot drift apart.
    """
    fields = [
        "artist_familiarity",
        "artist_hotttnesss",
        "artist_id",
        "artist_mbid",
        "artist_playmeid",
        "artist_7digitalid",
        "artist_latitude",
        "artist_longitude",
        "artist_location",
        "artist_name",
        "release",
        "release_7digitalid",
        "song_id",
        "song_hotttnesss",
        "title",
        "track_7digitalid",
        "analysis_sample_rate",
        "audio_md5",
        "danceability",
        "duration",
        "end_of_fade_in",
        "energy",
        "key",
        "key_confidence",
        "loudness",
        "mode",
        "mode_confidence",
        "start_of_fade_out",
        "tempo",
        "time_signature",
        "time_signature_confidence",
        "track_id",
        "year",
    ]

    #################################################
    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = "."  # "." As the default means the current directory
    # NOTE(review): glob matching is case-sensitive on POSIX systems, so
    # ".H5" does not match files named "*.h5" there - confirm the intended
    # extension.
    ext = ".H5"  #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    outputFile = open('songs.csv', 'w')
    try:
        writer = csv.writer(outputFile)

        # The header is written directly (terminated by a bare "\n"), the
        # data rows go through csv.writer with its default line terminator.
        outputFile.write("song_number," + ",".join(fields) + "\n")

        songCount = 0
        for root, dirs, files in os.walk(basedir):
            for f in glob.glob(os.path.join(root, '*' + ext)):
                print(f)

                songH5File = hdf5_getters.open_h5_file_read(f)
                try:
                    values = [songCount]
                    for name in fields:
                        getter = getattr(hdf5_getters, "get_" + name)
                        values.append(getter(songH5File))
                finally:
                    # Release the HDF5 handle even when a getter fails.
                    songH5File.close()
                songCount = songCount + 1

                writer.writerow(values)
    finally:
        outputFile.close()
    # Walk the current working directory and process every file whose name
    # ends in ".h5" (Python 2 code: print statement, unicode(), file()).
    # NOTE(review): process_completion, counter, file_io_counter and (for the
    # vstack branch) my_array_data_extract_1 are not initialised anywhere in
    # the visible code - presumably the preamble of this snippet is missing.
    for root, dirs, files in os.walk(os.getcwd()):
        for name in files:
            if name.endswith(".h5"):
                # Progress report every 10000 files; the counter is advanced
                # in both branches.  Printing count/10000 as a percentage
                # presumably assumes 1,000,000 files in total - confirm.
                if process_completion % 10000 == 0:
                    print "done :", process_completion/10000.0, "%"
                    process_completion += 1
                else:
                    process_completion += 1

                tempPath = os.path.abspath(os.path.join(root,name))
                # NOTE(review): h5file is not closed in the visible lines.
                h5file = hdf5_getters.open_h5_file_read(tempPath)

                #data extract 1: track id, song id, album name, year
                track_id_str = hdf5_getters.get_track_id(h5file)
                song_id_str = hdf5_getters.get_song_id(h5file)
                # Decode the release name as ASCII, dropping undecodable
                # bytes, then apply NFKD normalisation.
                album_name_str = unicodedata.normalize('NFKD', unicode(hdf5_getters.get_release(h5file),encoding='ASCII',errors='ignore'))
                # Remove apostrophes and turn , - ( ) / \ into spaces.
                album_name_str = str(album_name_str).replace(","," ").replace("'","").replace("-"," ").replace("("," ").replace(")"," ").replace("/"," ").replace("\\"," ")
                year_str = str(hdf5_getters.get_year(h5file))

                counter += 1
                file_io_counter += 1
                # NOTE(review): a row is added to the array only for every
                # 100th file; the values read for the other files are not
                # stored anywhere in the visible code.
                if file_io_counter % 100 == 0:
                    if counter == 1:
                        my_array_data_extract_1 = numpy.array([track_id_str, song_id_str, album_name_str, year_str])
                    else :
                        my_array_data_extract_1 = numpy.vstack((my_array_data_extract_1,numpy.array([track_id_str, song_id_str, album_name_str, year_str])))

                    # Append the accumulated array to the output file as
                    # '|'-separated text.
                    # NOTE(review): the array is not reset after saving, so
                    # earlier rows are appended again on every flush.
                    f_handle = file('msd_data_extract_1.bin','a')
                    numpy.savetxt(f_handle, my_array_data_extract_1, delimiter='|',fmt='%s')
                    f_handle.close()
def _is_filled(value):
    """Return True when *value* counts as a populated field.

    NaN floats are treated as missing, NumPy arrays always count as
    populated (their truth value is ambiguous), and anything else is judged
    by its truthiness.
    """
    if isinstance(value, float) and math.isnan(value):
        return False
    if isinstance(value, np.ndarray):
        return True
    return bool(value)


def _cov_entries(cov_mat):
    """Flatten a square covariance matrix into the list stored per track.

    Row ``i`` contributes its first ``n - i`` entries, which gives
    ``n * (n + 1) / 2`` values in total (78 for a 12x12 matrix).
    """
    # NOTE(review): these are the entries with i + j < n, not the upper
    # triangle (j >= i) of the symmetric matrix, so some covariances appear
    # twice and some variances are left out.  Kept as-is so existing feature
    # vectors stay comparable - confirm whether the upper triangle was meant.
    size = len(cov_mat)
    entries = []
    for i in range(size):
        entries.extend(cov_mat[i][:size - i])
    return entries


def get_fields(files):
    """Read a fixed set of features from each HDF5 song file in *files*.

    For every file a dict is built that holds scalar metadata (artist,
    release, title, duration, key, loudness, tempo, ...), the raw
    ``segments_timbre`` / ``segments_pitch`` arrays, their per-column means
    and a flattened list of covariance entries for each of the two arrays.

    While loading, a progress bar is written to stdout when the module-level
    name ``num_of_tracks`` (not defined in this function) is at least 100.

    Parameters:
        files: sequence of paths to HDF5 song files.

    Returns:
        A tuple ``(tracks, counts, field_counts)`` where

        * ``tracks`` is the list of per-file feature dicts,
        * ``counts`` maps each field name to the number of files in which
          that field was populated (NaN and falsy values count as missing),
        * ``field_counts`` holds, per file, the number of populated fields.
    """
    tracks = []
    counts = {}
    field_counts = []
    for index, path in enumerate(files, 1):
        h5 = hdf5_getters.open_h5_file_read(path)
        try:
            t = {}
            t['artist_familiarity'] = hdf5_getters.get_artist_familiarity(
                h5)  # estimation
            t['artist_hotttnesss'] = hdf5_getters.get_artist_hotttnesss(
                h5)  # estimation
            t['artist_name'] = hdf5_getters.get_artist_name(h5)  # artist name
            t['release'] = hdf5_getters.get_release(h5)  # album name
            t['title'] = hdf5_getters.get_title(h5)  # title
            t['len_similar_artists'] = len(
                hdf5_getters.get_similar_artists(h5))  # number of similar artists
            t['analysis_sample_rate'] = hdf5_getters.get_analysis_sample_rate(
                h5)  # sample rate of the audio used
            t['duration'] = hdf5_getters.get_duration(h5)  # seconds
            t['key'] = hdf5_getters.get_key(h5)  # key the song is in
            t['key_confidence'] = hdf5_getters.get_key_confidence(
                h5)  # confidence measure
            t['loudness'] = hdf5_getters.get_loudness(h5)  # overall loudness in dB
            t['mode_confidence'] = hdf5_getters.get_mode_confidence(
                h5)  # confidence measure
            t['start_of_fade_out'] = hdf5_getters.get_start_of_fade_out(
                h5)  # time in sec
            t['tempo'] = hdf5_getters.get_tempo(h5)  # estimated tempo in BPM
            t['time_signature'] = hdf5_getters.get_time_signature(
                h5)  # estimate of number of beats per bar, e.g. 4
            t['year'] = hdf5_getters.get_year(
                h5)  # song release year from MusicBrainz or 0

            timbre = hdf5_getters.get_segments_timbre(
                h5)  # 2D float array, texture features (MFCC+PCA-like)
            t['segments_timbre'] = timbre
            t['timbre_avg'] = timbre.mean(axis=0)  # one average per column
            t['timbre_cov'] = _cov_entries(np.cov(timbre, rowvar=False))

            pitch = hdf5_getters.get_segments_pitches(
                h5)  # 2D float array, chroma feature, one value per note
            t['segments_pitch'] = pitch
            t['pitch_avg'] = pitch.mean(axis=0)  # one average per column
            # Built from the pitch covariance matrix (not the timbre one).
            t['pitch_cov'] = _cov_entries(np.cov(pitch, rowvar=False))

            # Not extracted here: artist_latitude / artist_longitude /
            # artist_location, song_hotttnesss, danceability, end_of_fade_in,
            # energy, mode, time_signature_confidence, artist_mbtags_count,
            # and the fields the original author marked as "bad types or non
            # arithmetic numbers": ids, audio_md5, artist terms and tags,
            # similar_artists and the segments_* / sections_* / beats_* /
            # bars_* / tatums_* arrays.
        finally:
            # Release the HDF5 handle even when a getter fails.
            h5.close()

        # Update the per-field totals and count this file's populated fields
        # with one shared definition of "populated".
        filled = 0
        for key, value in t.items():
            if _is_filled(value):
                counts[key] = counts.get(key, 0) + 1
                filled += 1
            else:
                counts.setdefault(key, 0)
        field_counts.append(filled)

        # progress bar
        if num_of_tracks >= 100:
            scale = num_of_tracks / 100
            if index % math.ceil(len(files) * .05) == 0:
                sys.stdout.write('\r')
                sys.stdout.write("Loading dataframe: [%-100s] %d%%" %
                                 ('=' * int(index // scale), 1 / scale * index))
                sys.stdout.flush()
                time.sleep(.01)

        tracks.append(t)
    print()
    return tracks, counts, field_counts
Example #20
0
def main():
    """Write one JSON object per HDF5 song file to an output file.

    Command line:
        ``sys.argv[1]``  root directory that is searched recursively for
                         ``*.h5`` files,
        ``sys.argv[2]``  path of the output file (truncated on start).

    For every song file a ``Song`` object is filled with the stringified
    values read through ``hdf5_getters`` and then serialised with
    ``json.dumps`` as a single line.  The path of each processed file is
    printed.

    The ``prompt`` flag below optionally asks the user for a list of
    attribute names and re-prompts until every name is recognised (typing
    ``exit`` terminates the program).  The entered names are only validated;
    they do not influence the JSON output.
    """
    outputFileName = sys.argv[2]

    # Attribute names accepted by the interactive prompt, compared
    # case-insensitively.
    known_attributes = set(name.lower() for name in (
        'AlbumID', 'AlbumName', 'ArtistID', 'ArtistLatitude',
        'ArtistLocation', 'ArtistLongitude', 'ArtistName', 'Danceability',
        'Duration', 'KeySignature', 'KeySignatureConfidence', 'SongID',
        'Tempo', 'TimeSignature', 'TimeSignatureConfidence', 'Title', 'Year',
    ))

    with open(outputFileName, 'w') as outputFile1:
        #################################################
        #if you want to prompt the user for a list of attributes,
        #set 'prompt' to True
        prompt = False
        #################################################
        while prompt:
            prompt = False

            # raw_input: this snippet targets Python 2.
            csvAttributeString = raw_input(
                "\n\nIn what order would you like the colums of the CSV file?\n"
                + "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"
                +
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo,"
                +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n"
                +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n"
                + "...or exit by typing 'exit'.\n\n")

            for attribute in re.split(r'\W+', csvAttributeString):
                attribute = attribute.lower()
                if attribute in known_attributes:
                    continue
                if attribute == 'exit':
                    sys.exit()
                # Unknown name: report it and ask again.
                prompt = True
                print("==============")
                print("I believe there has been an error with the input.")
                print("==============")
                break

        #################################################
        #Set the basedir here, the root directory from which the search
        #for files stored in a (hierarchical data structure) will originate
        basedir = sys.argv[1]
        ext = ".h5"  #Set the extension here. H5 is the extension for HDF5 files.
        #################################################

        for root, dirs, files in os.walk(basedir):
            for f in glob.glob(os.path.join(root, '*' + ext)):
                print(f)

                songH5File = hdf5_getters.open_h5_file_read(f)
                try:
                    song = Song(str(hdf5_getters.get_song_id(songH5File)))

                    song.artistID = str(hdf5_getters.get_artist_id(songH5File))
                    song.albumID = str(
                        hdf5_getters.get_release_7digitalid(songH5File))
                    song.albumName = str(hdf5_getters.get_release(songH5File))
                    song.artistLatitude = str(
                        hdf5_getters.get_artist_latitude(songH5File))
                    song.artistLocation = str(
                        hdf5_getters.get_artist_location(songH5File))
                    song.artistLongitude = str(
                        hdf5_getters.get_artist_longitude(songH5File))
                    song.artistFamiliarity = str(
                        hdf5_getters.get_artist_familiarity(songH5File))
                    song.artistHotttnesss = str(
                        hdf5_getters.get_artist_hotttnesss(songH5File))
                    song.artistName = str(
                        hdf5_getters.get_artist_name(songH5File))
                    # Tag and term arrays are flattened to one
                    # comma-separated string each.
                    song.artistMBTags = ','.join(
                        hdf5_getters.get_artist_mbtags(songH5File))
                    song.artistTerms = ','.join(
                        hdf5_getters.get_artist_terms(songH5File))
                    song.danceability = str(
                        hdf5_getters.get_danceability(songH5File))
                    song.energy = str(hdf5_getters.get_energy(songH5File))
                    song.duration = str(hdf5_getters.get_duration(songH5File))
                    song.keySignature = str(hdf5_getters.get_key(songH5File))
                    song.keySignatureConfidence = str(
                        hdf5_getters.get_key_confidence(songH5File))
                    song.loudness = str(hdf5_getters.get_loudness(songH5File))
                    song.mode = str(hdf5_getters.get_mode(songH5File))
                    song.hotttnesss = str(
                        hdf5_getters.get_song_hotttnesss(songH5File))
                    song.tempo = str(hdf5_getters.get_tempo(songH5File))
                    song.timeSignature = str(
                        hdf5_getters.get_time_signature(songH5File))
                    song.timeSignatureConfidence = str(
                        hdf5_getters.get_time_signature_confidence(songH5File))
                    song.title = str(hdf5_getters.get_title(songH5File))
                    song.year = str(hdf5_getters.get_year(songH5File))
                finally:
                    # Release the HDF5 handle even when a getter fails.
                    songH5File.close()

                # song.id is presumably set by the Song constructor from the
                # song id passed in above - confirm against the Song class.
                rowString = json.dumps({
                    'AlbumID': song.albumID,
                    'AlbumName': song.albumName,
                    'ArtistID': song.artistID,
                    'ArtistLatitude': song.artistLatitude,
                    'ArtistLocation': song.artistLocation,
                    'ArtistLongitude': song.artistLongitude,
                    'ArtistFamiliarity': song.artistFamiliarity,
                    'ArtistHotttnesss': song.artistHotttnesss,
                    'ArtistName': song.artistName,
                    'ArtistMBTags': song.artistMBTags,
                    'ArtistTerms': song.artistTerms,
                    'Danceability': song.danceability,
                    'Energy': song.energy,
                    'Duration': song.duration,
                    'KeySignature': song.keySignature,
                    'KeySignatureConfidence': song.keySignatureConfidence,
                    'Loudness': song.loudness,
                    'Mode': song.mode,
                    'Hotttnesss': song.hotttnesss,
                    'Tempo': song.tempo,
                    'SongID': song.id,
                    'TimeSignature': song.timeSignature,
                    'TimeSignatureConfidence': song.timeSignatureConfidence,
                    'Title': song.title,
                    'Year': song.year,
                })

                # One JSON document per line.
                outputFile1.write(rowString + "\n")
def data_to_flat_file(basedir, ext='.h5'):
    """ This function extracts the information from the tables and creates the flat file. """
    count = 0
    #song counter
    list_to_write = []
    group_index = 0
    row_to_write = ""
    writer = csv.writer(open("complete.csv", "wb"))
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            row = []
            print f
            h5 = hdf5_getters.open_h5_file_read(f)
            title = hdf5_getters.get_title(h5)
            title = title.replace('"', '')
            row.append(title)
            comma = title.find(',')
            if comma != -1:
                print title
                time.sleep(1)
            album = hdf5_getters.get_release(h5)
            album = album.replace('"', '')
            row.append(album)
            comma = album.find(',')
            if comma != -1:
                print album
                time.sleep(1)
            artist_name = hdf5_getters.get_artist_name(h5)
            comma = artist_name.find(',')
            if comma != -1:
                print artist_name
                time.sleep(1)
            artist_name = artist_name.replace('"', '')
            row.append(artist_name)
            duration = hdf5_getters.get_duration(h5)
            row.append(duration)
            samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
            row.append(samp_rt)
            artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
            row.append(artist_7digitalid)
            artist_fam = hdf5_getters.get_artist_familiarity(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_fam) == True:
                artist_fam = -1
            row.append(artist_fam)
            artist_hotness = hdf5_getters.get_artist_hotttnesss(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_hotness) == True:
                artist_hotness = -1
            row.append(artist_hotness)
            artist_id = hdf5_getters.get_artist_id(h5)
            row.append(artist_id)
            artist_lat = hdf5_getters.get_artist_latitude(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_lat) == True:
                artist_lat = -1
            row.append(artist_lat)
            artist_loc = hdf5_getters.get_artist_location(h5)
            row.append(artist_loc)
            artist_lon = hdf5_getters.get_artist_longitude(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_lon) == True:
                artist_lon = -1
            row.append(artist_lon)
            artist_mbid = hdf5_getters.get_artist_mbid(h5)
            row.append(artist_mbid)

            #Getting the genre
            art_trm = hdf5_getters.get_artist_terms(h5)
            trm_freq = hdf5_getters.get_artist_terms_freq(h5)
            trn_wght = hdf5_getters.get_artist_terms_weight(h5)
            a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
            genre_indexes = get_genre_indexes(
                trm_freq)  #index of the highest freq
            genre_set = 0  #flag to see if the genre has been set or not
            final_genre = []
            genres_so_far = []
            for i in range(len(genre_indexes)):
                genre_tmp = get_genre(
                    art_trm, genre_indexes[i]
                )  #genre that corresponds to the highest freq
                genres_so_far = genre_dict.get_genre_in_dict(
                    genre_tmp)  #getting the genre from the dictionary
                if len(genres_so_far) != 0:
                    for i in genres_so_far:
                        final_genre.append(i)
                        genre_set = 1

            if genre_set == 1:
                col_num = []
                for i in final_genre:
                    column = int(i)  #getting the column number of the genre
                    col_num.append(column)

                genre_array = genre_columns(col_num)  #genre array
                for i in range(len(
                        genre_array)):  #appending the genre_array to the row
                    row.append(genre_array[i])
            else:
                genre_array = genre_columns(
                    -1
                )  #when there is no genre matched, return an array of [0...0]
                for i in range(len(
                        genre_array)):  #appending the genre_array to the row
                    row.append(genre_array[i])

            artist_pmid = hdf5_getters.get_artist_playmeid(h5)
            row.append(artist_pmid)
            audio_md5 = hdf5_getters.get_audio_md5(h5)
            row.append(audio_md5)
            danceability = hdf5_getters.get_danceability(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(danceability) == True:
                danceability = -1
            row.append(danceability)
            end_fade_in = hdf5_getters.get_end_of_fade_in(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(end_fade_in) == True:
                end_fade_in = -1
            row.append(end_fade_in)
            energy = hdf5_getters.get_energy(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(energy) == True:
                energy = -1
            row.append(energy)
            song_key = hdf5_getters.get_key(h5)
            row.append(song_key)
            key_c = hdf5_getters.get_key_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(key_c) == True:
                key_c = -1
            row.append(key_c)
            loudness = hdf5_getters.get_loudness(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(loudness) == True:
                loudness = -1
            row.append(loudness)
            mode = hdf5_getters.get_mode(h5)
            row.append(mode)
            mode_conf = hdf5_getters.get_mode_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(mode_conf) == True:
                mode_conf = -1
            row.append(mode_conf)
            release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
            row.append(release_7digitalid)
            song_hot = hdf5_getters.get_song_hotttnesss(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(song_hot) == True:
                song_hot = -1
            row.append(song_hot)
            song_id = hdf5_getters.get_song_id(h5)
            row.append(song_id)
            start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
            row.append(start_fade_out)
            tempo = hdf5_getters.get_tempo(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(tempo) == True:
                tempo = -1
            row.append(tempo)
            time_sig = hdf5_getters.get_time_signature(h5)
            row.append(time_sig)
            time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(time_sig_c) == True:
                time_sig_c = -1
            row.append(time_sig_c)
            track_id = hdf5_getters.get_track_id(h5)
            row.append(track_id)
            track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
            row.append(track_7digitalid)
            year = hdf5_getters.get_year(h5)
            row.append(year)
            bars_c = hdf5_getters.get_bars_confidence(h5)
            bars_start = hdf5_getters.get_bars_start(h5)
            row_bars_padding = padding(
                245
            )  #this is the array that will be attached at the end of th row

            #--------------bars---------------"
            gral_info = []
            gral_info = row[:]
            empty = []
            for i, item in enumerate(bars_c):
                row.append(group_index)
                row.append(i)
                row.append(bars_c[i])
                bars_c_avg = get_avg(bars_c)
                row.append(bars_c_avg)
                bars_c_max = get_max(bars_c)
                row.append(bars_c_max)
                bars_c_min = get_min(bars_c)
                row.append(bars_c_min)
                bars_c_stddev = get_stddev(bars_c)
                row.append(bars_c_stddev)
                bars_c_count = get_count(bars_c)
                row.append(bars_c_count)
                bars_c_sum = get_sum(bars_c)
                row.append(bars_c_sum)
                row.append(bars_start[i])
                bars_start_avg = get_avg(bars_start)
                row.append(bars_start_avg)
                bars_start_max = get_max(bars_start)
                row.append(bars_start_max)
                bars_start_min = get_min(bars_start)
                row.append(bars_start_min)
                bars_start_stddev = get_stddev(bars_start)
                row.append(bars_start_stddev)
                bars_start_count = get_count(bars_start)
                row.append(bars_start_count)
                bars_start_sum = get_sum(bars_start)
                row.append(bars_start_sum)
                for i in row_bars_padding:
                    row.append(i)

                writer.writerow(row)
                row = []
                row = gral_info[:]

            #--------beats---------------
            beats_c = hdf5_getters.get_beats_confidence(h5)
            group_index = 1
            row = []
            row = gral_info[:]
            row_front = padding(
                14)  #blanks left in front of the row(empty spaces for bars)
            row_beats_padding = padding(231)
            for i, item in enumerate(beats_c):
                row.append(group_index)
                row.append(i)
                for index in row_front:  #padding blanks in front of the beats
                    row.append(index)

                row.append(beats_c[i])
                beats_c_avg = get_avg(beats_c)
                row.append(beats_c_avg)
                beats_c_max = get_max(beats_c)
                row.append(beats_c_max)
                beats_c_min = get_min(beats_c)
                row.append(beats_c_min)
                beats_c_stddev = get_stddev(beats_c)
                row.append(beats_c_stddev)
                beats_c_count = get_count(beats_c)
                row.append(beats_c_count)
                beats_c_sum = get_sum(beats_c)
                row.append(beats_c_sum)
                beats_start = hdf5_getters.get_beats_start(h5)
                row.append(beats_start[i])
                beats_start_avg = get_avg(beats_start)
                row.append(beats_start_avg)
                beats_start_max = get_max(beats_start)
                row.append(beats_start_max)
                beats_start_min = get_min(beats_start)
                row.append(beats_start_min)
                beats_start_stddev = get_stddev(beats_start)
                row.append(beats_start_stddev)
                beats_start_count = get_count(beats_start)
                row.append(beats_start_count)
                beats_start_sum = get_sum(beats_start)
                row.append(beats_start_sum)
                for i in row_beats_padding:
                    row.append(i)

                writer.writerow(row)
                row = []
                row = gral_info[:]

            #--------sections---------------
            row_sec_padding = padding(
                217)  #blank spaces left at the end of the row
            sec_c = hdf5_getters.get_sections_confidence(h5)
            group_index = 2
            row = []
            row = gral_info[:]
            row_front = padding(
                28)  #blank spaces left in front(empty spaces for bars,beats)
            for i, item in enumerate(sec_c):
                row.append(group_index)
                row.append(i)
                for index in row_front:  #padding blanks in front of the sections
                    row.append(index)

                row.append(sec_c[i])
                sec_c_avg = get_avg(sec_c)
                row.append(sec_c_avg)
                sec_c_max = get_max(sec_c)
                row.append(sec_c_max)
                sec_c_min = get_min(sec_c)
                row.append(sec_c_min)
                sec_c_stddev = get_stddev(sec_c)
                row.append(sec_c_stddev)
                sec_c_count = get_count(sec_c)
                row.append(sec_c_count)
                sec_c_sum = get_sum(sec_c)
                row.append(sec_c_sum)
                sec_start = hdf5_getters.get_sections_start(h5)
                row.append(sec_start[i])
                sec_start_avg = get_avg(sec_start)
                row.append(sec_start_avg)
                sec_start_max = get_max(sec_start)
                row.append(sec_start_max)
                sec_start_min = get_min(sec_start)
                row.append(sec_start_min)
                sec_start_stddev = get_stddev(sec_start)
                row.append(sec_start_stddev)
                sec_start_count = get_count(sec_start)
                row.append(sec_start_count)
                sec_start_sum = get_sum(sec_start)
                row.append(sec_start_sum)
                for i in row_sec_padding:  #appending the blank spaces at the end of the row
                    row.append(i)

                writer.writerow(row)
                row = []
                row = gral_info[:]

            #--------segments---------------
            row_seg_padding = padding(182)  #blank spaces at the end of the row
            row_front = padding(42)  #blank spaces left in front of segments
            seg_c = hdf5_getters.get_segments_confidence(h5)
            group_index = 3
            row = []
            row = gral_info[:]
            for i, item in enumerate(seg_c):
                row.append(group_index)
                row.append(i)
                for index in row_front:  #padding blanks in front of the segments
                    row.append(index)

                row.append(seg_c[i])
                seg_c_avg = get_avg(seg_c)
                row.append(seg_c_avg)
                seg_c_max = get_max(seg_c)
                row.append(seg_c_max)
                seg_c_min = get_min(seg_c)
                row.append(seg_c_min)
                seg_c_stddev = get_stddev(seg_c)
                row.append(seg_c_stddev)
                seg_c_count = get_count(seg_c)
                row.append(seg_c_count)
                seg_c_sum = get_sum(seg_c)
                row.append(seg_c_sum)
                seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
                row.append(seg_loud_max[i])
                seg_loud_max_avg = get_avg(seg_loud_max)
                row.append(seg_loud_max_avg)
                seg_loud_max_max = get_max(seg_loud_max)
                row.append(seg_loud_max_max)
                seg_loud_max_min = get_min(seg_loud_max)
                row.append(seg_loud_max_min)
                seg_loud_max_stddev = get_stddev(seg_loud_max)
                row.append(seg_loud_max_stddev)
                seg_loud_max_count = get_count(seg_loud_max)
                row.append(seg_loud_max_count)
                seg_loud_max_sum = get_sum(seg_loud_max)
                row.append(seg_loud_max_sum)
                seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(
                    h5)
                row.append(seg_loud_max_time[i])
                seg_loud_max_time_avg = get_avg(seg_loud_max_time)
                row.append(seg_loud_max_time_avg)
                seg_loud_max_time_max = get_max(seg_loud_max_time)
                row.append(seg_loud_max_time_max)
                seg_loud_max_time_min = get_min(seg_loud_max_time)
                row.append(seg_loud_max_time_min)
                seg_loud_max_time_stddev = get_stddev(seg_loud_max_time)
                row.append(seg_loud_max_time_stddev)
                seg_loud_max_time_count = get_count(seg_loud_max_time)
                row.append(seg_loud_max_time_count)
                seg_loud_max_time_sum = get_sum(seg_loud_max_time)
                row.append(seg_loud_max_time_sum)
                seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
                row.append(seg_loud_start[i])
                seg_loud_start_avg = get_avg(seg_loud_start)
                row.append(seg_loud_start_avg)
                seg_loud_start_max = get_max(seg_loud_start)
                row.append(seg_loud_start_max)
                seg_loud_start_min = get_min(seg_loud_start)
                row.append(seg_loud_start_min)
                seg_loud_start_stddev = get_stddev(seg_loud_start)
                row.append(seg_loud_start_stddev)
                seg_loud_start_count = get_count(seg_loud_start)
                row.append(seg_loud_start_count)
                seg_loud_start_sum = get_sum(seg_loud_start)
                row.append(seg_loud_start_sum)
                seg_start = hdf5_getters.get_segments_start(h5)
                row.append(seg_start[i])
                seg_start_avg = get_avg(seg_start)
                row.append(seg_start_avg)
                seg_start_max = get_max(seg_start)
                row.append(seg_start_max)
                seg_start_min = get_min(seg_start)
                row.append(seg_start_min)
                seg_start_stddev = get_stddev(seg_start)
                row.append(seg_start_stddev)
                seg_start_count = get_count(seg_start)
                row.append(seg_start_count)
                seg_start_sum = get_sum(seg_start)
                row.append(seg_start_sum)
                for i in row_seg_padding:  #appending blank spaces at the end of the row
                    row.append(i)

                writer.writerow(row)
                row = []
                row = gral_info[:]

            #----------segments pitch and timbre---------------"
            row_seg2_padding = padding(
                14)  #blank spaces left at the end of the row
            row_front = padding(
                77)  #blank spaces left at the front of the segments and timbre
            seg_pitch = hdf5_getters.get_segments_pitches(h5)
            transpose_pitch = seg_pitch.transpose(
            )  #this is to tranpose the matrix,so we can have 12 rows
            group_index = 4
            row = []
            row = gral_info[:]
            for i, item in enumerate(transpose_pitch[0]):
                row.append(group_index)
                row.append(i)
                for index in row_front:  #padding blanks in front of segments and timbre
                    row.append(index)

                row.append(transpose_pitch[0][i])
                seg_pitch_avg = get_avg(transpose_pitch[0])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[0])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[0])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[0])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[0])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[0])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[1][i])
                seg_pitch_avg = get_avg(transpose_pitch[1])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[1])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[1])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[1])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[1])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[1])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[2][i])
                seg_pitch_avg = get_avg(transpose_pitch[2])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[2])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[2])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[2])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[2])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[2])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[3][i])
                seg_pitch_avg = get_avg(transpose_pitch[3])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[3])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[3])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[3])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[3])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[3])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[4][i])
                seg_pitch_avg = get_avg(transpose_pitch[4])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[4])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[4])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[4])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[4])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[4])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[5][i])
                seg_pitch_avg = get_avg(transpose_pitch[5])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[5])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[5])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[5])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[5])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[5])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[6][i])
                seg_pitch_avg = get_avg(transpose_pitch[6])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[6])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[6])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[6])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[6])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[6])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[7][i])
                seg_pitch_avg = get_avg(transpose_pitch[7])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[7])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[7])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[7])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[7])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[7])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[8][i])
                seg_pitch_avg = get_avg(transpose_pitch[8])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[8])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[8])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[8])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[8])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[8])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[9][i])
                seg_pitch_avg = get_avg(transpose_pitch[9])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[9])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[9])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[9])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[9])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[9])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[10][i])
                seg_pitch_avg = get_avg(transpose_pitch[10])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[10])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[10])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[10])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[10])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[10])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[11][i])
                seg_pitch_avg = get_avg(transpose_pitch[11])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[11])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[11])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[11])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[11])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[11])
                row.append(seg_pitch_sum)
                #timbre arrays
                seg_timbre = hdf5_getters.get_segments_timbre(h5)
                transpose_timbre = seg_pitch.transpose(
                )  #tranposing matrix, to have 12 rows
                row.append(transpose_timbre[0][i])
                seg_timbre_avg = get_avg(transpose_timbre[0])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[0])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[0])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[0])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[0])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[0])
                row.append(seg_timbre_sum)
                row.append(transpose_timbre[1][i])
                seg_timbre_avg = get_avg(transpose_timbre[1])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[1])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[1])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[1])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[1])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[1])
                row.append(seg_timbre_sum)
                row.append(transpose_timbre[2][i])
                seg_timbre_avg = get_avg(transpose_timbre[2])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[2])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[2])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[2])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[2])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[2])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[3][i])
                seg_timbre_avg = get_avg(transpose_timbre[3])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[3])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[3])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[3])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[3])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[3])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[4][i])
                seg_timbre_avg = get_avg(transpose_timbre[4])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[4])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[4])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[4])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[4])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[4])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[5][i])
                seg_timbre_avg = get_avg(transpose_timbre[5])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[5])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[5])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[5])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[5])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[5])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[6][i])
                seg_timbre_avg = get_avg(transpose_timbre[6])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[6])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[6])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[6])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[6])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[6])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[7][i])
                seg_timbre_avg = get_avg(transpose_timbre[7])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[7])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[7])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[7])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[7])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[7])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[8][i])
                seg_timbre_avg = get_avg(transpose_timbre[8])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[8])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[8])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[8])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[8])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[8])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[9][i])
                seg_timbre_avg = get_avg(transpose_timbre[9])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[9])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[9])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[9])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[9])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[9])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[10][i])
                seg_timbre_avg = get_avg(transpose_timbre[10])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[10])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[10])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[10])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[10])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[10])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[11][i])
                seg_timbre_avg = get_avg(transpose_timbre[11])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[11])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[11])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[11])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[11])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[11])
                row.append(seg_timbre_sum)
                for item in row_seg2_padding:
                    row.append(item)
                writer.writerow(row)
                row = []
                row = gral_info[:]

    # "--------tatums---------------"
            tatms_c = hdf5_getters.get_tatums_confidence(h5)
            group_index = 5
            row_front = padding(245)  #blank spaces left in front of tatums
            row = []
            row = gral_info[:]
            for i, item in enumerate(tatms_c):
                row.append(group_index)
                row.append(i)
                for item in row_front:  #appending blank spaces at the front of the row
                    row.append(item)

                row.append(tatms_c[i])
                tatms_c_avg = get_avg(tatms_c)
                row.append(tatms_c_avg)
                tatms_c_max = get_max(tatms_c)
                row.append(tatms_c_max)
                tatms_c_min = get_min(tatms_c)
                row.append(tatms_c_min)
                tatms_c_stddev = get_stddev(tatms_c)
                row.append(tatms_c_stddev)
                tatms_c_count = get_count(tatms_c)
                row.append(tatms_c_count)
                tatms_c_sum = get_sum(tatms_c)
                row.append(tatms_c_sum)
                tatms_start = hdf5_getters.get_tatums_start(h5)
                row.append(tatms_start[i])
                tatms_start_avg = get_avg(tatms_start)
                row.append(tatms_start_avg)
                tatms_start_max = get_max(tatms_start)
                row.append(tatms_start_max)
                tatms_start_min = get_min(tatms_start)
                row.append(tatms_start_min)
                tatms_start_stddev = get_stddev(tatms_start)
                row.append(tatms_start_stddev)
                tatms_start_count = get_count(tatms_start)
                row.append(tatms_start_count)
                tatms_start_sum = get_sum(tatms_start)
                row.append(tatms_start_sum)
                writer.writerow(row)
                row = []
                row = gral_info[:]

            transpose_pitch = seg_pitch.transpose(
            )  #this is to tranpose the matrix,so we can have 12 rows
            #arrays containing the aggregate values of the 12 rows
            seg_pitch_avg = []
            seg_pitch_max = []
            seg_pitch_min = []
            seg_pitch_stddev = []
            seg_pitch_count = []
            seg_pitch_sum = []
            i = 0
            #Getting the aggregate values in the pitches array
            for row in transpose_pitch:
                seg_pitch_avg.append(get_avg(row))
                seg_pitch_max.append(get_max(row))
                seg_pitch_min.append(get_min(row))
                seg_pitch_stddev.append(get_stddev(row))
                seg_pitch_count.append(get_count(row))
                seg_pitch_sum.append(get_sum(row))
                i = i + 1

            #extracting information from the timbre array
            transpose_timbre = seg_pitch.transpose(
            )  #tranposing matrix, to have 12 rows
            #arrays containing the aggregate values of the 12 rows
            seg_timbre_avg = []
            seg_timbre_max = []
            seg_timbre_min = []
            seg_timbre_stddev = []
            seg_timbre_count = []
            seg_timbre_sum = []
            i = 0
            for row in transpose_timbre:
                seg_timbre_avg.append(get_avg(row))
                seg_timbre_max.append(get_max(row))
                seg_timbre_min.append(get_min(row))
                seg_timbre_stddev.append(get_stddev(row))
                seg_timbre_count.append(get_count(row))
                seg_timbre_sum.append(get_sum(row))
                i = i + 1

            h5.close()
            count = count + 1
            print count
Example #22
0
                    cursor.execute("INSERT INTO artist_genres VALUES ('" +
                                   artist_id + "','" + term + "')")
            for tag in mbtags:
                tag = tag.replace("'", "")
                cursor.execute(
                    "SELECT * FROM artist_genres WHERE artist_id='" +
                    artist_id + "' AND genre ='" + tag + "'")
                if cursor.rowcount != 1:
                    cursor.execute("INSERT INTO artist_genres VALUES ('" +
                                   artist_id + "','" + tag + "')")
            ''' Store track tuples '''

            track_id = h.get_track_id(h5, 0)
            track_title = h.get_title(h5, 0)
            track_title = track_title.replace("'", "")
            track_album = h.get_release(h5, 0)
            track_album = track_album.replace("'", "")
            track_duration = str(h.get_duration(h5, 0))
            track_year = str(h.get_year(h5, 0))

            cursor.execute("SELECT * FROM track WHERE track_id = '" +
                           track_id + "'")
            rs = cursor.fetchall()
            if cursor.rowcount != 1:
                cursor.execute("INSERT INTO track VALUES ('" + track_id +
                               "','" + track_title + "','" + artist_id +
                               "','" + artist_name + "','" + track_album +
                               "'," + track_duration + "," + track_year + ");")
            ''' Store track_analysis tuples '''
            print("Track ID: " + h.get_track_id(h5, 0))
            track_tempo = str(h.get_tempo(h5, 0))
def get_all_rows(basedir, ext='.h5'):
    """Collect scalar song metadata from every HDF5 file below ``basedir``.

    The directory tree is walked with ``os.walk``; in each directory every
    file matching ``'*' + ext`` is opened with
    ``hdf5_getters.open_h5_file_read``.  For each song index reported by
    ``hdf5_getters.get_num_songs`` one dict is built from the corresponding
    ``hdf5_getters.get_*`` calls and appended to the result.

    Each file is closed as soon as its songs have been read, even if one of
    the getters raises.  (Previously only the last file of each directory
    was closed, and a directory without matching files raised
    ``UnboundLocalError``.)

    Args:
        basedir: Root directory to search recursively.
        ext: File-name suffix of the files to read (default ``'.h5'``).

    Returns:
        A list of dicts, one per song, in the order the files and songs were
        visited.  Empty when no matching file is found.
    """
    rows = []
    for root, _dirs, _files in os.walk(basedir):
        for path in glob.glob(os.path.join(root, '*' + ext)):
            h5 = hdf5_getters.open_h5_file_read(path)
            try:
                num_songs = hdf5_getters.get_num_songs(h5)
                for i in range(num_songs):
                    # Progress trace kept from the original implementation.
                    print(i)
                    obj = {}
                    # Byte-string fields are decoded to text; numeric fields
                    # are stored exactly as the getters return them.
                    obj['artist_name'] = hdf5_getters.get_artist_name(
                        h5, i).decode('UTF-8')
                    obj['artist_familiarity'] = (
                        hdf5_getters.get_artist_familiarity(h5, i))
                    obj['artist_hotness'] = (
                        hdf5_getters.get_artist_hotttnesss(h5, i))
                    obj['artist_id'] = hdf5_getters.get_artist_id(
                        h5, i).decode('UTF-8')
                    obj['artist_playmeid'] = (
                        hdf5_getters.get_artist_playmeid(h5, i))
                    obj['artist_7digitalid'] = (
                        hdf5_getters.get_artist_7digitalid(h5, i))
                    obj['release'] = hdf5_getters.get_release(
                        h5, i).decode('UTF-8')
                    obj['song_hotttnesss'] = (
                        hdf5_getters.get_song_hotttnesss(h5, i))
                    obj['title'] = hdf5_getters.get_title(
                        h5, i).decode('UTF-8')
                    obj['danceability'] = hdf5_getters.get_danceability(h5, i)
                    obj['duration'] = hdf5_getters.get_duration(h5, i)
                    obj['end_of_fade_in'] = (
                        hdf5_getters.get_end_of_fade_in(h5, i))
                    obj['energy'] = hdf5_getters.get_energy(h5, i)
                    obj['key'] = hdf5_getters.get_key(h5, i)
                    obj['key_confidence'] = (
                        hdf5_getters.get_key_confidence(h5, i))
                    obj['loudness'] = hdf5_getters.get_loudness(h5, i)
                    obj['mode'] = hdf5_getters.get_mode(h5, i)
                    obj['tempo'] = hdf5_getters.get_tempo(h5, i)
                    obj['time_signature'] = (
                        hdf5_getters.get_time_signature(h5, i))
                    obj['track_id'] = hdf5_getters.get_track_id(
                        h5, i).decode('UTF-8')
                    obj['year'] = hdf5_getters.get_year(h5, i)
                    # NOTE: artist mbid/location, term and tag arrays, and the
                    # per-segment/section/beat/bar/tatum arrays are not
                    # collected by this function.
                    rows.append(obj)
            finally:
                # Release the handle for every file, not just the last one.
                h5.close()
    return rows
Example #24
0
def main():
    outputFile1 = open('SongCSV.csv', 'w')
    csvRowString = ""

    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #leave the prompt boolean set to True
    #else, set 'prompt' to False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################
    if prompt == True:
        while prompt:

            prompt = False

            csvAttributeString = raw_input(
                "\n\nIn what order would you like the colums of the CSV file?\n"
                + "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"
                +
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo,"
                +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n"
                +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n"
                + "...or exit by typing 'exit'.\n\n")

            csvAttributeList = re.split('\W+', csvAttributeString)
            for i, v in enumerate(csvAttributeList):
                csvAttributeList[i] = csvAttributeList[i].lower()

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += 'AlbumID'
                elif attribute == 'AlbumName'.lower():
                    csvRowString += 'AlbumName'
                elif attribute == 'ArtistID'.lower():
                    csvRowString += 'ArtistID'
                elif attribute == 'ArtistLatitude'.lower():
                    csvRowString += 'ArtistLatitude'
                elif attribute == 'ArtistLocation'.lower():
                    csvRowString += 'ArtistLocation'
                elif attribute == 'ArtistLongitude'.lower():
                    csvRowString += 'ArtistLongitude'
                elif attribute == 'ArtistName'.lower():
                    csvRowString += 'ArtistName'
                elif attribute == 'Danceability'.lower():
                    csvRowString += 'Danceability'
                elif attribute == 'Duration'.lower():
                    csvRowString += 'Duration'
                elif attribute == 'KeySignature'.lower():
                    csvRowString += 'KeySignature'
                elif attribute == 'KeySignatureConfidence'.lower():
                    csvRowString += 'KeySignatureConfidence'
                elif attribute == 'SongID'.lower():
                    csvRowString += "SongID"
                elif attribute == 'Tempo'.lower():
                    csvRowString += 'Tempo'
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += 'TimeSignature'
                elif attribute == 'TimeSignatureConfidence'.lower():
                    csvRowString += 'TimeSignatureConfidence'
                elif attribute == 'Title'.lower():
                    csvRowString += 'Title'
                elif attribute == 'Year'.lower():
                    csvRowString += 'Year'
                elif attribute == 'Familiarity'.lower():  ####Added by us!
                    csvRowString += song.familiarity
                elif attribute == 'artist_mbid'.lower():
                    csvRowString += song.artist_mbid
                elif attribute == 'artist_playmeid'.lower():
                    csvRowString += song.artist_playmeid
                elif attribute == 'artist_7digid'.lower():
                    csvRowString += song.artist_7digid
                elif attribute == 'hottness'.lower():
                    csvRowString += song.hottness
                elif attribute == 'song_hottness'.lower():
                    csvRowString += song.song_hottness
                elif attribute == 'digitalid7'.lower():
                    csvRowString += song.digitalid7
                elif attribute == 'similar_artists'.lower():
                    csvRowString += song.similar_artists
                elif attribute == 'artist_terms'.lower():
                    csvRowString += song.artist_terms
                elif attribute == 'art_terms_freq'.lower():
                    csvRowString += song.art_terms_freq
                elif attribute == 'art_terms_weight'.lower():
                    csvRowString += song.art_terms_weight
                elif attribute == 'a_sample_rate'.lower():
                    csvRowString += song.a_sample_rate
                elif attribute == 'audio_md5'.lower():
                    csvRowString += song.audio_md5
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += song.end_of_fade_in
                elif attribute == 'energy'.lower():
                    csvRowString += song.energy
                elif attribute == 'loudness'.lower():
                    csvRowString += song.loudness
                elif attribute == 'mode'.lower():
                    csvRowString += song.mode
                elif attribute == 'mode_conf'.lower():
                    csvRowString += song.mode_conf
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += song.start_of_fade_out
                elif attribute == 'trackid'.lower():
                    csvRowString += song.trackid
                elif attribute == 'segm_start'.lower():
                    csvRowString += song.segm_start
                elif attribute == 'segm_conf'.lower():
                    csvRowString += song.segm_conf
                elif attribute == 'segm_pitch'.lower():
                    csvRowString += song.segm_pitch
                elif attribute == 'segm_timbre'.lower():
                    csvRowString += song.segm_timbre
                elif attribute == 'segm_max_loud'.lower():
                    csvRowString += song.segm_max_loud
                elif attribute == 'segm_max_loud_time'.lower():
                    csvRowString += song.segm_max_loud_time
                elif attribute == 'segm_loud_start'.lower():
                    csvRowString += song.segm_loud_start
                elif attribute == 'sect_start'.lower():
                    csvRowString += song.sect_start
                elif attribute == 'sect_conf'.lower():
                    csvRowString += song.sect_conf
                elif attribute == 'beats_start'.lower():
                    csvRowString += song.beats_start
                elif attribute == 'beats_conf'.lower():
                    csvRowString += song.beats_conf
                elif attribute == 'bars_start'.lower():
                    csvRowString += song.bars_start
                elif attribute == 'bars_conf'.lower():
                    csvRowString += song.bars_conf
                elif attribute == 'tatums_start'.lower():
                    csvRowString += song.tatums_start
                elif attribute == 'tatums_conf'.lower():
                    csvRowString += song.tatums_conf
                elif attribute == 'artist_mbtags'.lower():
                    csvRowString += song.artist_mbtags
                elif attribute == 'artist_mbtags_count'.lower():
                    csvRowString += song.artist_mbtags_count
                elif attribute == 'Exit'.lower():
                    sys.exit()
                else:
                    prompt = True
                    print("==============")
                    print("I believe there has been an error with the input.")
                    print("==============")
                    break

                csvRowString += ","

            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex - 1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""
    #else, if you want to hard code the order of the csv file and not prompt
    #the user,
    else:
        #################################################
        #change the order of the csv file here
        #Default is to list all available attributes (in alphabetical order)
        csvRowString = "SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,Title,Year,Familiarity,Artist_Mbid,Artist_PlaymeId,Artist_7didId,Hottness,Song_Hottness,7digitalid,A_Sample_Rate,Audio_Md5,End_Of_Fade_In,Energy,Loudness,Mode,Mode_Conf,Start_Of_Fade_Out,TrackId"
        #################################################

        csvAttributeList = re.split(',', csvRowString)
        for i, v in enumerate(csvAttributeList):
            csvAttributeList[i] = csvAttributeList[i].lower()
        csvRowString += "\n"
        outputFile1.write(csvRowString)
        csvRowString = ""

    #################################################

    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = "/home/bigdata/smalltest/"  # "." As the default means the current directory
    ext = ".h5"  #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    #FOR LOOP
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            print(f)

            songH5File = hdf5_getters.open_h5_file_read(f)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            # testDanceability = hdf5_getters.get_danceability(songH5File)
            # print type(testDanceability)
            # print ("Here is the danceability: ") + str(testDanceability)

            song.artistID = str(hdf5_getters.get_artist_id(songH5File))
            song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.albumName = str(hdf5_getters.get_release(songH5File))
            song.artistLatitude = str(
                hdf5_getters.get_artist_latitude(songH5File))
            song.artistLocation = str(
                hdf5_getters.get_artist_location(songH5File))
            song.artistLongitude = str(
                hdf5_getters.get_artist_longitude(songH5File))
            song.artistName = str(hdf5_getters.get_artist_name(songH5File))
            song.danceability = str(hdf5_getters.get_danceability(songH5File))
            song.duration = str(hdf5_getters.get_duration(songH5File))
            # song.setGenreList()
            song.keySignature = str(hdf5_getters.get_key(songH5File))
            song.keySignatureConfidence = str(
                hdf5_getters.get_key_confidence(songH5File))
            # song.lyrics = None
            # song.popularity = None
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.timeSignature = str(
                hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = str(
                hdf5_getters.get_time_signature_confidence(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))

            #########Added by us!
            song.familiarity = str(
                hdf5_getters.get_artist_familiarity(songH5File))
            song.artist_mbid = str(hdf5_getters.get_artist_mbid(songH5File))
            song.artist_playmeid = str(
                hdf5_getters.get_artist_playmeid(songH5File))
            song.artist_7digid = str(
                hdf5_getters.get_artist_7digitalid(songH5File))
            song.hottness = str(hdf5_getters.get_artist_hotttnesss(songH5File))
            song.song_hottness = str(
                hdf5_getters.get_song_hotttnesss(songH5File))
            song.digitalid7 = str(
                hdf5_getters.get_track_7digitalid(songH5File))
            #song.similar_artists = str(hdf5_getters.get_similar_artists(songH5File))
            #song.artist_terms = str(hdf5_getters.get_artist_terms(songH5File))
            #song.art_terms_freq = str(hdf5_getters.get_artist_terms_freq(songH5File))
            #song.art_terms_weight = str(hdf5_getters.get_artist_terms_weight(songH5File))
            song.a_sample_rate = str(
                hdf5_getters.get_analysis_sample_rate(songH5File))
            song.audio_md5 = str(hdf5_getters.get_audio_md5(songH5File))
            song.end_of_fade_in = str(
                hdf5_getters.get_end_of_fade_in(songH5File))
            song.energy = str(hdf5_getters.get_energy(songH5File))
            song.loudness = str(hdf5_getters.get_loudness(songH5File))
            song.mode = str(hdf5_getters.get_mode(songH5File))
            song.mode_conf = str(hdf5_getters.get_mode_confidence(songH5File))
            song.start_of_fade_out = str(
                hdf5_getters.get_start_of_fade_out(songH5File))
            song.trackid = str(hdf5_getters.get_track_id(songH5File))
            #song.segm_start = str(hdf5_getters.get_segments_start(songH5File))
            #song.segm_conf = str(hdf5_getters.get_segments_confidence(songH5File))
            #song.segm_pitch = str(hdf5_getters.get_segments_pitches(songH5File))
            #song.segm_timbre = str(hdf5_getters.get_segments_timbre(songH5File))
            #song.segm_max_loud = str(hdf5_getters.get_segments_loudness_max(songH5File))
            #song.segm_max_loud_time = str(hdf5_getters.get_segments_loudness_max_time(songH5File))
            #song.segm_loud_start = str(hdf5_getters.get_segments_loudness_start(songH5File))
            #song.sect_start = str(hdf5_getters.get_sections_start(songH5File))
            #song.sect_conf = str(hdf5_getters.get_sections_confidence(songH5File))
            #song.beats_start = str(hdf5_getters.get_beats_start(songH5File))
            #song.beats_conf = str(hdf5_getters.get_beats_confidence(songH5File))
            #song.bars_start = str(hdf5_getters.get_bars_start(songH5File))
            #song.bars_conf = str(hdf5_getters.get_bars_confidence(songH5File))
            #song.tatums_start = str(hdf5_getters.get_tatums_start(songH5File))
            #song.tatums_conf = str(hdf5_getters.get_tatums_confidence(songH5File))
            #song.artist_mbtags = str(hdf5_getters.get_artist_mbtags(songH5File))
            #song.artist_mbtags_count = str(hdf5_getters.get_artist_mbtags_count(songH5File))

            #print song count
            #csvRowString += str(song.songCount) + ","

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += song.albumID
                elif attribute == 'AlbumName'.lower():
                    albumName = song.albumName
                    albumName = albumName.replace("b\"", "")
                    albumName = albumName.replace("\"", "")
                    albumName = albumName.replace(',', "")
                    csvRowString += "\"" + albumName + "\""
                elif attribute == 'ArtistID'.lower():
                    csvRowString += "\"" + song.artistID + "\""
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    csvRowString += latitude
                elif attribute == 'ArtistLocation'.lower():
                    location = song.artistLocation
                    location = location.replace(',', '')
                    location = location.replace("b\"", "")
                    location = location.replace("\"", "")
                    csvRowString += "\"" + location + "\""
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    csvRowString += longitude
                elif attribute == 'ArtistName'.lower():
                    artistName = song.artistName
                    artistName = artistName.replace("b\"", "")
                    artistName = artistName.replace("\"", "")
                    csvRowString += "\"" + artistName + "\""
                elif attribute == 'Danceability'.lower():
                    csvRowString += song.danceability
                elif attribute == 'Duration'.lower():
                    csvRowString += song.duration
                elif attribute == 'KeySignature'.lower():
                    csvRowString += song.keySignature
                elif attribute == 'KeySignatureConfidence'.lower():
                    # print "key sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.keySignatureConfidence
                elif attribute == 'SongID'.lower():
                    csvRowString += "\"" + song.id + "\""
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    csvRowString += song.tempo
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += song.timeSignature
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.timeSignatureConfidence
                elif attribute == 'Title'.lower():
                    t = song.title
                    t = t.replace("b\"", "")
                    t = t.replace("\"", "")
                    csvRowString += "\"" + t + "\""
                elif attribute == 'Year'.lower():
                    csvRowString += song.year
                elif attribute == 'Familiarity'.lower():  ####Added by us!
                    csvRowString += song.familiarity
                elif attribute == 'artist_mbid'.lower():
                    csvRowString += "\"" + song.artist_mbid + "\""
                elif attribute == 'artist_playmeid'.lower():
                    csvRowString += song.artist_playmeid
                elif attribute == 'artist_7digid'.lower():
                    csvRowString += song.artist_7digid
                elif attribute == 'hottness'.lower():
                    csvRowString += song.hottness
                elif attribute == 'song_hottness'.lower():
                    csvRowString += song.song_hottness
                elif attribute == 'digitalid7'.lower():
                    csvRowString += song.digitalid7
                elif attribute == 'similar_artists'.lower():
                    csvRowString += song.similar_artists
                elif attribute == 'artist_terms'.lower():
                    csvRowString += song.artist_terms
                elif attribute == 'art_terms_freq'.lower():
                    csvRowString += song.art_terms_freq
                elif attribute == 'art_terms_weight'.lower():
                    csvRowString += song.art_terms_weight
                elif attribute == 'a_sample_rate'.lower():
                    csvRowString += song.a_sample_rate
                elif attribute == 'audio_md5'.lower():
                    csvRowString += "\"" + song.audio_md5 + "\""
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += song.end_of_fade_in
                elif attribute == 'energy'.lower():
                    csvRowString += song.energy
                elif attribute == 'loudness'.lower():
                    csvRowString += song.loudness
                elif attribute == 'mode'.lower():
                    csvRowString += song.mode
                elif attribute == 'mode_conf'.lower():
                    csvRowString += song.mode_conf
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += song.start_of_fade_out
                elif attribute == 'trackid'.lower():
                    csvRowString += "\"" + song.trackid + "\""
                elif attribute == 'segm_start'.lower():
                    csvRowString += song.segm_start
                elif attribute == 'segm_conf'.lower():
                    csvRowString += song.segm_conf
                elif attribute == 'segm_pitch'.lower():
                    csvRowString += song.segm_pitch
                elif attribute == 'segm_timbre'.lower():
                    csvRowString += song.segm_timbre
                elif attribute == 'segm_max_loud'.lower():
                    csvRowString += song.segm_max_loud
                elif attribute == 'segm_max_loud_time'.lower():
                    csvRowString += song.segm_max_loud_time
                elif attribute == 'segm_loud_start'.lower():
                    csvRowString += song.segm_loud_start
                elif attribute == 'sect_start'.lower():
                    csvRowString += song.sect_start
                elif attribute == 'sect_conf'.lower():
                    csvRowString += song.sect_conf
                elif attribute == 'beats_start'.lower():
                    csvRowString += song.beats_start
                elif attribute == 'beats_conf'.lower():
                    csvRowString += song.beats_conf
                elif attribute == 'bars_start'.lower():
                    csvRowString += song.bars_start
                elif attribute == 'bars_conf'.lower():
                    csvRowString += song.bars_conf
                elif attribute == 'tatums_start'.lower():
                    csvRowString += song.tatums_start
                elif attribute == 'tatums_conf'.lower():
                    csvRowString += song.tatums_conf
                elif attribute == 'artist_mbtags'.lower():
                    csvRowString += song.artist_mbtags
                elif attribute == 'artist_mbtags_count'.lower():
                    csvRowString += song.artist_mbtags_count
                else:
                    csvRowString += "\"ERR\""

                csvRowString += ","

            #Remove the final comma from each row in the csv
            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex - 1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""

            songH5File.close()

    outputFile1.close()
def data_to_flat_file(basedir,ext='.h5') :
    """ This function extracts the information from the tables and creates the flat file. """
    count = 0; #song counter
    list_to_write= []
    group_index=0
    row_to_write = ""
    writer = csv.writer(open("complete.csv", "wb"))
    for root, dirs, files in os.walk(basedir):
	files = glob.glob(os.path.join(root,'*'+ext))
        for f in files:
	    row=[]
	    print f
            h5 = hdf5_getters.open_h5_file_read(f)
	    title = hdf5_getters.get_title(h5) 
	    title= title.replace('"','') 
            row.append(title)
	    comma=title.find(',')
	    if	comma != -1:
		    print title
		    time.sleep(1)
	    album = hdf5_getters.get_release(h5)
	    album= album.replace('"','')
            row.append(album)
	    comma=album.find(',')
	    if	comma != -1:
		    print album
		    time.sleep(1)
	    artist_name = hdf5_getters.get_artist_name(h5)
	    comma=artist_name.find(',')
	    if	comma != -1:
		    print artist_name
		    time.sleep(1)
	    artist_name= artist_name.replace('"','')
            row.append(artist_name)
	    duration = hdf5_getters.get_duration(h5)
            row.append(duration)
	    samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
            row.append(samp_rt)
	    artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
            row.append(artist_7digitalid)
	    artist_fam = hdf5_getters.get_artist_familiarity(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_fam) == True:
	            artist_fam=-1
            row.append(artist_fam)
	    artist_hotness= hdf5_getters.get_artist_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_hotness) == True:
	             artist_hotness=-1
            row.append(artist_hotness)
	    artist_id = hdf5_getters.get_artist_id(h5)
            row.append(artist_id)           
	    artist_lat = hdf5_getters.get_artist_latitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lat) == True:
	            artist_lat=-1
            row.append(artist_lat)
	    artist_loc = hdf5_getters.get_artist_location(h5)
            row.append(artist_loc)
	    artist_lon = hdf5_getters.get_artist_longitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lon) == True:
	            artist_lon=-1
            row.append(artist_lon)
	    artist_mbid = hdf5_getters.get_artist_mbid(h5)
            row.append(artist_mbid)

	    #Getting the genre				       
            art_trm = hdf5_getters.get_artist_terms(h5)
            trm_freq = hdf5_getters.get_artist_terms_freq(h5)
	    trn_wght = hdf5_getters.get_artist_terms_weight(h5)
	    a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
	    genre_indexes=get_genre_indexes(trm_freq) 		    #index of the highest freq
	    genre_set=0					            #flag to see if the genre has been set or not
	    final_genre=[]
	    genres_so_far=[]
	    for i in range(len(genre_indexes)):
		    genre_tmp=get_genre(art_trm,genre_indexes[i])   #genre that corresponds to the highest freq
		    genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary
		    if len(genres_so_far) != 0:
			for i in genres_so_far:
				final_genre.append(i)
			    	genre_set=1
			
			
	    if genre_set == 1:
		col_num=[]
		for i in final_genre:
			column=int(i)				#getting the column number of the genre
			col_num.append(column)
	
		genre_array=genre_columns(col_num)	                #genre array 
	        for i in range(len(genre_array)):                   	#appending the genre_array to the row 
			row.append(genre_array[i])
	    else:
		genre_array=genre_columns(-1)				#when there is no genre matched, return an array of [0...0]
	        for i in range(len(genre_array)):                   	#appending the genre_array to the row 
			row.append(genre_array[i])
					

	    artist_pmid = hdf5_getters.get_artist_playmeid(h5)
            row.append(artist_pmid)
	    audio_md5 = hdf5_getters.get_audio_md5(h5)
            row.append(audio_md5)
	    danceability = hdf5_getters.get_danceability(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(danceability) == True:
	            danceability=-1
            row.append(danceability)
	    end_fade_in =hdf5_getters.get_end_of_fade_in(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(end_fade_in) == True:
	            end_fade_in=-1
            row.append(end_fade_in)
	    energy = hdf5_getters.get_energy(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(energy) == True:
	            energy=-1
            row.append(energy)
            song_key = hdf5_getters.get_key(h5)
            row.append(song_key)
	    key_c = hdf5_getters.get_key_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(key_c) == True:
	            key_c=-1
            row.append(key_c)
	    loudness = hdf5_getters.get_loudness(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(loudness) == True:
	            loudness=-1
            row.append(loudness)
	    mode = hdf5_getters.get_mode(h5)
            row.append(mode)
	    mode_conf = hdf5_getters.get_mode_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(mode_conf) == True:
	            mode_conf=-1
            row.append(mode_conf)
	    release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
            row.append(release_7digitalid)
	    song_hot = hdf5_getters.get_song_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(song_hot) == True:
	            song_hot=-1
            row.append(song_hot)
	    song_id = hdf5_getters.get_song_id(h5)
            row.append(song_id)
	    start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
            row.append(start_fade_out)
	    tempo = hdf5_getters.get_tempo(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(tempo) == True:
	            tempo=-1
            row.append(tempo)
	    time_sig = hdf5_getters.get_time_signature(h5)
            row.append(time_sig)
	    time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(time_sig_c) == True:
	            time_sig_c=-1
            row.append(time_sig_c)
	    track_id = hdf5_getters.get_track_id(h5)
            row.append(track_id)
	    track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
            row.append(track_7digitalid)
	    year = hdf5_getters.get_year(h5)
            row.append(year)
	    bars_c = hdf5_getters.get_bars_confidence(h5)
            bars_start = hdf5_getters.get_bars_start(h5)
	    row_bars_padding=padding(245)   #this is the array that will be attached at the end of th row

	    #--------------bars---------------"
	    gral_info=[]
	    gral_info=row[:]
	    empty=[]
	    for i,item in enumerate(bars_c):
                row.append(group_index)
                row.append(i)
                row.append(bars_c[i])
	        bars_c_avg= get_avg(bars_c)
                row.append(bars_c_avg)
	        bars_c_max= get_max(bars_c)	
                row.append(bars_c_max)
	        bars_c_min = get_min(bars_c)
                row.append(bars_c_min)
	        bars_c_stddev= get_stddev(bars_c)
                row.append(bars_c_stddev)
	        bars_c_count = get_count(bars_c)
                row.append(bars_c_count)
	        bars_c_sum = get_sum(bars_c)
                row.append(bars_c_sum)
                row.append(bars_start[i])	         
	        bars_start_avg = get_avg(bars_start)
                row.append(bars_start_avg)	         
	        bars_start_max= get_max(bars_start)
                row.append(bars_start_max)	         
	        bars_start_min = get_min(bars_start)
                row.append(bars_start_min)	         
	        bars_start_stddev= get_stddev(bars_start)
                row.append(bars_start_stddev)	         
	        bars_start_count = get_count(bars_start)
                row.append(bars_start_count)	         
	        bars_start_sum = get_sum(bars_start)
                row.append(bars_start_sum)	         
		for i in row_bars_padding:
			row.append(i)

                writer.writerow(row)
		row=[]
		row=gral_info[:]
	 

            #--------beats---------------"
	    beats_c = hdf5_getters.get_beats_confidence(h5)
	    group_index=1
	    row=[]
	    row=gral_info[:]
	    row_front=padding(14)  	#blanks left in front of the row(empty spaces for bars)
	    row_beats_padding=padding(231)
	    for i,item in enumerate(beats_c):
	   	row.append(group_index)
		row.append(i)
		for index in row_front:  #padding blanks in front of the beats
			row.append(index)
		
		row.append(beats_c[i])
	        beats_c_avg= get_avg(beats_c)
		row.append(beats_c_avg)
	        beats_c_max= get_max(beats_c)
		row.append(beats_c_max)
                beats_c_min = get_min(beats_c)
		row.append(beats_c_min)
	        beats_c_stddev= get_stddev(beats_c)
		row.append(beats_c_stddev)
	        beats_c_count = get_count(beats_c)
		row.append(beats_c_count)
	        beats_c_sum = get_sum(beats_c)
		row.append(beats_c_sum)
                beats_start = hdf5_getters.get_beats_start(h5)
		row.append(beats_start[i])
 	        beats_start_avg = get_avg(beats_start)
		row.append(beats_start_avg)
	        beats_start_max= get_max(beats_start)
		row.append(beats_start_max)
	        beats_start_min = get_min(beats_start)
		row.append(beats_start_min)
	        beats_start_stddev= get_stddev(beats_start)
		row.append(beats_start_stddev)
	        beats_start_count = get_count(beats_start)
		row.append(beats_start_count)
	        beats_start_sum = get_sum(beats_start)
		row.append(beats_start_sum)
		for i in row_beats_padding:
			row.append(i)
                
		writer.writerow(row)
		row=[]
		row=gral_info[:]

            # "--------sections---------------"
	    row_sec_padding=padding(217)	#blank spaces left at the end of the row
	    sec_c = hdf5_getters.get_sections_confidence(h5)
	    group_index=2
	    row=[]
	    row=gral_info[:]
	    row_front=padding(28)		#blank spaces left in front(empty spaces for bars,beats)
	    for i,item in enumerate(sec_c):
		row.append(group_index)
		row.append(i)
		for index in row_front:  	#padding blanks in front of the sections
			row.append(index)

		row.append(sec_c[i])
                sec_c_avg= get_avg(sec_c)
		row.append(sec_c_avg)
	        sec_c_max= get_max(sec_c)
		row.append(sec_c_max)
	        sec_c_min = get_min(sec_c)
		row.append(sec_c_min)
	        sec_c_stddev= get_stddev(sec_c)
		row.append(sec_c_stddev)
	        sec_c_count = get_count(sec_c)
		row.append(sec_c_count)
	        sec_c_sum = get_sum(sec_c)
		row.append(sec_c_sum)
	        sec_start = hdf5_getters.get_sections_start(h5)
		row.append(sec_start[i])	   
                sec_start_avg = get_avg(sec_start)
		row.append(sec_start_avg)
	        sec_start_max= get_max(sec_start)
		row.append(sec_start_max)
	        sec_start_min = get_min(sec_start)
		row.append(sec_start_min)
	        sec_start_stddev= get_stddev(sec_start)
		row.append(sec_start_stddev)
	        sec_start_count = get_count(sec_start)
		row.append(sec_start_count)
	        sec_start_sum = get_sum(sec_start)
		row.append(sec_start_sum)
		for i in row_sec_padding:	#appending the blank spaces at the end of the row
			row.append(i)
                

		writer.writerow(row)
		row=[]
		row=gral_info[:]


            #--------segments---------------"
	    row_seg_padding=padding(182)	#blank spaces at the end of the row
 	    row_front=padding(42)		#blank spaces left in front of segments
	    seg_c = hdf5_getters.get_segments_confidence(h5)
	    group_index=3
	    row=[]
	    row=gral_info[:]
	    for i,item in enumerate(seg_c):
		row.append(group_index)
		row.append(i)
		for index in row_front:  	#padding blanks in front of the segments
			row.append(index)

		row.append(seg_c[i])
                seg_c_avg= get_avg(seg_c)
		row.append(seg_c_avg)
	        seg_c_max= get_max(seg_c)
		row.append(seg_c_max)
	        seg_c_min = get_min(seg_c)
		row.append(seg_c_min)
	        seg_c_stddev= get_stddev(seg_c)
		row.append(seg_c_stddev)
	        seg_c_count = get_count(seg_c)
		row.append(seg_c_count)
	        seg_c_sum = get_sum(seg_c)
		row.append(seg_c_sum)
                seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
		row.append(seg_loud_max[i])
                seg_loud_max_avg= get_avg(seg_loud_max)
		row.append(seg_loud_max_avg)
	        seg_loud_max_max= get_max(seg_loud_max)
		row.append(seg_loud_max_max)
	        seg_loud_max_min = get_min(seg_loud_max)
		row.append(seg_loud_max_min)
	        seg_loud_max_stddev= get_stddev(seg_loud_max)
		row.append(seg_loud_max_stddev)
	        seg_loud_max_count = get_count(seg_loud_max)
		row.append(seg_loud_max_count)
	        seg_loud_max_sum = get_sum(seg_loud_max)
		row.append(seg_loud_max_sum)
	        seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
		row.append(seg_loud_max_time[i])
	        seg_loud_max_time_avg= get_avg(seg_loud_max_time)
		row.append(seg_loud_max_time_avg)
	        seg_loud_max_time_max= get_max(seg_loud_max_time)
		row.append(seg_loud_max_time_max)
	        seg_loud_max_time_min = get_min(seg_loud_max_time)
		row.append(seg_loud_max_time_min)
	        seg_loud_max_time_stddev= get_stddev(seg_loud_max_time)
		row.append(seg_loud_max_time_stddev)
	        seg_loud_max_time_count = get_count(seg_loud_max_time)
		row.append(seg_loud_max_time_count)
	        seg_loud_max_time_sum = get_sum(seg_loud_max_time)
		row.append(seg_loud_max_time_sum)
	        seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
		row.append(seg_loud_start[i])
	        seg_loud_start_avg= get_avg(seg_loud_start)
		row.append(seg_loud_start_avg)
	        seg_loud_start_max= get_max(seg_loud_start)
		row.append(seg_loud_start_max)
	        seg_loud_start_min = get_min(seg_loud_start)
		row.append(seg_loud_start_min)
	        seg_loud_start_stddev= get_stddev(seg_loud_start)
		row.append(seg_loud_start_stddev)
	        seg_loud_start_count = get_count(seg_loud_start)
		row.append(seg_loud_start_count)
	        seg_loud_start_sum = get_sum(seg_loud_start)					      
		row.append(seg_loud_start_sum)
	        seg_start = hdf5_getters.get_segments_start(h5)
		row.append(seg_start[i])
	        seg_start_avg= get_avg(seg_start)
		row.append(seg_start_avg)
	        seg_start_max= get_max(seg_start)
		row.append(seg_start_max)
	        seg_start_min = get_min(seg_start)
		row.append(seg_start_min)
	        seg_start_stddev= get_stddev(seg_start)
		row.append(seg_start_stddev)
	        seg_start_count = get_count(seg_start)
		row.append(seg_start_count)
	        seg_start_sum = get_sum(seg_start)
		row.append(seg_start_sum)
		for i in row_seg_padding:	#appending blank spaces at the end of the row
			row.append(i)
                
		writer.writerow(row)
		row=[]
		row=gral_info[:]

	    #----------segments pitch and timbre---------------"
	    row_seg2_padding=padding(14)	#blank spaces left at the end of the row
	    row_front=padding(77)		#blank spaces left at the front of the segments and timbre
	    seg_pitch = hdf5_getters.get_segments_pitches(h5)
	    transpose_pitch= seg_pitch.transpose()          #this is to tranpose the matrix,so we can have 12 rows
	    group_index=4
	    row=[]
	    row=gral_info[:]
	    for i,item in enumerate(transpose_pitch[0]):
		row.append(group_index)
		row.append(i)
		for index in row_front:  	#padding blanks in front of segments and timbre
			row.append(index)
	   
		row.append(transpose_pitch[0][i])
  		seg_pitch_avg= get_avg(transpose_pitch[0])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[0])	
		row.append(seg_pitch_max)
		seg_pitch_min = get_min(transpose_pitch[0])
		row.append(seg_pitch_min)
		seg_pitch_stddev= get_stddev(transpose_pitch[0])
		row.append(seg_pitch_stddev)
		seg_pitch_count = get_count(transpose_pitch[0])
		row.append(seg_pitch_count)
		seg_pitch_sum = get_sum(transpose_pitch[0])
		row.append(seg_pitch_sum)   
 		row.append(transpose_pitch[1][i])
 		seg_pitch_avg= get_avg(transpose_pitch[1])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[1])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[1])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[1])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[1])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[1])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[2][i])
 		seg_pitch_avg= get_avg(transpose_pitch[2])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[2])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[2])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[2])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[2])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[2])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[3][i])
 		seg_pitch_avg= get_avg(transpose_pitch[3])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[3])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[3])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[3])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[3])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[3])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[4][i])
 		seg_pitch_avg= get_avg(transpose_pitch[4])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[4])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[4])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[4])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[4])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[4])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[5][i])
 		seg_pitch_avg= get_avg(transpose_pitch[5])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[5])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[5])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[5])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[5])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[5])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[6][i])
 		seg_pitch_avg= get_avg(transpose_pitch[6])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[6])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[6])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[6])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[6])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[6])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[7][i])
 		seg_pitch_avg= get_avg(transpose_pitch[7])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[7])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[7])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[7])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[7])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[7])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[8][i])
 		seg_pitch_avg= get_avg(transpose_pitch[8])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[8])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[8])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[8])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[8])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[8])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[9][i])
 		seg_pitch_avg= get_avg(transpose_pitch[9])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[9])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[9])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[9])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[9])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[9])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[10][i])
 		seg_pitch_avg= get_avg(transpose_pitch[10])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[10])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[10])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[10])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[10])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[10])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[11][i])
 		seg_pitch_avg= get_avg(transpose_pitch[11])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[11])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[11])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[11])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[11])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[11])
		row.append(seg_pitch_sum)   
		#timbre arrays
	        seg_timbre = hdf5_getters.get_segments_timbre(h5)
                transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows
		row.append(transpose_timbre[0][i])
  		seg_timbre_avg= get_avg(transpose_timbre[0])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[0])	
		row.append(seg_timbre_max)
		seg_timbre_min = get_min(transpose_timbre[0])
		row.append(seg_timbre_min)
		seg_timbre_stddev=get_stddev(transpose_timbre[0])
		row.append(seg_timbre_stddev)
		seg_timbre_count = get_count(transpose_timbre[0])
		row.append(seg_timbre_count)
		seg_timbre_sum = get_sum(transpose_timbre[0])
		row.append(seg_timbre_sum)   
 		row.append(transpose_timbre[1][i])
 		seg_timbre_avg= get_avg(transpose_timbre[1])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[1])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[1])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[1])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[1])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[1])
		row.append(seg_timbre_sum)   
		row.append(transpose_timbre[2][i])
 		seg_timbre_avg= get_avg(transpose_timbre[2])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[2])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[2])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[2])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[2])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[2])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[3][i])
 		seg_timbre_avg= get_avg(transpose_timbre[3])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[3])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[3])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[3])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[3])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[3])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[4][i])
 		seg_timbre_avg= get_avg(transpose_timbre[4])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[4])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[4])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[4])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[4])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[4])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[5][i])
 		seg_timbre_avg= get_avg(transpose_timbre[5])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[5])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[5])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[5])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[5])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[5])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[6][i])
 		seg_timbre_avg= get_avg(transpose_timbre[6])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[6])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[6])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[6])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[6])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[6])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[7][i])
 		seg_timbre_avg= get_avg(transpose_timbre[7])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[7])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[7])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[7])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[7])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[7])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[8][i])
 		seg_timbre_avg= get_avg(transpose_timbre[8])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[8])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[8])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[8])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[8])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[8])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[9][i])
 		seg_timbre_avg= get_avg(transpose_timbre[9])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[9])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[9])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[9])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[9])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[9])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[10][i])
 		seg_timbre_avg= get_avg(transpose_timbre[10])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[10])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[10])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[10])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[10])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[10])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[11][i])
 		seg_timbre_avg= get_avg(transpose_timbre[11])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[11])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[11])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[11])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[11])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[11])
		row.append(seg_timbre_sum)
	        for item in row_seg2_padding:
			row.append(item)
		writer.writerow(row)
		row=[]
		row=gral_info[:]


            # "--------tatums---------------"
	    tatms_c = hdf5_getters.get_tatums_confidence(h5)
	    group_index=5
	    row_front=padding(245)	#blank spaces left in front of tatums
	    row=[]
	    row=gral_info[:]
	    for i,item in enumerate(tatms_c):
		row.append(group_index)
		row.append(i)
		for item in row_front:	#appending blank spaces at the front of the row
			row.append(item)

		row.append(tatms_c[i])
		tatms_c_avg= get_avg(tatms_c)
		row.append(tatms_c_avg)
	 	tatms_c_max= get_max(tatms_c)
		row.append(tatms_c_max)
	        tatms_c_min = get_min(tatms_c)
		row.append(tatms_c_min)
	        tatms_c_stddev= get_stddev(tatms_c)
		row.append(tatms_c_stddev)
                tatms_c_count = get_count(tatms_c)
		row.append(tatms_c_count)
                tatms_c_sum = get_sum(tatms_c)
		row.append(tatms_c_sum)
                tatms_start = hdf5_getters.get_tatums_start(h5)
		row.append(tatms_start[i])
	        tatms_start_avg= get_avg(tatms_start)
		row.append(tatms_start_avg)
	        tatms_start_max= get_max(tatms_start)
		row.append(tatms_start_max)
	        tatms_start_min = get_min(tatms_start)
		row.append(tatms_start_min)
	        tatms_start_stddev= get_stddev(tatms_start)
		row.append(tatms_start_stddev)
	        tatms_start_count = get_count(tatms_start)
		row.append(tatms_start_count)
	        tatms_start_sum = get_sum(tatms_start)				   
		row.append(tatms_start_sum)
		writer.writerow(row)
		row=[]
		row=gral_info[:]


 
	    transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_pitch_avg=[]
	    seg_pitch_max=[]
	    seg_pitch_min=[]
            seg_pitch_stddev=[]
            seg_pitch_count=[]
	    seg_pitch_sum=[]
            i=0
	    #Getting the aggregate values in the pitches array
	    for row in transpose_pitch:
		   seg_pitch_avg.append(get_avg(row))
		   seg_pitch_max.append(get_max(row))
	           seg_pitch_min.append(get_min(row))
		   seg_pitch_stddev.append(get_stddev(row))
		   seg_pitch_count.append(get_count(row))
                   seg_pitch_sum.append(get_sum(row))
		   i=i+1

	    #extracting information from the timbre array 
            transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_timbre_avg=[]
	    seg_timbre_max=[]
	    seg_timbre_min=[]
            seg_timbre_stddev=[]
            seg_timbre_count=[]
	    seg_timbre_sum=[]
            i=0
	    for row in transpose_timbre:
		   seg_timbre_avg.append(get_avg(row))
		   seg_timbre_max.append(get_max(row))
	           seg_timbre_min.append(get_min(row))
		   seg_timbre_stddev.append(get_stddev(row))
		   seg_timbre_count.append(get_count(row))
                   seg_timbre_sum.append(get_sum(row))
		   i=i+1








	    h5.close()
	    count=count+1;
	    print count;
        print 'Get one at: http://developer.7digital.net/'
        print 'Pass it as a flag: -7digitalkey KEY'
        print 'or set it under environment variable: DIGITAL7_API_KEY'
        sys.exit(0)
    if not os.path.isfile(h5path):
        print 'invalid path (not a file):',h5path
        sys.exit(0)


    # open h5 song, get all we know about the song
    h5 = hdf5_utils.open_h5_file_read(h5path)
    track_7digitalid = GETTERS.get_track_7digitalid(h5)
    release_7digitalid = GETTERS.get_release_7digitalid(h5)
    artist_7digitalid = GETTERS.get_artist_7digitalid(h5)
    artist_name = GETTERS.get_artist_name(h5)
    release_name = GETTERS.get_release(h5)
    track_name = GETTERS.get_title(h5)
    h5.close()

    # we already have the 7digital track id? way too easy!
    if track_7digitalid >= 0:
        preview = get_preview_from_trackid(track_7digitalid)
        if preview == '':
            print 'something went wrong when looking by track id'
        else:
            print preview
            sys.exit(0)

    # we have the release id? get all tracks, find the closest match
    if release_7digitalid >= 0:
        tracks_name_ids = get_tracks_from_releaseid(release_7digitalid)
Example #27
0
def main():
    """
    Export scalar metadata of a list of song HDF5 files to a CSV file.

    Command line arguments (both optional):
      sys.argv[1] - text file listing one HDF5 path per line
                    (default "./../songMetaInfo.txt")
      sys.argv[2] - CSV file to create (default 'SongFileMetaData.csv')

    A header line is written first, then one line per song. A song that
    cannot be read is reported on stdout and skipped; the remaining
    songs are still processed.
    """
    basedir = "./../songMetaInfo.txt"
    outputfile = 'SongFileMetaData.csv'

    if len(sys.argv) > 1:
        basedir = sys.argv[1]
    if len(sys.argv) > 2:
        outputfile = sys.argv[2]

    # 'with' guarantees the output is flushed and closed even when the
    # listing file is missing or the run is interrupted.
    with open(outputfile, 'w') as csvWriter:
        csvWriter.write(
            "title,songId,artistId,artistfamilarity,artistHotness,songHotness," +
            "songEnfOfFadeIn,startFadeout,energy,loudness,albumID,albumName,artistName,danceability,duration,keySignatureConfidence,tempo,timeSignature,timeSignatureConfidence,year\n"
        )

        with open(basedir) as listing:
            for line in listing:
                f = line.strip()
                if not f:
                    # blank line in the listing: nothing to open
                    continue
                print(f)
                try:
                    songH5File = hdf5_getters.open_h5_file_read(f)
                    try:
                        # values in the same order as the header columns
                        values = [
                            hdf5_getters.get_title(songH5File),                      # 0
                            hdf5_getters.get_song_id(songH5File),                    # 1
                            hdf5_getters.get_artist_id(songH5File),                  # 2
                            hdf5_getters.get_artist_familiarity(songH5File),         # 3
                            hdf5_getters.get_artist_hotttnesss(songH5File),          # 4
                            hdf5_getters.get_song_hotttnesss(songH5File),            # 5
                            hdf5_getters.get_end_of_fade_in(songH5File),             # 6
                            hdf5_getters.get_start_of_fade_out(songH5File),          # 7
                            hdf5_getters.get_energy(songH5File),                     # 8
                            hdf5_getters.get_loudness(songH5File),                   # 9
                            hdf5_getters.get_release_7digitalid(songH5File),         # 10
                            hdf5_getters.get_release(songH5File),                    # 11
                            hdf5_getters.get_artist_name(songH5File),                # 12
                            hdf5_getters.get_danceability(songH5File),               # 13
                            hdf5_getters.get_duration(songH5File),                   # 14
                            hdf5_getters.get_key_confidence(songH5File),             # 15
                            hdf5_getters.get_tempo(songH5File),                      # 16
                            hdf5_getters.get_time_signature(songH5File),             # 17
                            hdf5_getters.get_time_signature_confidence(songH5File),  # 18
                            hdf5_getters.get_year(songH5File),                       # 19
                        ]
                    finally:
                        # release the HDF5 handle even if a getter failed
                        songH5File.close()
                    # NOTE(review): every data line ends with a trailing ','
                    # (one more field than the header) and values are not
                    # quoted, so a comma inside a title shifts columns.
                    # Kept as-is because downstream consumers may rely on
                    # the current layout.
                    csvWriter.write(",".join([str(v) for v in values]) + ",\n")
                except Exception:
                    # best effort: skip the unreadable song, but do not
                    # swallow KeyboardInterrupt / SystemExit
                    print("Error in processing file")
def complete_hd5_to_csv(basedir, output_path='Tester2.csv'):
    """
    Dump the scalar fields of every '.h5' song file under basedir to a CSV.

    basedir     - directory that is walked recursively for '*.h5' files
    output_path - CSV file to create (';' delimited); defaults to
                  'Tester2.csv' in the current directory

    The first line written is the header, then one row per song file.
    Only single-valued fields are exported, array fields are not.
    """
    ext = '.h5'  # Get all files with extension .h5

    # Header title. Essentially it is a schema for all the following songs;
    # the order must match the getter calls in the row written below.
    header = [
        'Title', 'Artist familiarity', 'Artist hotness', 'Artist ID',
        'Artist mbID', 'Artist playmeid', 'Artist 7DigitalID',
        'Artist latitude', 'Artist longitude', 'Artist location',
        'Artist Name', 'Release', 'Release 7DigitalID', 'Song ID',
        'Song Hotness', 'Track 7Digital', 'Analysis sample rate', 'Audio md5',
        'Danceability', 'Duration', 'End of Fade', 'Energy', 'Key',
        'Key Confidence', 'Loudness', 'Mode', 'Mode Confidence',
        'Start of fade out', 'Tempo', 'Time signature',
        'Time signature confidence', 'Track ID', 'Year'
    ]

    with open(output_path, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=';')

        # writing the header line. This line contains the schema of the data
        csv_writer.writerow(header)

        # Read all files from the given directories
        for root, _dirs, _names in os.walk(basedir):
            files = glob.glob(os.path.join(root, '*' + ext))
            print(files)

            for f in files:
                h5 = hdf5_getters.open_h5_file_read(f)
                try:
                    # Write as row all elements. NOTE: Only the serialized
                    # elements are parsed and not arrays
                    csv_writer.writerow([
                        hdf5_getters.get_title(h5),
                        hdf5_getters.get_artist_familiarity(h5),
                        hdf5_getters.get_artist_hotttnesss(h5),
                        hdf5_getters.get_artist_id(h5),
                        hdf5_getters.get_artist_mbid(h5),
                        hdf5_getters.get_artist_playmeid(h5),
                        hdf5_getters.get_artist_7digitalid(h5),
                        hdf5_getters.get_artist_latitude(h5),
                        hdf5_getters.get_artist_longitude(h5),
                        hdf5_getters.get_artist_location(h5),
                        hdf5_getters.get_artist_name(h5),
                        hdf5_getters.get_release(h5),
                        hdf5_getters.get_release_7digitalid(h5),
                        hdf5_getters.get_song_id(h5),
                        hdf5_getters.get_song_hotttnesss(h5),
                        hdf5_getters.get_track_7digitalid(h5),
                        hdf5_getters.get_analysis_sample_rate(h5),
                        hdf5_getters.get_audio_md5(h5),
                        hdf5_getters.get_danceability(h5),
                        hdf5_getters.get_duration(h5),
                        hdf5_getters.get_end_of_fade_in(h5),
                        hdf5_getters.get_energy(h5),
                        hdf5_getters.get_key(h5),
                        hdf5_getters.get_key_confidence(h5),
                        hdf5_getters.get_loudness(h5),
                        hdf5_getters.get_mode(h5),
                        hdf5_getters.get_mode_confidence(h5),
                        hdf5_getters.get_start_of_fade_out(h5),
                        hdf5_getters.get_tempo(h5),
                        hdf5_getters.get_time_signature(h5),
                        hdf5_getters.get_time_signature_confidence(h5),
                        hdf5_getters.get_track_id(h5),
                        hdf5_getters.get_year(h5)
                    ])
                finally:
                    # always release the HDF5 handle, even when a getter
                    # raised, so a bad file does not leak the handle
                    h5.close()
Example #29
0
                term = term.replace("'","")
                cursor.execute("SELECT * FROM artist_genres WHERE artist_id='" + artist_id + "' AND genre ='" + term + "'")
                if cursor.rowcount != 1:
                    cursor.execute("INSERT INTO artist_genres VALUES ('" + artist_id + "','" + term + "')")
            for tag in mbtags:
                tag = tag.replace("'","")
                cursor.execute("SELECT * FROM artist_genres WHERE artist_id='" + artist_id + "' AND genre ='" + tag + "'")
                if cursor.rowcount != 1:
                    cursor.execute("INSERT INTO artist_genres VALUES ('" + artist_id + "','" + tag + "')")

            ''' Store track tuples '''

            track_id = h.get_track_id(h5,0)
            track_title = h.get_title(h5,0)
            track_title = track_title.replace("'","")
            track_album = h.get_release(h5,0)
            track_album = track_album.replace("'","")
            track_duration = str(h.get_duration(h5,0))
            track_year = str(h.get_year(h5,0))

            cursor.execute("SELECT * FROM track WHERE track_id = '" + track_id  + "'")
            rs = cursor.fetchall()
            if cursor.rowcount != 1:
                cursor.execute("INSERT INTO track VALUES ('" + track_id + "','" + track_title + "','" + artist_id  + "','"  + artist_name + "','" + track_album + "'," + track_duration + "," + track_year  + ");")
                      
            ''' Store track_analysis tuples '''
            print ("Track ID: " + h.get_track_id(h5,0))
            track_tempo = str(h.get_tempo(h5,0))
            track_key = str(h.get_key(h5,0))
            track_danceability = str(h.get_danceability(h5,0))
            if track_danceability == "nan":
def writeSingleHDF5FileToTxtFile(songHDF5FileName):
    """
    Export one song HDF5 file to the tab-separated table files.

    Reads the song from songHDF5FileName and:
    - writes one line to songsTableFile and one to artistsTableFile
    - for each artist tag present in allowedTagsSet, writes a line to
      artistsTagsTableFile, and to tagsTableFile the first time the tag
      is seen (tracked in tagsSet)
    - records (artistID, similarArtist) pairs, the artist id and the
      artist name in the module-level collections
    - updates the module-level maximum*Len counters with the longest
      song name, album name, artist name and tag seen so far

    The HDF5 file is closed before returning, also when an error occurs.
    """
    global maximumArtistNameLen
    global maximumArtistTagLen
    global maximumSongNameLen
    global maximumAlbumNameLen

    songHDF5File = GETTERS.open_h5_file_read(songHDF5FileName)
    try:
        songID = GETTERS.get_song_id(songHDF5File)
        songName = GETTERS.get_title(songHDF5File)
        artistID = GETTERS.get_artist_id(songHDF5File)
        songAlbum = GETTERS.get_release(songHDF5File)
        songYear = GETTERS.get_year(songHDF5File)
        songTempo = GETTERS.get_tempo(songHDF5File)
        songDanceability = GETTERS.get_danceability(songHDF5File)
        songDuration = GETTERS.get_duration(songHDF5File)
        songEnergy = GETTERS.get_energy(songHDF5File)
        songKey = GETTERS.get_key(songHDF5File)
        songLoudness = GETTERS.get_loudness(songHDF5File)
        songMode = GETTERS.get_mode(songHDF5File)
        songTimeSignature = GETTERS.get_time_signature(songHDF5File)

        # songs table: every field is followed by a tab, including the last
        songsTableFile.write(songID + "\t" + songName + "\t" + artistID + "\t" +
                             songAlbum + "\t" + str(songYear) + "\t" +
                             str(songTempo) + "\t" + str(songDanceability) + "\t" +
                             str(songDuration) + "\t" + str(songEnergy) + "\t" +
                             str(songKey) + "\t" + str(songLoudness) + "\t" +
                             str(songMode) + "\t" + str(songTimeSignature) +
                             "\t\n")

        artistName = GETTERS.get_artist_name(songHDF5File)
        artistFamiliarity = GETTERS.get_artist_familiarity(songHDF5File)
        artistTagsArray = GETTERS.get_artist_mbtags(songHDF5File)

        # NOTE(review): an artist line is written for every song, so an
        # artist with several songs appears several times - presumably
        # de-duplicated later; confirm with the consumer of this file.
        artistsTableFile.write(artistID + "\t" + artistName + "\t" +
                               str(artistFamiliarity) + "\t\n")

        # keep track of the longest strings seen so far
        maximumSongNameLen = max(maximumSongNameLen, len(songName))
        maximumAlbumNameLen = max(maximumAlbumNameLen, len(songAlbum))
        maximumArtistNameLen = max(maximumArtistNameLen, len(artistName))

        for artistTag in artistTagsArray:
            if artistTag not in allowedTagsSet:
                continue
            artistsTagsTableFile.write(artistID + "\t" + artistTag + "\t\n")
            if artistTag not in tagsSet:
                # first occurrence of this tag: add it to the tags table
                tagsTableFile.write(artistTag + "\t\n")
                tagsSet.add(artistTag)
            maximumArtistTagLen = max(maximumArtistTagLen, len(artistTag))

        for similarArtist in GETTERS.get_similar_artists(songHDF5File):
            similarArtistsPairsList.add((artistID, similarArtist))

        artistsIDsSet.add(artistID)
        artistsNamesSet.add(artistName)
    finally:
        # release the HDF5 handle even if a getter or a write failed
        songHDF5File.close()
Example #31
0
def fill_attributes(song, songH5File):
    """
    Copy the fields of an open song HDF5 file onto a song object.

    song       - object receiving the attributes (modified in place)
    songH5File - open HDF5 song file, as expected by hdf5_getters

    Single-valued fields are stored as strings. Numeric array fields are
    reduced with convert_array_to_meanvar / covert_2darray_to_meanvar into
    a '<name>_mean' and '<name>_var' attribute pair, and string arrays
    are flattened with convert_array_to_string.

    RETURN the same song object
    """
    # both arrays are needed twice below (raw string + mean/var), read once
    segments_confidence = hdf5_getters.get_segments_confidence(songH5File)
    sections_confidence = hdf5_getters.get_sections_confidence(songH5File)

    #----------------------------non array attributes-------------------------------
    song.analysisSampleRate = str(
        hdf5_getters.get_analysis_sample_rate(songH5File))
    song.artistDigitalID = str(hdf5_getters.get_artist_7digitalid(songH5File))
    song.artistFamiliarity = str(
        hdf5_getters.get_artist_familiarity(songH5File))
    # the getter is spelled 'hotttnesss' (three t, three s) in hdf5_getters;
    # the previous 'get_artist_hottness' does not exist in that module
    song.artistHotness = str(hdf5_getters.get_artist_hotttnesss(songH5File))
    song.artistID = str(hdf5_getters.get_artist_id(songH5File))
    song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
    song.artistLocation = str(hdf5_getters.get_artist_location(songH5File))
    song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
    song.artistmbID = str(hdf5_getters.get_artist_mbid(songH5File))
    song.artistName = str(hdf5_getters.get_artist_name(songH5File))
    song.artistPlayMeID = str(hdf5_getters.get_artist_playmeid(songH5File))
    song.audioMD5 = str(hdf5_getters.get_audio_md5(songH5File))
    song.danceability = str(hdf5_getters.get_danceability(songH5File))
    song.duration = str(hdf5_getters.get_duration(songH5File))
    song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File))
    song.energy = str(hdf5_getters.get_energy(songH5File))
    song.key = str(hdf5_getters.get_key(songH5File))
    song.keyConfidence = str(hdf5_getters.get_key_confidence(songH5File))
    # 'segementsConfidence' (sic) is the existing attribute name, kept for
    # callers. It used to be assigned twice, so the sections data silently
    # replaced the segments data; sections now get their own attribute.
    song.segementsConfidence = str(segments_confidence)
    song.sectionsConfidence = str(sections_confidence)
    song.loudness = str(hdf5_getters.get_loudness(songH5File))
    song.mode = str(hdf5_getters.get_mode(songH5File))
    song.modeConfidence = str(hdf5_getters.get_mode_confidence(songH5File))
    song.release = str(hdf5_getters.get_release(songH5File))
    song.releaseDigitalID = str(
        hdf5_getters.get_release_7digitalid(songH5File))
    song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
    song.startOfFadeOut = str(hdf5_getters.get_start_of_fade_out(songH5File))
    song.tempo = str(hdf5_getters.get_tempo(songH5File))
    song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
    song.timeSignatureConfidence = str(
        hdf5_getters.get_time_signature_confidence(songH5File))
    song.title = str(hdf5_getters.get_title(songH5File))
    song.trackID = str(hdf5_getters.get_track_id(songH5File))
    song.trackDigitalID = str(hdf5_getters.get_track_7digitalid(songH5File))
    song.year = str(hdf5_getters.get_year(songH5File))

    #-------------------------------array attributes--------------------------------------
    #array float
    song.beatsStart_mean, song.beatsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_beats_start(songH5File))
    #array float
    song.artistTermsFreq_mean, song.artistTermsFreq_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_terms_freq(songH5File))
    #array float
    song.artistTermsWeight_mean, song.artistTermsWeight_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_terms_weight(songH5File))
    #array int
    song.artistmbTagsCount_mean, song.artistmbTagsCount_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_mbtags_count(songH5File))
    #array float
    song.barsConfidence_mean, song.barsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_bars_confidence(songH5File))
    #array float
    song.barsStart_mean, song.barsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_bars_start(songH5File))
    #array float
    song.beatsConfidence_mean, song.beatsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_beats_confidence(songH5File))
    #array float
    song.sectionsConfidence_mean, song.sectionsConfidence_var = convert_array_to_meanvar(
        sections_confidence)
    #array float
    song.sectionsStart_mean, song.sectionsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_sections_start(songH5File))
    #array float
    song.segmentsConfidence_mean, song.segmentsConfidence_var = convert_array_to_meanvar(
        segments_confidence)
    #array float
    song.segmentsLoudness_mean, song.segmentsLoudness_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_max(songH5File))
    #array float
    song.segmentsLoudnessMaxTime_mean, song.segmentsLoudnessMaxTime_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_max_time(songH5File))
    #array float
    # NOTE(review): attribute is named '...MaxStart' but is filled from
    # get_segments_loudness_start; name kept for existing callers.
    song.segmentsLoudnessMaxStart_mean, song.segmentsLoudnessMaxStart_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_start(songH5File))
    #array float
    song.segmentsStart_mean, song.segmentsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_start(songH5File))
    #array float
    song.tatumsConfidence_mean, song.tatumsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_tatums_confidence(songH5File))
    #array float
    song.tatumsStart_mean, song.tatumsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_tatums_start(songH5File))
    #array2d float
    song.segmentsTimbre_mean, song.segmentsTimbre_var = covert_2darray_to_meanvar(
        hdf5_getters.get_segments_timbre(songH5File))
    #array2d float
    song.segmentsPitches_mean, song.segmentsPitches_var = covert_2darray_to_meanvar(
        hdf5_getters.get_segments_pitches(songH5File))

    #------------------------array string attributes------------------------
    song.similarArtists = convert_array_to_string(
        hdf5_getters.get_similar_artists(songH5File))  #array string
    song.artistTerms = convert_array_to_string(
        hdf5_getters.get_artist_terms(songH5File))  #array string
    song.artistmbTags = convert_array_to_string(
        hdf5_getters.get_artist_mbtags(songH5File))  #array string

    return song
            song.artistMBTags = remove_trap_characters(str(list(temp)))
            song.artistMBTagsOuterCount = get_list_length(temp)
            song.artistMBTagsCount = remove_trap_characters(
                str(list(hdf5_getters.get_artist_mbtags_count(songH5File))))
            song.analysisSampleRate = remove_trap_characters(
                str(hdf5_getters.get_analysis_sample_rate(songH5File)))
            song.audioMD5 = remove_trap_characters(
                str(hdf5_getters.get_audio_md5(songH5File)))
            song.endOfFadeIn = remove_trap_characters(
                str(hdf5_getters.get_end_of_fade_in(songH5File)))
            song.startOfFadeOut = remove_trap_characters(
                str(hdf5_getters.get_start_of_fade_out(songH5File)))
            song.energy = remove_trap_characters(
                str(hdf5_getters.get_energy(songH5File)))
            song.release = remove_trap_characters(
                str(hdf5_getters.get_release(songH5File)))
            song.release7digitalid = remove_trap_characters(
                str(hdf5_getters.get_release_7digitalid(songH5File)))
            song.songHotness = remove_trap_characters(
                str(hdf5_getters.get_song_hotttnesss(songH5File)))
            song.track7digitalid = remove_trap_characters(
                str(hdf5_getters.get_track_7digitalid(songH5File)))

            temp = hdf5_getters.get_similar_artists(songH5File)
            song.similarartists = remove_trap_characters(str(list(list(temp))))
            song.similarArtistsCount = get_list_length(temp)
            song.loudness = remove_trap_characters(
                str(hdf5_getters.get_loudness(songH5File)))
            song.mode = remove_trap_characters(
                str(hdf5_getters.get_mode(songH5File)))
            song.modeConfidence = remove_trap_characters(
def _strip_bytes_prefix(text):
    """Remove the ``b`` marker left by calling ``str()`` on a bytes value.

    On Python 3 ``str(b'abc')`` is ``"b'abc'"``; this returns ``"'abc'"``
    (the surrounding quotes are kept).  Text that does not start with a
    bytes-literal prefix is returned unchanged, so a ``b`` that is part of a
    real title or name is never dropped.
    """
    if text.startswith(("b'", 'b"')):
        return text[1:]
    return text


def main(basedir="D:/Master K", ext=".H5", output_path=r"D:\data_file_k.json"):
    """Export per-song fields of every HDF5 file under ``basedir`` to JSON.

    Every directory below ``basedir`` is searched for files matching
    ``'*' + ext``.  For each file the fields listed in ``text_fields`` are
    read through ``hdf5_getters``, converted with ``str()``, stored on a
    ``Song`` object and copied into a dict under the key
    ``"song_" + attribute``.  The dict additionally holds ``song_id``,
    ``song_mfcc`` (``list()`` of the ``get_segments_timbre`` result) and
    ``song_hotness``.  The list of dicts is finally written to
    ``output_path`` with ``json.dump`` using ``NumpyEncoder``.

    Args:
        basedir: Root directory to search.
        ext: File extension to match, including the dot.
        output_path: Destination of the JSON file.
    """
    # (Song attribute, getter) pairs, in the order the keys are added to
    # each JSON item.
    text_fields = (
        ("artistID", hdf5_getters.get_artist_id),
        ("albumID", hdf5_getters.get_release_7digitalid),
        ("albumName", hdf5_getters.get_release),
        ("artistLatitude", hdf5_getters.get_artist_latitude),
        ("artistLocation", hdf5_getters.get_artist_location),
        ("artistLongitude", hdf5_getters.get_artist_longitude),
        ("artistName", hdf5_getters.get_artist_name),
        ("danceability", hdf5_getters.get_danceability),
        ("duration", hdf5_getters.get_duration),
        ("keySignature", hdf5_getters.get_key),
        ("keySignatureConfidence", hdf5_getters.get_key_confidence),
        ("tempo", hdf5_getters.get_tempo),
        ("timeSignature", hdf5_getters.get_time_signature),
        ("timeSignatureConfidence",
         hdf5_getters.get_time_signature_confidence),
        ("title", hdf5_getters.get_title),
        ("year", hdf5_getters.get_year),
    )

    songs = []
    for root, _dirs, _files in os.walk(basedir):
        for f in glob.glob(os.path.join(root, '*' + ext)):
            print(f)
            songH5File = hdf5_getters.open_h5_file_read(f)
            try:
                song = Song(str(hdf5_getters.get_song_id(songH5File)))
                item = {"song_id": _strip_bytes_prefix(song.id)}
                for attribute, getter in text_fields:
                    setattr(song, attribute,
                            _strip_bytes_prefix(str(getter(songH5File))))
                    item["song_" + attribute] = getattr(song, attribute)
                # Stored as a list rather than a string; NumpyEncoder is
                # relied on to serialise the contained values.
                item["song_mfcc"] = list(
                    hdf5_getters.get_segments_timbre(songH5File))
                song.hotness = str(
                    hdf5_getters.get_artist_hotttnesss(songH5File))
                item["song_hotness"] = song.hotness
            finally:
                # Release the handle even when a getter raises.
                songH5File.close()
            songs.append(item)

    with open(output_path, "w") as write_file:
        json.dump(songs, write_file, cls=NumpyEncoder)
 writer_tatums_file = csv.writer(outputTatumsFile, lineterminator='\n')
 writer_MBTags_File = csv.writer(outputMBTagsFile, lineterminator='\n')
 for dirname, dirs, files in os.walk(files):
     for filename in files:
         filename_without_extension, extension = os.path.splitext(filename)
         if extension == '.h5':
             hdf = hdf5_getters.open_h5_file_read(dirname + '/' + filename)
             artist_id = hdf5_getters.get_artist_id(hdf)
             artists_mb_id = hdf5_getters.get_artist_mbid(hdf)
             artist_playmeid = hdf5_getters.get_artist_playmeid(hdf)
             artist_7digitalid = hdf5_getters.get_artist_7digitalid(hdf)
             artist_name = hdf5_getters.get_artist_name(hdf)
             artist_familarity = hdf5_getters.get_artist_familiarity(hdf)
             artist_hotttnesss = hdf5_getters.get_artist_hotttnesss(hdf)
             artist_location = hdf5_getters.get_artist_location(hdf)
             release = hdf5_getters.get_release(hdf)
             release_7digitalid = hdf5_getters.get_release_7digitalid(hdf)
             song_id = hdf5_getters.get_song_id(hdf)
             title = hdf5_getters.get_title(hdf)
             song_hotttnesss = hdf5_getters.get_song_hotttnesss(hdf)
             track_7digitalid = hdf5_getters.get_track_7digitalid(hdf)
             analysis_sample_rate = hdf5_getters.get_analysis_sample_rate(
                 hdf)
             audio_md5 = hdf5_getters.get_audio_md5(hdf)
             duration = hdf5_getters.get_duration(hdf)
             end_of_fade_in = hdf5_getters.get_end_of_fade_in(hdf)
             energy = hdf5_getters.get_energy(hdf)
             key = hdf5_getters.get_key(hdf)
             key_confidence = hdf5_getters.get_key_confidence(hdf)
             loudness = hdf5_getters.get_loudness(hdf)
             mode = hdf5_getters.get_mode(hdf)
        print 'You need to set a 7digital API key!'
        print 'Get one at: http://developer.7digital.net/'
        print 'Pass it as a flag: -7digitalkey KEY'
        print 'or set it under environment variable: DIGITAL7_API_KEY'
        sys.exit(0)
    if not os.path.isfile(h5path):
        print 'invalid path (not a file):', h5path
        sys.exit(0)

    # open h5 song, get all we know about the song
    h5 = hdf5_utils.open_h5_file_read(h5path)
    track_7digitalid = GETTERS.get_track_7digitalid(h5)
    release_7digitalid = GETTERS.get_release_7digitalid(h5)
    artist_7digitalid = GETTERS.get_artist_7digitalid(h5)
    artist_name = GETTERS.get_artist_name(h5)
    release_name = GETTERS.get_release(h5)
    track_name = GETTERS.get_title(h5)
    h5.close()

    # we already have the 7digital track id? way too easy!
    if track_7digitalid >= 0:
        preview = get_preview_from_trackid(track_7digitalid)
        if preview == '':
            print 'something went wrong when looking by track id'
        else:
            print preview
            sys.exit(0)

    # we have the release id? get all tracks, find the closest match
    if release_7digitalid >= 0:
        tracks_name_ids = get_tracks_from_releaseid(release_7digitalid)
def _nan_to_minus_one(value):
    """Return -1 in place of a NaN reading; any other value is returned as is."""
    if numpy.isnan(value):
        return -1
    return value


def _pause_on_comma(text):
    """Print ``text`` and sleep for one second when it contains a comma."""
    if text.find(',') != -1:
        print(text)
        time.sleep(1)


def _summarize(values):
    """Return avg/max/min/stddev/count/sum of ``values``, keyed by name.

    The statistics are produced by this module's ``get_avg``, ``get_max``,
    ``get_min``, ``get_stddev``, ``get_count`` and ``get_sum`` helpers.
    """
    return {
        "avg": get_avg(values),
        "max": get_max(values),
        "min": get_min(values),
        "stddev": get_stddev(values),
        "count": get_count(values),
        "sum": get_sum(values),
    }


def _summarize_transposed(matrix):
    """Return one ``_summarize`` result per row of ``matrix.transpose()``."""
    return [_summarize(row) for row in matrix.transpose()]


def _flat_file_row(h5):
    """Read one open song file and return the list of values for its CSV row.

    Many more fields are read and aggregated than are currently written;
    they are kept so the set of reads performed on each file is unchanged.
    Float fields that come back as NaN are replaced by -1.
    """
    title = hdf5_getters.get_title(h5).replace('"', '')
    _pause_on_comma(title)
    album = hdf5_getters.get_release(h5).replace('"', '')
    _pause_on_comma(album)
    artist_name = hdf5_getters.get_artist_name(h5)
    # The comma check looks at the name before its double quotes are removed.
    _pause_on_comma(artist_name)
    artist_name = artist_name.replace('"', '')

    duration = hdf5_getters.get_duration(h5)
    samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
    artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
    artist_fam = _nan_to_minus_one(hdf5_getters.get_artist_familiarity(h5))
    artist_hotness = _nan_to_minus_one(
        hdf5_getters.get_artist_hotttnesss(h5))
    artist_id = hdf5_getters.get_artist_id(h5)
    artist_lat = _nan_to_minus_one(hdf5_getters.get_artist_latitude(h5))

    # Escape commas, then blank out locations that are hyperlinks or
    # longer than 100 characters.
    artist_loc = hdf5_getters.get_artist_location(h5).replace(",", "\\,")
    if artist_loc.startswith("<a") or len(artist_loc) > 100:
        artist_loc = ""

    artist_lon = _nan_to_minus_one(hdf5_getters.get_artist_longitude(h5))
    artist_mbid = hdf5_getters.get_artist_mbid(h5)
    artist_pmid = hdf5_getters.get_artist_playmeid(h5)
    audio_md5 = hdf5_getters.get_audio_md5(h5)
    danceability = _nan_to_minus_one(hdf5_getters.get_danceability(h5))
    end_fade_in = _nan_to_minus_one(hdf5_getters.get_end_of_fade_in(h5))
    energy = _nan_to_minus_one(hdf5_getters.get_energy(h5))
    song_key = hdf5_getters.get_key(h5)
    key_c = _nan_to_minus_one(hdf5_getters.get_key_confidence(h5))
    loudness = _nan_to_minus_one(hdf5_getters.get_loudness(h5))
    mode = hdf5_getters.get_mode(h5)
    mode_conf = _nan_to_minus_one(hdf5_getters.get_mode_confidence(h5))
    release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
    song_hot = _nan_to_minus_one(hdf5_getters.get_song_hotttnesss(h5))
    song_id = hdf5_getters.get_song_id(h5)
    start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
    tempo = _nan_to_minus_one(hdf5_getters.get_tempo(h5))
    time_sig = hdf5_getters.get_time_signature(h5)
    time_sig_c = _nan_to_minus_one(
        hdf5_getters.get_time_signature_confidence(h5))
    track_id = hdf5_getters.get_track_id(h5)
    track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
    year = hdf5_getters.get_year(h5)

    # Per-array aggregates (avg, max, min, stddev, count, sum).
    bars_c = _summarize(hdf5_getters.get_bars_confidence(h5))
    bars_start = _summarize(hdf5_getters.get_bars_start(h5))
    beats_c = _summarize(hdf5_getters.get_beats_confidence(h5))
    beats_start = _summarize(hdf5_getters.get_beats_start(h5))
    sec_c = _summarize(hdf5_getters.get_sections_confidence(h5))
    sec_start = _summarize(hdf5_getters.get_sections_start(h5))
    seg_c = _summarize(hdf5_getters.get_segments_confidence(h5))
    seg_loud_max = _summarize(hdf5_getters.get_segments_loudness_max(h5))
    seg_loud_max_time = _summarize(
        hdf5_getters.get_segments_loudness_max_time(h5))
    seg_loud_start = _summarize(
        hdf5_getters.get_segments_loudness_start(h5))
    seg_pitch = hdf5_getters.get_segments_pitches(h5)
    seg_start = _summarize(hdf5_getters.get_segments_start(h5))
    seg_timbre = hdf5_getters.get_segments_timbre(h5)
    tatms_c = _summarize(hdf5_getters.get_tatums_confidence(h5))
    tatms_start = _summarize(hdf5_getters.get_tatums_start(h5))

    # Genres: map the artist terms selected by get_genre_indexes through
    # the genre dictionary and collect every match.
    art_trm = hdf5_getters.get_artist_terms(h5)
    trm_freq = hdf5_getters.get_artist_terms_freq(h5)
    trn_wght = hdf5_getters.get_artist_terms_weight(h5)
    a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
    final_genre = []
    for genre_index in get_genre_indexes(trm_freq):
        genre_tmp = get_genre(art_trm, genre_index)
        final_genre.extend(genre_dict.get_genre_in_dict(genre_tmp))
    if final_genre:
        genre_array = genre_columns([int(genre) for genre in final_genre])
    else:
        # No term was found in the dictionary.
        genre_array = genre_columns(-1)

    # Aggregates per transposed row of the pitch and timbre matrices.
    # The timbre aggregates used to be computed from seg_pitch by mistake.
    seg_pitch_stats = _summarize_transposed(seg_pitch)
    seg_timbre_stats = _summarize_transposed(seg_timbre)

    return [
        title, album, artist_name, year, duration, seg_start["count"],
        tempo
    ]


def data_to_flat_file(basedir, ext='.h5'):
    """Extract song information from HDF5 files and write a flat CSV file.

    Every directory below ``basedir`` is searched for files matching
    ``'*' + ext``.  Each file contributes one row (title, album, artist
    name, year, duration, segment-start count, tempo) to
    ``metadata_wholeA.csv`` in the current working directory.  The file
    name and the running song count are printed as progress output.

    The output file is opened in ``"wb"`` mode, which is what the Python 2
    ``csv`` module expects.

    Args:
        basedir: Root directory to search.
        ext: File extension to match, including the dot.
    """
    count = 0  # song counter
    with open("metadata_wholeA.csv", "wb") as out_file:
        writer = csv.writer(out_file)
        for root, _dirs, _files in os.walk(basedir):
            for f in glob.glob(os.path.join(root, '*' + ext)):
                print(f)  # the name of the file
                h5 = hdf5_getters.open_h5_file_read(f)
                try:
                    writer.writerow(_flat_file_row(h5))
                finally:
                    # Release the handle even when a read or write fails.
                    h5.close()
                count = count + 1
                print(count)
# One entry per exportable CSV column:
# (header name, Song attribute holding the value, formatting style).
_CSV_COLUMNS = (
    ("AlbumID", "albumID", "plain"),
    ("AlbumName", "albumName", "quoted_no_commas"),
    ("ArtistID", "artistID", "quoted"),
    ("ArtistLatitude", "artistLatitude", "blank_if_nan"),
    ("ArtistLocation", "artistLocation", "quoted_no_commas"),
    ("ArtistLongitude", "artistLongitude", "blank_if_nan"),
    ("ArtistName", "artistName", "quoted"),
    ("Danceability", "danceability", "plain"),
    ("Duration", "duration", "plain"),
    ("KeySignature", "keySignature", "plain"),
    ("KeySignatureConfidence", "keySignatureConfidence", "plain"),
    ("SongID", "id", "quoted"),
    ("Tempo", "tempo", "plain"),
    ("TimeSignature", "timeSignature", "plain"),
    ("TimeSignatureConfidence", "timeSignatureConfidence", "plain"),
    ("Title", "title", "quoted"),
    ("Year", "year", "plain"),
)

# The same columns, looked up by lower-cased header name.
_CSV_COLUMNS_BY_KEY = {column[0].lower(): column for column in _CSV_COLUMNS}


def _prompt_for_csv_columns():
    """Ask for a column order until every name entered is a known column.

    Returns the lower-cased column names in the order typed.  Typing
    ``exit`` (before any unknown name) terminates the program.
    """
    while True:
        csvAttributeString = raw_input("\n\nIn what order would you like the colums of the CSV file?\n" +
            "Please delineate with commas. The options are: " +
            "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"+
            " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," +
            " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" +
            "For example, you may write \"Title, Tempo, Duration\"...\n\n" +
            "...or exit by typing 'exit'.\n\n")

        attributes = [name.lower()
                      for name in re.split(r'\W+', csvAttributeString)]
        for attribute in attributes:
            if attribute == 'exit':
                sys.exit()
            if attribute not in _CSV_COLUMNS_BY_KEY:
                print("==============")
                print("I believe there has been an error with the input.")
                print("==============")
                break
        else:
            # Every name was recognised.
            return attributes


def _format_csv_field(song, attribute):
    """Return the CSV text of one column of ``song``.

    ``attribute`` is a lower-cased column name.  Text columns are wrapped in
    double quotes (album name and artist location also lose their commas),
    latitude/longitude are left blank when they read ``'nan'``, and all
    other columns are emitted verbatim.
    """
    column = _CSV_COLUMNS_BY_KEY.get(attribute)
    if column is None:
        return "Erm. This didn't work. Error. :( :(\n"
    _header, song_attribute, style = column
    value = getattr(song, song_attribute)
    if style == "quoted_no_commas":
        return "\"" + value.replace(',', '') + "\""
    if style == "quoted":
        return "\"" + value + "\""
    if style == "blank_if_nan" and value == 'nan':
        return ''
    return value


def main():
    """Write selected fields of every ``.H5`` file under ``.`` to SongCSV.csv.

    The first line of the file is a header made of ``SongNumber`` followed
    by the chosen columns.  One line per song file follows, starting with
    ``song.songCount``.  The column order is hard coded unless ``prompt`` is
    set to True, in which case it is asked for interactively.
    """
    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #set 'prompt' to True
    #else, leave it False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################

    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = "." # "." As the default means the current directory
    ext = ".H5" #Set the extension here. H5 is the extension for HDF5 files.

    # The context manager closes the file on sys.exit() and on errors too.
    with open('SongCSV.csv', 'w') as outputFile1:
        if prompt:
            csvAttributeList = _prompt_for_csv_columns()
            header = ",".join(_CSV_COLUMNS_BY_KEY[attribute][0]
                              for attribute in csvAttributeList)
        else:
            #################################################
            #change the order of the csv file here
            #Default is to list all available attributes
            header = ("SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"+
                "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,"+
                "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"+
                "Title,Year")
            #################################################
            csvAttributeList = [name.lower()
                                for name in re.split(r'\W+', header)]

        # Every data row starts with the song count, so the header always
        # starts with the matching SongNumber column.
        outputFile1.write("SongNumber," + header + "\n")

        for root, _dirs, _files in os.walk(basedir):
            for f in glob.glob(os.path.join(root, '*' + ext)):
                print(f)

                songH5File = hdf5_getters.open_h5_file_read(f)
                try:
                    song = Song(str(hdf5_getters.get_song_id(songH5File)))

                    song.artistID = str(hdf5_getters.get_artist_id(songH5File))
                    song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
                    song.albumName = str(hdf5_getters.get_release(songH5File))
                    song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
                    song.artistLocation = str(hdf5_getters.get_artist_location(songH5File))
                    song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
                    song.artistName = str(hdf5_getters.get_artist_name(songH5File))
                    song.danceability = str(hdf5_getters.get_danceability(songH5File))
                    song.duration = str(hdf5_getters.get_duration(songH5File))
                    song.keySignature = str(hdf5_getters.get_key(songH5File))
                    song.keySignatureConfidence = str(hdf5_getters.get_key_confidence(songH5File))
                    song.tempo = str(hdf5_getters.get_tempo(songH5File))
                    song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
                    song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File))
                    song.title = str(hdf5_getters.get_title(songH5File))
                    song.year = str(hdf5_getters.get_year(songH5File))
                finally:
                    # Release the handle even when a getter raises.
                    songH5File.close()

                fields = [str(song.songCount)]
                fields.extend(_format_csv_field(song, attribute)
                              for attribute in csvAttributeList)
                outputFile1.write(",".join(fields) + "\n")
Example #38
0
                continue

            # Get the artist name
            artist_name = hdf5_getters.get_artist_name(h5)
            artist = artist_name.translate(None, string.punctuation)

            # Get the title of the song
            title_song = hdf5_getters.get_title(h5)
            title = title_song.translate(None, string.punctuation)

            # Get artist location
            artist_location = hdf5_getters.get_artist_location(h5)
            artist_loc = artist_location.translate(None, string.punctuation)

            # Get release
            release_song = hdf5_getters.get_release(h5)
            release = release_song.translate(None, string.punctuation)

            # Get artist HOTTTNESSSSSS
            hotttness = hdf5_getters.get_artist_hotttnesss(h5)

            # Get artist familiarity
            familiarity = hdf5_getters.get_artist_familiarity(h5)

            # Get danceability
            danceability = hdf5_getters.get_danceability(h5)

            # Get duration
            duration = hdf5_getters.get_duration(h5)

            # Get energy
Example #39
0
# Number of genre flag columns written per song (genre_array[0] .. genre_array[132]).
_GENRE_COLUMN_COUNT = 133
# Number of per-dimension rows taken from the transposed pitch / timbre matrices.
_PITCH_TIMBRE_DIMENSIONS = 12


def _nan_to_minus_one(value):
    """Return -1 when *value* is NaN, otherwise return *value* unchanged."""
    return -1 if numpy.isnan(value) else value


def _unquote_and_flag_commas(text):
    """Strip double quotes from *text*; print it and pause 1s if it contains a comma.

    The comma itself is left in place (csv.writer quotes such fields); the
    print/pause only draws the operator's attention to the value.
    """
    text = text.replace('"', '')
    if text.find(',') != -1:
        print(text)
        time.sleep(1)
    return text


def _clean_artist_location(location):
    """Backslash-escape commas; blank out hyperlinks and values over 100 chars."""
    location = location.replace(",", "\\,")
    if location.startswith("<a") or len(location) > 100:
        return ""
    return location


def _aggregate(values):
    """Return [avg, max, min, stddev, count, sum] of *values*.

    Uses the module's get_avg/get_max/get_min/get_stddev/get_count/get_sum
    helpers, in the column order the flat file expects.
    """
    return [get_avg(values), get_max(values), get_min(values),
            get_stddev(values), get_count(values), get_sum(values)]


def _aggregate_dimensions(matrix):
    """Return the six aggregates for each of the first 12 rows of matrix.transpose().

    The result is one flat list: avg, max, min, stddev, count, sum of row 0,
    then of row 1, and so on. Raises IndexError if the transposed matrix has
    fewer than 12 rows.
    """
    per_dimension = [_aggregate(row) for row in matrix.transpose()]
    flat = []
    for k in range(_PITCH_TIMBRE_DIMENSIONS):
        flat.extend(per_dimension[k])
    return flat


def _genre_flags(h5):
    """Build the genre column array for the song in *h5*.

    Each index returned by get_genre_indexes(term frequencies) is resolved to
    a term with get_genre, then looked up with genre_dict.get_genre_in_dict.
    Every match is converted to an int column number and passed to
    genre_columns; when nothing matches, genre_columns(-1) is used instead.
    """
    terms = hdf5_getters.get_artist_terms(h5)
    term_freq = hdf5_getters.get_artist_terms_freq(h5)
    matched = []
    for term_index in get_genre_indexes(term_freq):
        matched.extend(genre_dict.get_genre_in_dict(get_genre(terms, term_index)))
    if matched:
        return genre_columns([int(genre) for genre in matched])
    return genre_columns(-1)


def _song_row(h5):
    """Return the full list of flat-file column values for the song in *h5*."""
    g = hdf5_getters
    nan_safe = _nan_to_minus_one

    # Basic metadata.
    row = [
        _unquote_and_flag_commas(g.get_title(h5)),
        _unquote_and_flag_commas(g.get_release(h5)),
        _unquote_and_flag_commas(g.get_artist_name(h5)),
        g.get_duration(h5),
        g.get_analysis_sample_rate(h5),
        g.get_artist_7digitalid(h5),
        nan_safe(g.get_artist_familiarity(h5)),
        nan_safe(g.get_artist_hotttnesss(h5)),
        g.get_artist_id(h5),
        nan_safe(g.get_artist_latitude(h5)),
        _clean_artist_location(g.get_artist_location(h5)),
        nan_safe(g.get_artist_longitude(h5)),
        g.get_artist_mbid(h5),
    ]

    # Genre flag columns; indexing (not slicing) so a short array still fails loudly.
    genre_array = _genre_flags(h5)
    row.extend(genre_array[k] for k in range(_GENRE_COLUMN_COUNT))

    # Scalar analysis fields.
    row.extend([
        g.get_artist_playmeid(h5),
        g.get_audio_md5(h5),
        nan_safe(g.get_danceability(h5)),
        nan_safe(g.get_end_of_fade_in(h5)),
        nan_safe(g.get_energy(h5)),
        g.get_key(h5),
        nan_safe(g.get_key_confidence(h5)),
        nan_safe(g.get_loudness(h5)),
        g.get_mode(h5),
        nan_safe(g.get_mode_confidence(h5)),
        g.get_release_7digitalid(h5),
        nan_safe(g.get_song_hotttnesss(h5)),
        g.get_song_id(h5),
        g.get_start_of_fade_out(h5),
        nan_safe(g.get_tempo(h5)),
        g.get_time_signature(h5),
        nan_safe(g.get_time_signature_confidence(h5)),
        g.get_track_id(h5),
        g.get_track_7digitalid(h5),
        g.get_year(h5),
    ])

    # Six aggregates for each one-dimensional series, in flat-file column order.
    for series_getter in (
            g.get_bars_confidence,
            g.get_bars_start,
            g.get_beats_confidence,
            g.get_beats_start,
            g.get_sections_confidence,
            g.get_sections_start,
            g.get_segments_confidence,
            g.get_segments_loudness_max,
            g.get_segments_loudness_max_time,
            g.get_segments_loudness_start):
        row.extend(_aggregate(series_getter(h5)))

    row.extend(_aggregate_dimensions(g.get_segments_pitches(h5)))
    row.extend(_aggregate(g.get_segments_start(h5)))
    # Fix: the timbre columns were previously computed from the pitch matrix.
    row.extend(_aggregate_dimensions(g.get_segments_timbre(h5)))
    row.extend(_aggregate(g.get_tatums_confidence(h5)))
    row.extend(_aggregate(g.get_tatums_start(h5)))
    return row


def data_to_flat_file(basedir, ext='.h5', output_path="metadata.csv"):
    """Extract the information from every song file under *basedir* into a flat CSV.

    Walks *basedir* recursively, opens each file whose name ends with *ext*
    through hdf5_getters, and writes one row per file to *output_path*.
    NaN values in the checked numeric fields are written as -1. The file name
    and a running song count are printed as progress output.

    basedir     -- root directory to search.
    ext         -- file extension of the song files (default '.h5').
    output_path -- CSV file to create (default "metadata.csv", as before).

    Returns None.
    """
    count = 0  # song counter
    # Binary mode is what the Python 2 csv module expects for its output file.
    with open(output_path, "wb") as out:
        writer = csv.writer(out)
        for root, _dirs, _files in os.walk(basedir):
            for f in glob.glob(os.path.join(root, '*' + ext)):
                print(f)  # the name of the file
                h5 = hdf5_getters.open_h5_file_read(f)
                try:
                    writer.writerow(_song_row(h5))
                finally:
                    # Release the handle even when a getter raises.
                    h5.close()
                count += 1
                print(count)
Example #40
0
def _song_csv_quote(text):
    """Wrap *text* in double quotes, doubling any embedded double quote."""
    return '"' + text.replace('"', '""') + '"'


def _song_csv_quote_without_commas(text):
    """Remove commas from *text*, then quote it for the CSV."""
    return _song_csv_quote(text.replace(',', ''))


def _song_csv_blank_if_nan(text):
    """Return '' when *text* is the string 'nan', otherwise *text* unchanged."""
    return '' if text == 'nan' else text


# (header name, formatter) for every column the CSV can contain. The formatter
# turns a Song into the exact text written for that column.
_SONG_CSV_COLUMNS = (
    ('AlbumID', lambda song: song.albumID),
    ('AlbumName', lambda song: _song_csv_quote_without_commas(song.albumName)),
    ('TrackId', lambda song: song.trackId),
    ('ArtistID', lambda song: _song_csv_quote(song.artistID)),
    ('ArtistLatitude', lambda song: _song_csv_blank_if_nan(song.artistLatitude)),
    ('ArtistLocation', lambda song: _song_csv_quote_without_commas(song.artistLocation)),
    ('ArtistLongitude', lambda song: _song_csv_blank_if_nan(song.artistLongitude)),
    ('ArtistName', lambda song: _song_csv_quote(song.artistName)),
    ('Danceability', lambda song: song.danceability),
    ('Duration', lambda song: song.duration),
    ('KeySignature', lambda song: song.keySignature),
    ('KeySignatureConfidence', lambda song: song.keySignatureConfidence),
    ('SongID', lambda song: _song_csv_quote(song.id)),
    ('Tempo', lambda song: song.tempo),
    ('TimeSignature', lambda song: song.timeSignature),
    ('TimeSignatureConfidence', lambda song: song.timeSignatureConfidence),
    ('Title', lambda song: _song_csv_quote(song.title)),
    ('Year', lambda song: song.year),
)
# Lookups keyed by the lower-cased header name.
_SONG_CSV_HEADERS = dict((name.lower(), name) for name, _ in _SONG_CSV_COLUMNS)
_SONG_CSV_FORMATTERS = dict((name.lower(), fmt) for name, fmt in _SONG_CSV_COLUMNS)


def _song_csv_prompt_for_attributes():
    """Ask the user for the column order until every requested name is known.

    Returns the requested names lower-cased, in the order given. Typing
    'exit' terminates the program via sys.exit().
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input      # Python 3

    while True:
        answer = read_line(
            "\n\nIn what order would you like the colums of the CSV file?\n"
            + "Please delineate with commas. The options are: " +
            "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"
            +
            " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo,"
            +
            " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n"
            +
            "For example, you may write \"Title, Tempo, Duration\"...\n\n"
            + "...or exit by typing 'exit'.\n\n")

        requested = [name.lower() for name in re.split(r'\W+', answer)]
        for name in requested:
            if name == 'exit':
                sys.exit()
            if name not in _SONG_CSV_HEADERS:
                print("==============")
                print("I believe there has been an error with the input.")
                print("==============")
                break
        else:
            # Every name was recognised.
            return requested


def _song_csv_read_song(song_h5, index):
    """Build a Song from entry *index* of the open h5 file, all fields as str."""
    g = hdf5_getters
    song = Song(str(g.get_song_id(song_h5, index)))
    song.trackId = str(g.get_track_id(song_h5, index))
    song.artistID = str(g.get_artist_id(song_h5, index))
    song.albumID = str(g.get_release_7digitalid(song_h5, index))
    song.albumName = str(g.get_release(song_h5, index))
    song.artistLatitude = str(g.get_artist_latitude(song_h5, index))
    song.artistLocation = str(g.get_artist_location(song_h5, index))
    song.artistLongitude = str(g.get_artist_longitude(song_h5, index))
    song.artistName = str(g.get_artist_name(song_h5, index))
    song.danceability = str(g.get_danceability(song_h5, index))
    song.duration = str(g.get_duration(song_h5, index))
    song.keySignature = str(g.get_key(song_h5, index))
    song.keySignatureConfidence = str(g.get_key_confidence(song_h5, index))
    song.tempo = str(g.get_tempo(song_h5, index))
    song.timeSignature = str(g.get_time_signature(song_h5, index))
    song.timeSignatureConfidence = str(
        g.get_time_signature_confidence(song_h5, index))
    song.title = str(g.get_title(song_h5, index))
    song.year = str(g.get_year(song_h5, index))
    return song


def _song_csv_format_row(song, attributes):
    """Return one CSV line: song.songCount followed by the requested columns."""
    fields = [str(song.songCount)]
    for attribute in attributes:
        formatter = _SONG_CSV_FORMATTERS.get(attribute)
        if formatter is None:
            # Unknown column: keep the original in-band error marker.
            fields.append("Erm. This didn't work. Error. :( :(\n")
        else:
            fields.append(formatter(song))
    return ",".join(fields) + "\n"


def main(basedir="/home/umwangye/millonsong/MillionSongSubset/data/",
         ext=".h5", prompt=False, output_path='SongCSV.csv'):
    """Dump every song found under *basedir* into a CSV file.

    basedir     -- root directory searched recursively for song files.
    ext         -- extension of the song files (H5 is the HDF5 extension).
    prompt      -- when True, ask the user for the column order; otherwise
                   the hard-coded default order below is used.
    output_path -- CSV file to create.

    The first line is a header starting with "SongNumber"; each following
    line holds one song, starting with its songCount. Quoted fields have
    embedded double quotes doubled so the file stays parseable. In prompt
    mode the header is written only once the answer is valid, and TrackId is
    accepted as a column name although the prompt text does not list it.
    The output file and every h5 handle are closed even if an error occurs
    or the user exits from the prompt.
    """
    with open(output_path, 'w') as output_file:
        if prompt:
            attributes = _song_csv_prompt_for_attributes()
            header = ",".join(_SONG_CSV_HEADERS[name] for name in attributes)
        else:
            #################################################
            # change the order of the csv file here
            # Default is to list all available attributes
            header = (
                "SongID,AlbumID,AlbumName,TrackId,ArtistID,ArtistLatitude,ArtistLocation,"
                +
                "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature," +
                "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"
                + "Title,Year")
            #################################################
            attributes = [name.lower() for name in re.split(r'\W+', header)]

        # Rows always begin with the song count, so the header must as well.
        output_file.write("SongNumber," + header + "\n")

        for root, _dirs, _files in os.walk(basedir):
            for f in glob.glob(os.path.join(root, '*' + ext)):
                print(f)
                song_h5 = hdf5_getters.open_h5_file_read(f)
                try:
                    for index in range(hdf5_getters.get_num_songs(song_h5)):
                        song = _song_csv_read_song(song_h5, index)
                        output_file.write(_song_csv_format_row(song, attributes))
                finally:
                    song_h5.close()
Example #41
0
# Canonical column names the interactive prompt in main() accepts.
_PROMPT_COLUMNS = (
    'AlbumID', 'AlbumName', 'ArtistID', 'ArtistLatitude', 'ArtistLocation',
    'ArtistLongitude', 'ArtistName', 'Danceability', 'Duration',
    'KeySignature', 'KeySignatureConfidence', 'SongID', 'Tempo',
    'TimeSignature', 'TimeSignatureConfidence', 'Title', 'Year',
    'SongHotttnesss',
)

# (Song attribute, hdf5_getters function name) pairs whose value is stored
# as plain str(...) of the getter result.
_SCALAR_GETTERS = (
    ('artistFamiliarity', 'get_artist_familiarity'),
    ('artistHotttnesss', 'get_artist_hotttnesss'),
    ('artistID', 'get_artist_id'),
    ('albumID', 'get_release_7digitalid'),
    ('albumName', 'get_release'),
    ('artistLatitude', 'get_artist_latitude'),
    ('artistLocation', 'get_artist_location'),
    ('artistLongitude', 'get_artist_longitude'),
    ('artistName', 'get_artist_name'),
    ('danceability', 'get_danceability'),
    ('duration', 'get_duration'),
    ('endOfFadeIn', 'get_end_of_fade_in'),
    ('energy', 'get_energy'),
    ('hotttnesss', 'get_song_hotttnesss'),
    ('keySignature', 'get_key'),
    ('keySignatureConfidence', 'get_key_confidence'),
    ('loudness', 'get_loudness'),
    ('mode', 'get_mode'),
    ('modeConfidence', 'get_mode_confidence'),
    ('startOfFadeOut', 'get_start_of_fade_out'),
    ('tempo', 'get_tempo'),
    ('timeSignature', 'get_time_signature'),
    ('timeSignatureConfidence', 'get_time_signature_confidence'),
    ('title', 'get_title'),
    ('year', 'get_year'),
)

# Same shape as _SCALAR_GETTERS, but the stringified value additionally has
# every comma and newline removed so it fits into a single CSV cell.
_ARRAY_GETTERS = (
    ('barsConfidence', 'get_bars_confidence'),
    ('barsStart', 'get_bars_start'),
    ('beatsConfidence', 'get_beats_confidence'),
    ('beatsStart', 'get_beats_start'),
    ('sectionsConfidence', 'get_sections_confidence'),
    # Previously read get_segments_start, duplicating the SegmentsStart column.
    ('sectionsStart', 'get_sections_start'),
    ('segmentsConfidence', 'get_segments_confidence'),
    ('segmentsLoudnessMax', 'get_segments_loudness_max'),
    ('segmentsLoudnessMaxTime', 'get_segments_loudness_max_time'),
    ('segmentsLoudnessMaxStart', 'get_segments_loudness_start'),
    ('segmentsPitches', 'get_segments_pitches'),
    ('segmentsStart', 'get_segments_start'),
    ('segmentsTimbre', 'get_segments_timbre'),
    ('tatumsConfidence', 'get_tatums_confidence'),
    ('tatumsStart', 'get_tatums_start'),
)

# Lower-cased CSV column name -> (Song attribute, cell style).
#   'plain'            value written as is
#   'quoted'           value wrapped in double quotes
#   'quoted_no_commas' commas removed, then wrapped in double quotes
#   'blank_nan'        the string 'nan' is written as an empty cell
_CSV_COLUMNS = {
    'albumid': ('albumID', 'plain'),
    'albumname': ('albumName', 'quoted_no_commas'),
    'artistfamiliarity': ('artistFamiliarity', 'plain'),
    'artisthotttnesss': ('artistHotttnesss', 'plain'),
    'artistid': ('artistID', 'quoted'),
    'artistlatitude': ('artistLatitude', 'blank_nan'),
    'artistlocation': ('artistLocation', 'quoted_no_commas'),
    'artistlongitude': ('artistLongitude', 'blank_nan'),
    'artistname': ('artistName', 'quoted'),
    'barsconfidence': ('barsConfidence', 'plain'),
    'barsstart': ('barsStart', 'plain'),
    'beatsconfidence': ('beatsConfidence', 'plain'),
    'beatsstart': ('beatsStart', 'plain'),
    'danceability': ('danceability', 'plain'),
    'duration': ('duration', 'plain'),
    'endoffadein': ('endOfFadeIn', 'plain'),
    'energy': ('energy', 'plain'),
    'keysignature': ('keySignature', 'plain'),
    'keysignatureconfidence': ('keySignatureConfidence', 'plain'),
    'loudness': ('loudness', 'plain'),
    'mode': ('mode', 'plain'),
    'modeconfidence': ('modeConfidence', 'plain'),
    'sectionsconfidence': ('sectionsConfidence', 'plain'),
    'sectionsstart': ('sectionsStart', 'plain'),
    'segmentsconfidence': ('segmentsConfidence', 'plain'),
    'segmentsloudnessmax': ('segmentsLoudnessMax', 'plain'),
    'segmentsloudnessmaxtime': ('segmentsLoudnessMaxTime', 'plain'),
    'segmentsloudnessmaxstart': ('segmentsLoudnessMaxStart', 'plain'),
    'segmentspitches': ('segmentsPitches', 'plain'),
    'segmentsstart': ('segmentsStart', 'plain'),
    'segmentstimbre': ('segmentsTimbre', 'plain'),
    'songhotttnesss': ('hotttnesss', 'plain'),
    'songid': ('id', 'quoted'),
    'startoffadeout': ('startOfFadeOut', 'plain'),
    'tatumsconfidence': ('tatumsConfidence', 'plain'),
    'tatumsstart': ('tatumsStart', 'plain'),
    'tempo': ('tempo', 'plain'),
    'timesignature': ('timeSignature', 'plain'),
    'timesignatureconfidence': ('timeSignatureConfidence', 'plain'),
    'title': ('title', 'quoted'),
    'year': ('year', 'plain'),
}


def _prompt_for_header():
    """Ask the user for a column order until every name is recognised.

    Returns the comma-joined canonical column names. Typing ``exit`` as one
    of the names terminates the program via ``sys.exit()``.
    """
    accepted = dict((name.lower(), name) for name in _PROMPT_COLUMNS)
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        answer = read_line(
            "\n\nIn what order would you like the colums of the CSV file?\n"
            "Please delineate with commas. The options are: "
            "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"
            " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo,"
            " SongID, TimeSignature, TimeSignatureConfidence, Title, Year and SongHotttnesss.\n\n"
            "For example, you may write \"Title, Tempo, Duration\"...\n\n"
            "...or exit by typing 'exit'.\n\n")

        chosen = []
        for token in re.split(r'\W+', answer):
            key = token.lower()
            if key == 'exit':
                sys.exit()
            if key not in accepted:
                print("==============")
                print("I believe there has been an error with the input.")
                print("==============")
                chosen = None
                break
            chosen.append(accepted[key])

        # Only a fully valid answer becomes the header; otherwise ask again.
        if chosen is not None:
            return ",".join(chosen)


def _load_song(songH5File):
    """Build a Song from an open HDF5 handle, storing every field as a string."""
    song = Song(str(hdf5_getters.get_song_id(songH5File)))
    for attr, getter_name in _SCALAR_GETTERS:
        getter = getattr(hdf5_getters, getter_name)
        setattr(song, attr, str(getter(songH5File)))
    for attr, getter_name in _ARRAY_GETTERS:
        getter = getattr(hdf5_getters, getter_name)
        flattened = str(getter(songH5File)).replace(",", "").replace("\n", "")
        setattr(song, attr, flattened)
    return song


def _format_csv_row(song, columns):
    """Return one newline-terminated CSV line for ``song``.

    The first cell is ``song.songCount``; the remaining cells follow
    ``columns`` (lower-cased column names). A column that is not in
    ``_CSV_COLUMNS`` yields the legacy error marker text.
    """
    cells = [str(song.songCount)]
    for column in columns:
        spec = _CSV_COLUMNS.get(column)
        if spec is None:
            cells.append("Erm. This didn't work. Error. :( :(\n")
            continue
        attr, style = spec
        value = getattr(song, attr)
        if style == 'quoted_no_commas':
            value = "\"" + value.replace(',', '') + "\""
        elif style == 'quoted':
            value = "\"" + value + "\""
        elif style == 'blank_nan' and value == 'nan':
            value = ''
        cells.append(value)
    return ",".join(cells) + "\n"


def main():
    """Export every ``*.h5`` song file found under ``basedir`` to SongCSV.csv.

    The first line of the output is a header that starts with ``SongNumber``
    followed by the selected column names; each following line holds the
    values of one song file in the same order.

    Configuration is done by editing the marked values below: ``prompt``
    (ask for the column order interactively or not), the hard-coded header,
    ``basedir`` and ``ext``.
    """
    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #set 'prompt' to True
    #else, leave 'prompt' set to False and set the order of attributes in the
    #'else' clause
    prompt = False
    #################################################

    #################################################
    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    #basedir = "./I/"
    basedir = "./MillionSongSubset/data/"  # "." As the default means the current directory
    #basedir = "/Users/dafirebanks/Downloads/MillionSongSubset/data/"
    ext = "*.h5"  #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    # The context manager closes the CSV even if a song file fails to load.
    with open('SongCSV.csv', 'w') as outputFile1:
        if prompt:
            header = _prompt_for_header()
        else:
            #################################################
            #change the order of the csv file here
            #Default is to list all available attributes
            header = (
                "SongID,AlbumID,AlbumName,ArtistFamiliarity,ArtistHotttnesss,ArtistID,"
                "ArtistLatitude,ArtistLocation,"
                "ArtistLongitude,ArtistName,BarsConfidence,BarsStart,BeatsConfidence,"
                "BeatsStart,Danceability,Duration,EndOfFadeIn,Energy,KeySignature,"
                "KeySignatureConfidence,Loudness,Mode,ModeConfidence,SectionsConfidence,"
                "SectionsStart,SegmentsConfidence,SegmentsLoudnessMax,"
                "SegmentsLoudnessMaxTime,SegmentsLoudnessMaxStart,SegmentsPitches,"
                "SegmentsStart,SegmentsTimbre,SongHotttnesss,TatumsConfidence,"
                "TatumsStart,Tempo,TimeSignature,"
                "TimeSignatureConfidence,Title,Year")
            # Alternative, shorter column list:
            # header = ("SongID,AlbumID,AlbumName,ArtistFamiliarity,"
            #     "ArtistHotttnesss,ArtistID,"
            #     "ArtistLatitude,ArtistLocation,"
            #     "ArtistLongitude,ArtistName,"
            #     "BarsConfidence,BarsStart,"
            #     "Danceability,Duration,EndOfFadeIn,Energy,KeySignature,"
            #     "KeySignatureConfidence,Loudness,Mode,ModeConfidence,"
            #     "SegmentsPitches,"
            #     "SegmentsTimbre,SongHotttnesss,"
            #     "Tempo,TimeSignature,"
            #     "TimeSignatureConfidence,Title,Year")
            #################################################

        csvAttributeList = [name.lower() for name in re.split(r'\W+', header)]

        # Every data row starts with the song counter, so the header needs a
        # matching first column in both modes.
        outputFile1.write("SongNumber," + header + "\n")

        for root, _dirs, _files in os.walk(basedir):
            for f in glob.glob(os.path.join(root, ext)):
                print(f)
                songH5File = hdf5_getters.open_h5_file_read(f)
                try:
                    song = _load_song(songH5File)
                finally:
                    # All values are already copied into strings, so the
                    # handle can be released before the row is written.
                    songH5File.close()
                outputFile1.write(_format_csv_row(song, csvAttributeList))