def read(self, path):
        files = os.listdir(path)
        import csv
        with open('library_csv.csv', 'w') as library_csv:
            writer = csv.writer(library_csv)
            writer.writerow([
                'Loudness', 'Danceability', 'Energy', 'Tempo', 'timeSignature',
                'Title'
            ])
            # get params
            for filename in files:
                # hdf5path = filename
                hdf5path = "Data/" + filename
                # hdf5path.replace("'","",2)
                # sanity check
                if not os.path.isfile(hdf5path):
                    print 'ERROR: file', hdf5path, 'does not exist.'
                    sys.exit(0)
                h5 = hdf5_getters.open_h5_file_read(hdf5path)
                # get all getters
                loudness = hdf5.get_loudness(h5)
                dance = hdf5.get_danceability(h5)
                energy = hdf5.get_energy(h5)
                tempo = hdf5.get_tempo(h5)
                ts = hdf5.get_time_signature(h5)
                title = hdf5.get_title(h5)

                writer.writerow([loudness, dance, energy, tempo, ts, title])
                # print them
                h5.close()
        library_csv.close()
Exemple #2
0
def func_to_extract_features(filename):
    """
    This function extracts all features: per-track, per-section and per-segment
    """
#    - open the song file
    h5 = GETTERS.open_h5_file_read(filename)
#    - get per-track features and put them

    artist_id = GETTERS.get_artist_id(h5)
    song_id   = GETTERS.get_song_id(h5)

    artist_familiarity          = GETTERS.get_artist_familiarity(h5)
    artist_hotttnesss           = GETTERS.get_artist_hotttnesss(h5)
    artist_latitude             = GETTERS.get_artist_latitude(h5)
    artist_longitude            = GETTERS.get_artist_longitude(h5)
    danceability                = GETTERS.get_danceability(h5)
    energy                      = GETTERS.get_energy(h5)
    loudness                    = GETTERS.get_loudness(h5)
    song_hotttnesss             = GETTERS.get_song_hotttnesss(h5)
    tempo                       = GETTERS.get_tempo(h5)
    year                        = GETTERS.get_year(h5)

#   artist_ids.add(artist_id)

#    features_tuple = (artist_id, artist_familiarity, artist_hotttnesss, artist_latitude, artist_longitude, danceability, energy, loudness, song_hotttnesss, tempo, year)
    features_tuple = (artist_id, artist_familiarity, artist_hotttnesss, loudness, song_hotttnesss, tempo, year)
 #   print features_tuple
    
    features_tuples[song_id] = features_tuple
    
#    files_per_artist[artist_id] += 1
#    - close the file
    h5.close()
Exemple #3
0
def get_attribute(files):
    array = []
    count = 0
    for f in files:
        temp = []
        count += 1
        print(f)
        h5 = hdf5_getters.open_h5_file_read(f)
        temp.append(hdf5_getters.get_num_songs(h5))
        temp.append(hdf5_getters.get_artist_familiarity(h5))
        temp.append(hdf5_getters.get_artist_hotttnesss(h5))
        temp.append(hdf5_getters.get_danceability(h5))
        temp.append(hdf5_getters.get_energy(h5))
        temp.append(hdf5_getters.get_key(h5))
        temp.append(hdf5_getters.get_key_confidence(h5))
        temp.append(hdf5_getters.get_loudness(h5))
        temp.append(hdf5_getters.get_mode(h5))
        temp.append(hdf5_getters.get_mode_confidence(h5))
        temp.append(hdf5_getters.get_tempo(h5))
        temp.append(hdf5_getters.get_time_signature(h5))
        temp.append(hdf5_getters.get_time_signature_confidence(h5))
        temp.append(hdf5_getters.get_title(h5))
        temp.append(hdf5_getters.get_artist_name(h5))
        temp = np.nan_to_num(temp)
        array.append(temp)
        # if count%100 ==0:
        # print(array[count-100:count-1])
        # kmean.fit(array[count-100:count-1])
        h5.close()
    return array
Exemple #4
0
def func_to_desired_song_data(filename):
    h5 = GETTERS.open_h5_file_read(filename)
    track_id = GETTERS.get_track_id(h5)
    for song in random_songs:
        if song[0] == track_id:
            print("FOUND ONE!")
            title = replace_characters(GETTERS.get_title(h5))
            artist = replace_characters(GETTERS.get_artist_name(h5))
            year = GETTERS.get_year(h5)
            tempo = GETTERS.get_tempo(h5)
            key = GETTERS.get_key(h5)
            loudness = GETTERS.get_loudness(h5)
            energy = GETTERS.get_energy(h5)
            danceability = GETTERS.get_danceability(h5)
            time_signature = GETTERS.get_time_signature(h5)
            mode = GETTERS.get_mode(h5)
            hotttness = GETTERS.get_song_hotttnesss(h5)

            song_data = {
                'title': title,
                'artist': artist,
                'year': year,
                'tempo': tempo,
                'key': key,
                'loudness': loudness,
                'energy': energy,
                'danceability': danceability,
                'time_signature': time_signature,
                'mode': mode,
                'hotttness': hotttness
            }

            all_the_data.append(song_data)

    h5.close()
def getSongProperties(songCount = 3000, splitData = True):
    songDict = {}
    songIdDict = {}
    songIdCount = 0
    for root, dirs, files in os.walk(msd_subset_data_path):
        files = glob.glob(os.path.join(root,'*.h5'))
        for f in files:
            h5 = GETTERS.open_h5_file_read(f)
            tempo = GETTERS.get_tempo(h5)
            danceability = GETTERS.get_danceability(h5)
            energy = GETTERS.get_energy(h5)
            loudness = GETTERS.get_loudness(h5)
            #print GETTERS.get_artist_terms(h5)
            timbre = GETTERS.get_segments_timbre(h5)
            artist_hotness = GETTERS.get_artist_hotttnesss(h5)
            song_key = GETTERS.get_key(h5)
            songIdDict[GETTERS.get_song_id(h5)] = songIdCount
            songDict[songIdCount] = [tempo,danceability,energy,loudness,artist_hotness,song_key]
            songIdCount += 1
            h5.close()
            #if len(songDict) >2:
             #   break
        #if len(songDict) >2:
         #   break
        if songIdCount > songCount and splitData:
            break
    return songIdDict,songDict
Exemple #6
0
def extract_features(filename):
    h5 = hdf5_getters.open_h5_file_read(filename)
    f = [None] * len(features)
    f[features.index('track_id')] = hdf5_getters.get_track_id(h5, 0).item()
    f[features.index('song_id')] = hdf5_getters.get_song_id(h5, 0).item()
    f[features.index('hotttnesss')] = hdf5_getters.get_artist_hotttnesss(
        h5, 0).item()
    f[features.index('danceability')] = hdf5_getters.get_danceability(
        h5, 0).item()
    f[features.index('duration')] = hdf5_getters.get_duration(h5, 0).item()
    f[features.index('key')] = hdf5_getters.get_key(h5, 0).item()
    f[features.index('energy')] = hdf5_getters.get_energy(h5, 0).item()
    f[features.index('loudness')] = hdf5_getters.get_loudness(h5, 0).item()
    f[features.index('year')] = hdf5_getters.get_year(h5, 0).item()
    f[features.index('time_signature')] = hdf5_getters.get_time_signature(
        h5, 0).item()
    f[features.index('tempo')] = hdf5_getters.get_tempo(h5, 0).item()
    tags = ''
    for tag in hdf5_getters.get_artist_terms(h5):
        tags += ('%s|' % tag)
    # Remove trailing pipe.
    tags = tags[:len(tags) - 1]
    f[features.index('tags')] = tags
    h5.close()
    return f
Exemple #7
0
def h5_to_csv_fields(h5,song):
	'''Converts h5 format to text
		Inputs: h5, an h5 file object, usable with the wrapper code MSongsDB
			song, an integer, representing which song in the h5 file to take the info out of (h5 files contain many songs)
		Output: a string representing all the information of this song, as a single line of a csv file
	'''
	rv=[]
	##All these are regular getter functions from wrapper code
	rv.append(gt.get_artist_name(h5,song))
	rv.append(gt.get_title(h5, song))
	rv.append(gt.get_release(h5, song))
	rv.append(gt.get_year(h5,song))
	rv.append(gt.get_duration(h5,song))
	rv.append(gt.get_artist_familiarity(h5,song))
	rv.append(gt.get_artist_hotttnesss(h5,song))
	rv.append(gt.get_song_hotttnesss(h5, song))
	
	##artist_terms, artist_terms_freq, and artist_terms_weight getter functions
	##are all arrays, so we need to turn them into strings first. We used '_' as a separator
	rv.append(array_to_csv_field(list(gt.get_artist_terms(h5,song))))
	rv.append(array_to_csv_field(list(gt.get_artist_terms_freq(h5,song))))
	rv.append(array_to_csv_field(list(gt.get_artist_terms_weight(h5,song))))
	rv.append(gt.get_mode(h5,song))
	rv.append(gt.get_key(h5,song))
	rv.append(gt.get_tempo(h5,song))
	rv.append(gt.get_loudness(h5,song))
	rv.append(gt.get_danceability(h5,song))
	rv.append(gt.get_energy(h5,song))
	rv.append(gt.get_time_signature(h5,song))
	rv.append(array_to_csv_field(list(gt.get_segments_start(h5,song))))
	##These arrays have vectors (Arrays) as items, 12 dimensional each
	##An array like [[1,2,3],[4,5,6]] will be written to csv as '1;2;3_4;5;6', i.e. there's two types of separators
	rv.append(double_Array_to_csv_field(list(gt.get_segments_timbre(h5,song)),'_',';'))
	rv.append(double_Array_to_csv_field(list(gt.get_segments_pitches(h5,song)),'_',';'))
	rv.append(array_to_csv_field(list(gt.get_segments_loudness_start(h5,song))))
	rv.append(array_to_csv_field(list(gt.get_segments_loudness_max(h5,song))))
	rv.append(array_to_csv_field(list(gt.get_segments_loudness_max_time(h5,song))))
	rv.append(array_to_csv_field(list(gt.get_sections_start(h5,song))))
	##turn this list into a string with comma separators (i.e. a csv line)
	rv_string=array_to_csv_field(rv, ",")
	rv_string+="\n"
	return rv_string
def get_attribute(f):
    temp = []
    count += 1
    print(f)
    h5 = hdf5_getters.open_h5_file_read(f)
    temp.append(hdf5_getters.get_num_songs(h5))
    temp.append(hdf5_getters.get_artist_familiarity(h5))
    temp.append(hdf5_getters.get_artist_hotttnesss(h5))
    temp.append(hdf5_getters.get_danceability(h5))
    temp.append(hdf5_getters.get_energy(h5))
    temp.append(hdf5_getters.get_key(h5))
    temp.append(hdf5_getters.get_key_confidence(h5))
    temp.append(hdf5_getters.get_loudness(h5))
    temp.append(hdf5_getters.get_mode(h5))
    temp.append(hdf5_getters.get_mode_confidence(h5))
    temp.append(hdf5_getters.get_tempo(h5))
    temp.append(hdf5_getters.get_time_signature(h5))
    temp.append(hdf5_getters.get_time_signature_confidence(h5))
    temp = np.nan_to_num(temp)
    array.append(temp)
    h5.close()
Exemple #9
0
def get_all_attributes(filename):
    """
    This function does 3 simple things:
    - open the song file
    - get all required attributes
    - write it to a csv file 
    - close the files
    """
    with open('attributes.csv', 'a') as csvfile:
        try:
            # let's apply the previous function to all files
            csvwriter = csv.writer(csvfile, delimiter='\t')
            h5 = GETTERS.open_h5_file_read(filename)
            RESULTS = []
            RESULTS.append(GETTERS.get_year(h5))
            RESULTS.append(GETTERS.get_artist_id(h5))
            RESULTS.append(GETTERS.get_artist_name(h5))
            RESULTS.append(GETTERS.get_artist_mbid(h5))
            RESULTS.append(convert_terms(GETTERS.get_artist_terms(h5)))
            RESULTS.append(GETTERS.get_artist_hotttnesss(h5))
            RESULTS.append(GETTERS.get_artist_latitude(h5))
            RESULTS.append(GETTERS.get_artist_longitude(h5))
            RESULTS.append(GETTERS.get_artist_familiarity(h5))
            RESULTS.append(GETTERS.get_danceability(h5))
            RESULTS.append(GETTERS.get_duration(h5))
            RESULTS.append(GETTERS.get_energy(h5))
            RESULTS.append(GETTERS.get_loudness(h5))
            RESULTS.append(GETTERS.get_song_hotttnesss(h5))
            RESULTS.append(GETTERS.get_song_id(h5))
            RESULTS.append(GETTERS.get_tempo(h5))
            RESULTS.append(GETTERS.get_time_signature(h5))
            RESULTS.append(GETTERS.get_title(h5))
            RESULTS.append(GETTERS.get_track_id(h5))
            RESULTS.append(GETTERS.get_release(h5))
            csvwriter.writerow(RESULTS)
            h5.close()
        except AttributeError:
            pass
def get_feats(h5):
    f = []
    f.append(hdf5_getters.get_artist_name(h5).decode('utf8').replace(',', ''))
    f.append(hdf5_getters.get_title(h5).decode('utf8').replace(',', ''))
    f.append(str(hdf5_getters.get_loudness(h5)))
    f.append(str(hdf5_getters.get_tempo(h5)))
    f.append(str(hdf5_getters.get_time_signature(h5)))
    f.append(str(hdf5_getters.get_key(h5)))
    f.append(str(hdf5_getters.get_mode(h5)))
    f.append(str(hdf5_getters.get_duration(h5)))
    f.extend(get_statistical_feats(hdf5_getters.get_segments_timbre(h5)))
    f.extend(get_statistical_feats(hdf5_getters.get_segments_pitches(h5)))
    f.extend(get_statistical_feats(hdf5_getters.get_segments_loudness_max(h5)))
    f.extend(
        get_statistical_feats(hdf5_getters.get_segments_loudness_max_time(h5)))
    f.extend(
        get_statistical_feats(hdf5_getters.get_segments_loudness_start(h5)))
    f.append(str(hdf5_getters.get_song_hotttnesss(h5)))
    f.append(str(hdf5_getters.get_danceability(h5)))
    f.append(str(hdf5_getters.get_end_of_fade_in(h5)))
    f.append(str(hdf5_getters.get_energy(h5)))
    f.append(str(hdf5_getters.get_start_of_fade_out(h5)))
    f.append(str(hdf5_getters.get_year(h5)))
    return f
            for sub_folder in os.listdir(path + '/' + parent_folder):
                for child_folder in os.listdir(path + '/' + parent_folder +
                                               '/' + sub_folder):
                    for file in os.listdir(path + '/' + parent_folder + '/' +
                                           sub_folder + '/' + child_folder):
                        with h5.open_h5_file_read(path + '/' + parent_folder +
                                                  '/' + sub_folder + '/' +
                                                  child_folder + '/' +
                                                  file) as ds:

                            row = []
                            row += [h5.get_artist_terms(ds)]
                            row += [h5.get_artist_terms_freq(ds)]
                            row += [h5.get_artist_terms_weight(ds)]
                            row += [h5.get_danceability(ds)]
                            row += [h5.get_energy(ds)]
                            row += [h5.get_key(ds)]
                            row += [h5.get_mode(ds)]
                            row += [h5.get_loudness(ds)]
                            row += [
                                parent_folder + '/' + sub_folder + '/' +
                                child_folder + '/'
                            ]
                            row += [file]

                            row += [h5.get_duration(ds)]
                            row += [h5.get_artist_familiarity(ds)]
                            row += [h5.get_similar_artists(ds)]
                            row += [h5.get_artist_id(ds)]
                            row += [h5.get_title(ds)]
                            row += [h5.get_song_hotttnesss(ds)]
def hd5_single_random_file_parser():
    # Open an h5 file in read mode
    h5 = hdf5_getters.open_h5_file_read(
        '/home/skalogerakis/Documents/MillionSong/MillionSongSubset/A/M/G/TRAMGDX12903CEF79F.h5'
    )

    function_tracker = filter(
        lambda x: x.startswith('get'),
        hdf5_getters.__dict__.keys())  # Detects all the getter functions

    for f in function_tracker:  # Print everything in function tracker
        print(f)

    # First effort to check what each field contains.
    print()  # 55 available fields (exluding number of songs fields)
    print("Num of songs -- ",
          hdf5_getters.get_num_songs(h5))  # One song per file
    print("Title -- ",
          hdf5_getters.get_title(h5))  # Print the title of a specific h5 file
    print("Artist familiarity -- ", hdf5_getters.get_artist_familiarity(h5))
    print("Artist hotness -- ", hdf5_getters.get_artist_hotttnesss(h5))
    print("Artist ID -- ", hdf5_getters.get_artist_id(h5))
    print("Artist mbID -- ", hdf5_getters.get_artist_mbid(h5))
    print("Artist playmeid -- ", hdf5_getters.get_artist_playmeid(h5))
    print("Artist 7DigitalID -- ", hdf5_getters.get_artist_7digitalid(h5))
    print("Artist latitude -- ", hdf5_getters.get_artist_latitude(h5))
    print("Artist longitude -- ", hdf5_getters.get_artist_longitude(h5))
    print("Artist location -- ", hdf5_getters.get_artist_location(h5))
    print("Artist Name -- ", hdf5_getters.get_artist_name(h5))
    print("Release -- ", hdf5_getters.get_release(h5))
    print("Release 7DigitalID -- ", hdf5_getters.get_release_7digitalid(h5))
    print("Song ID -- ", hdf5_getters.get_song_id(h5))
    print("Song Hotness -- ", hdf5_getters.get_song_hotttnesss(h5))
    print("Track 7Digital -- ", hdf5_getters.get_track_7digitalid(h5))
    print("Similar artists -- ", hdf5_getters.get_similar_artists(h5))
    print("Artist terms -- ", hdf5_getters.get_artist_terms(h5))
    print("Artist terms freq -- ", hdf5_getters.get_artist_terms_freq(h5))
    print("Artist terms weight -- ", hdf5_getters.get_artist_terms_weight(h5))
    print("Analysis sample rate -- ",
          hdf5_getters.get_analysis_sample_rate(h5))
    print("Audio md5 -- ", hdf5_getters.get_audio_md5(h5))
    print("Danceability -- ", hdf5_getters.get_danceability(h5))
    print("Duration -- ", hdf5_getters.get_duration(h5))
    print("End of Fade -- ", hdf5_getters.get_end_of_fade_in(h5))
    print("Energy -- ", hdf5_getters.get_energy(h5))
    print("Key -- ", hdf5_getters.get_key(h5))
    print("Key Confidence -- ", hdf5_getters.get_key_confidence(h5))
    print("Loudness -- ", hdf5_getters.get_loudness(h5))
    print("Mode -- ", hdf5_getters.get_mode(h5))
    print("Mode Confidence -- ", hdf5_getters.get_mode_confidence(h5))
    print("Start of fade out -- ", hdf5_getters.get_start_of_fade_out(h5))
    print("Tempo -- ", hdf5_getters.get_tempo(h5))
    print("Time signature -- ", hdf5_getters.get_time_signature(h5))
    print("Time signature confidence -- ",
          hdf5_getters.get_time_signature_confidence(h5))
    print("Track ID -- ", hdf5_getters.get_track_id(h5))
    print("Segments Start -- ", hdf5_getters.get_segments_start(h5))
    print("Segments Confidence -- ", hdf5_getters.get_segments_confidence(h5))
    print("Segments Pitches -- ", hdf5_getters.get_segments_pitches(h5))
    print("Segments Timbre -- ", hdf5_getters.get_segments_timbre(h5))
    print("Segments Loudness max -- ",
          hdf5_getters.get_segments_loudness_max(h5))
    print("Segments Loudness max time-- ",
          hdf5_getters.get_segments_loudness_max_time(h5))
    print("Segments Loudness start -- ",
          hdf5_getters.get_segments_loudness_start(h5))
    print("Sections start -- ", hdf5_getters.get_sections_start(h5))
    print("Sections Confidence -- ", hdf5_getters.get_sections_confidence(h5))
    print("Beats start -- ", hdf5_getters.get_beats_start(h5))
    print("Beats confidence -- ", hdf5_getters.get_beats_confidence(h5))
    print("Bars start -- ", hdf5_getters.get_bars_start(h5))
    print("Bars confidence -- ", hdf5_getters.get_bars_confidence(h5))
    print("Tatums start -- ", hdf5_getters.get_tatums_start(h5))
    print("Tatums confidence -- ", hdf5_getters.get_tatums_confidence(h5))
    print("Artist mbtags -- ", hdf5_getters.get_artist_mbtags(h5))
    print("Artist mbtags count -- ", hdf5_getters.get_artist_mbtags_count(h5))
    print("Year -- ", hdf5_getters.get_year(h5))

    fields = ['Title', 'Artist ID']

    with open('Tester2.csv', 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=';')

        # writing the fields
        csv_writer.writerow(fields)

        # writing the data rows
        csv_writer.writerow(
            [hdf5_getters.get_title(h5),
             hdf5_getters.get_artist_id(h5)])

    h5.close()  # close h5 when completed in the end
Exemple #13
0
glob_path = '/Users/kenleejr92/MillionSongSubset/data/*/*/*/*'
filepaths = glob.glob(glob_path)
for filepath in tqdm(filepaths):
    h5 = hdf5_getters.open_h5_file_read(filepath)
    n = hdf5_getters.get_num_songs(h5)
    for row in range(n):
        song_id = hdf5_getters.get_song_id(h5, songidx=row).decode('UTF-8')
        #         artist = hdf5_getters.get_artist_name(h5,songidx=row).decode('UTF-8')
        #         title= hdf5_getters.get_title(h5,songidx=row)#.decode('UTF-8')
        #         artist = "".join(c for c in unicodedata.normalize('NFD', str(artist.decode("utf8"))) if unicodedata.category(c) != "Mn")
        #         title = "".join(c for c in unicodedata.normalize('NFD', str(title.decode("utf8"))) if unicodedata.category(c) != "Mn")

        #single number features
        danceability = hdf5_getters.get_danceability(h5, songidx=row)
        duration = hdf5_getters.get_duration(h5, songidx=row)
        energy = hdf5_getters.get_energy(h5, songidx=row)
        loudness = hdf5_getters.get_loudness(h5, songidx=row)
        musicalKey = hdf5_getters.get_key(h5, songidx=row)
        mode = hdf5_getters.get_mode(h5, songidx=row)
        tempo = hdf5_getters.get_tempo(h5, songidx=row)
        time_signature = hdf5_getters.get_time_signature(h5, songidx=row)
        year = hdf5_getters.get_year(h5, songidx=row)
        song_hottness = hdf5_getters.get_song_hotttnesss(h5, songidx=row)
        end_of_fade_in = hdf5_getters.get_end_of_fade_in(h5, songidx=row)
        start_of_fade_out = hdf5_getters.get_start_of_fade_out(h5, songidx=row)

        #timestamp features
        #take last element and divide by length to get beats/unit time, segments/unit_time
        bars_start = hdf5_getters.get_bars_start(h5, songidx=row)
        beats_start = hdf5_getters.get_beats_start(h5, songidx=row)
        sections_start = hdf5_getters.get_sections_start(h5, songidx=row)
def data_to_flat_file(basedir,ext='.h5') :
    """This function extract the information from the tables and creates the flat file."""	
    count = 0;	#song counter
    list_to_write= []
    row_to_write = ""
    writer = csv.writer(open("metadata_wholeA.csv", "wb"))
    for root, dirs, files in os.walk(basedir):
	files = glob.glob(os.path.join(root,'*'+ext))
        for f in files:
	    print f	#the name of the file
            h5 = hdf5_getters.open_h5_file_read(f)
	    title = hdf5_getters.get_title(h5) 
	    title= title.replace('"','') 
	    comma=title.find(',')	#eliminating commas in the title
	    if	comma != -1:
		    print title
		    time.sleep(1)
	    album = hdf5_getters.get_release(h5)
	    album= album.replace('"','')	#eliminating commas in the album	
	    comma=album.find(',')
	    if	comma != -1:
		    print album
		    time.sleep(1)
	    artist_name = hdf5_getters.get_artist_name(h5)
	    comma=artist_name.find(',')
	    if	comma != -1:
		    print artist_name
		    time.sleep(1)
	    artist_name= artist_name.replace('"','')	#eliminating double quotes
	    duration = hdf5_getters.get_duration(h5)
	    samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
	    artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
	    artist_fam = hdf5_getters.get_artist_familiarity(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_fam) == True:
	            artist_fam=-1
	    artist_hotness= hdf5_getters.get_artist_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_hotness) == True:
	            artist_hotness=-1
	    artist_id = hdf5_getters.get_artist_id(h5)
	    artist_lat = hdf5_getters.get_artist_latitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lat) == True:
	            artist_lat=-1
	    artist_loc = hdf5_getters.get_artist_location(h5)
		#checks artist_loc to see if it is a hyperlink if it is set as empty string
	    artist_loc = artist_loc.replace(",", "\,");
	    if artist_loc.startswith("<a"):
                artist_loc = ""
	    if len(artist_loc) > 100:
                artist_loc = ""
	    artist_lon = hdf5_getters.get_artist_longitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lon) == True:
	            artist_lon=-1
	    artist_mbid = hdf5_getters.get_artist_mbid(h5)
	    artist_pmid = hdf5_getters.get_artist_playmeid(h5)
	    audio_md5 = hdf5_getters.get_audio_md5(h5)
	    danceability = hdf5_getters.get_danceability(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(danceability) == True:
	            danceability=-1
	    end_fade_in =hdf5_getters.get_end_of_fade_in(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(end_fade_in) == True:
	            end_fade_in=-1
	    energy = hdf5_getters.get_energy(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(energy) == True:
	            energy=-1
            song_key = hdf5_getters.get_key(h5)
	    key_c = hdf5_getters.get_key_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(key_c) == True:
	            key_c=-1
	    loudness = hdf5_getters.get_loudness(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(loudness) == True:
	            loudness=-1
	    mode = hdf5_getters.get_mode(h5)
	    mode_conf = hdf5_getters.get_mode_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(mode_conf) == True:
	            mode_conf=-1
	    release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
	    song_hot = hdf5_getters.get_song_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(song_hot) == True:
	            song_hot=-1
	    song_id = hdf5_getters.get_song_id(h5)
	    start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
	    tempo = hdf5_getters.get_tempo(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(tempo) == True:
	            tempo=-1
	    time_sig = hdf5_getters.get_time_signature(h5)
	    time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(time_sig_c) == True:
	            time_sig_c=-1
	    track_id = hdf5_getters.get_track_id(h5)
	    track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
	    year = hdf5_getters.get_year(h5)
	    bars_c = hdf5_getters.get_bars_confidence(h5)
	    bars_c_avg= get_avg(bars_c)
	    bars_c_max= get_max(bars_c)
	    bars_c_min = get_min(bars_c)
	    bars_c_stddev= get_stddev(bars_c)
	    bars_c_count = get_count(bars_c)
	    bars_c_sum = get_sum(bars_c)
	    bars_start = hdf5_getters.get_bars_start(h5)
	    bars_start_avg = get_avg(bars_start)
	    bars_start_max= get_max(bars_start)
	    bars_start_min = get_min(bars_start)
	    bars_start_stddev= get_stddev(bars_start)
	    bars_start_count = get_count(bars_start)
	    bars_start_sum = get_sum(bars_start)
            beats_c = hdf5_getters.get_beats_confidence(h5)
            beats_c_avg= get_avg(beats_c)
	    beats_c_max= get_max(beats_c)
	    beats_c_min = get_min(beats_c)
	    beats_c_stddev= get_stddev(beats_c)
	    beats_c_count = get_count(beats_c)
	    beats_c_sum = get_sum(beats_c)
            beats_start = hdf5_getters.get_beats_start(h5)
 	    beats_start_avg = get_avg(beats_start)
	    beats_start_max= get_max(beats_start)
	    beats_start_min = get_min(beats_start)
	    beats_start_stddev= get_stddev(beats_start)
	    beats_start_count = get_count(beats_start)
	    beats_start_sum = get_sum(beats_start)
	    sec_c = hdf5_getters.get_sections_confidence(h5)
            sec_c_avg= get_avg(sec_c)
	    sec_c_max= get_max(sec_c)
	    sec_c_min = get_min(sec_c)
	    sec_c_stddev= get_stddev(sec_c)
	    sec_c_count = get_count(sec_c)
	    sec_c_sum = get_sum(sec_c)
	    sec_start = hdf5_getters.get_sections_start(h5)
            sec_start_avg = get_avg(sec_start)
	    sec_start_max= get_max(sec_start)
	    sec_start_min = get_min(sec_start)
	    sec_start_stddev= get_stddev(sec_start)
	    sec_start_count = get_count(sec_start)
	    sec_start_sum = get_sum(sec_start)
	    seg_c = hdf5_getters.get_segments_confidence(h5)
	    seg_c_avg= get_avg(seg_c)
	    seg_c_max= get_max(seg_c)
	    seg_c_min = get_min(seg_c)
	    seg_c_stddev= get_stddev(seg_c)
	    seg_c_count = get_count(seg_c)
	    seg_c_sum = get_sum(seg_c)
            seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
            seg_loud_max_avg= get_avg(seg_loud_max)
	    seg_loud_max_max= get_max(seg_loud_max)
	    seg_loud_max_min = get_min(seg_loud_max)
	    seg_loud_max_stddev= get_stddev(seg_loud_max)
	    seg_loud_max_count = get_count(seg_loud_max)
	    seg_loud_max_sum = get_sum(seg_loud_max)
	    seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
	    seg_loud_max_time_avg= get_avg(seg_loud_max_time)
	    seg_loud_max_time_max= get_max(seg_loud_max_time)
	    seg_loud_max_time_min = get_min(seg_loud_max_time)
	    seg_loud_max_time_stddev= get_stddev(seg_loud_max_time)
	    seg_loud_max_time_count = get_count(seg_loud_max_time)
	    seg_loud_max_time_sum = get_sum(seg_loud_max_time)
	    seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
	    seg_loud_start_avg= get_avg(seg_loud_start)
	    seg_loud_start_max= get_max(seg_loud_start)
	    seg_loud_start_min = get_min(seg_loud_start)
	    seg_loud_start_stddev= get_stddev(seg_loud_start)
	    seg_loud_start_count = get_count(seg_loud_start)
	    seg_loud_start_sum = get_sum(seg_loud_start)					      
	    seg_pitch = hdf5_getters.get_segments_pitches(h5)
	    pitch_size = len(seg_pitch)
	    seg_start = hdf5_getters.get_segments_start(h5)
	    seg_start_avg= get_avg(seg_start)
	    seg_start_max= get_max(seg_start)
	    seg_start_min = get_min(seg_start)
	    seg_start_stddev= get_stddev(seg_start)
	    seg_start_count = get_count(seg_start)
	    seg_start_sum = get_sum(seg_start)
	    seg_timbre = hdf5_getters.get_segments_timbre(h5)
	    tatms_c = hdf5_getters.get_tatums_confidence(h5)
	    tatms_c_avg= get_avg(tatms_c)
	    tatms_c_max= get_max(tatms_c)
	    tatms_c_min = get_min(tatms_c)
	    tatms_c_stddev= get_stddev(tatms_c)
	    tatms_c_count = get_count(tatms_c)
	    tatms_c_sum = get_sum(tatms_c)
	    tatms_start = hdf5_getters.get_tatums_start(h5)
	    tatms_start_avg= get_avg(tatms_start)
	    tatms_start_max= get_max(tatms_start)
	    tatms_start_min = get_min(tatms_start)
	    tatms_start_stddev= get_stddev(tatms_start)
	    tatms_start_count = get_count(tatms_start)
	    tatms_start_sum = get_sum(tatms_start)
	
	    #Getting the genres
	    genre_set = 0    #flag to see if the genre has been set or not
	    art_trm = hdf5_getters.get_artist_terms(h5)
	    trm_freq = hdf5_getters.get_artist_terms_freq(h5)
	    trn_wght = hdf5_getters.get_artist_terms_weight(h5)
	    a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
	    genre_indexes=get_genre_indexes(trm_freq) #index of the highest freq
	    final_genre=[]
	    genres_so_far=[]
	    for i in range(len(genre_indexes)):
		    genre_tmp=get_genre(art_trm,genre_indexes[i])   #genre that corresponds to the highest freq
		    genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary
		    if len(genres_so_far) != 0:
			    for i in genres_so_far:
				final_genre.append(i)
				genre_set=1				#genre was found in dictionary
				  
		
	    
	    if genre_set == 1:
		    col_num=[]
		   
		    for genre in final_genre:
			    column=int(genre)				#getting the column number of the genre
			    col_num.append(column)

		    genre_array=genre_columns(col_num)	         #genre array
 	    else:
		    genre_array=genre_columns(-1)		#the genre was not found in the dictionary

	    transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_pitch_avg=[]
	    seg_pitch_max=[]
	    seg_pitch_min=[]
            seg_pitch_stddev=[]
            seg_pitch_count=[]
	    seg_pitch_sum=[]
            i=0
	    #Getting the aggregate values in the pitches array
	    for row in transpose_pitch:
		   seg_pitch_avg.append(get_avg(row))
		   seg_pitch_max.append(get_max(row))
	           seg_pitch_min.append(get_min(row))
		   seg_pitch_stddev.append(get_stddev(row))
		   seg_pitch_count.append(get_count(row))
                   seg_pitch_sum.append(get_sum(row))
		   i=i+1

	    #extracting information from the timbre array 
            transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_timbre_avg=[]
	    seg_timbre_max=[]
	    seg_timbre_min=[]
            seg_timbre_stddev=[]
            seg_timbre_count=[]
	    seg_timbre_sum=[]
            i=0
	    for row in transpose_timbre:
		   seg_timbre_avg.append(get_avg(row))
		   seg_timbre_max.append(get_max(row))
	           seg_timbre_min.append(get_min(row))
		   seg_timbre_stddev.append(get_stddev(row))
		   seg_timbre_count.append(get_count(row))
                   seg_timbre_sum.append(get_sum(row))
		   i=i+1
		


		#Writing to the flat file
            writer.writerow([title,album,artist_name,year,duration,seg_start_count, tempo])

	    h5.close()
	    count=count+1;
	    print count;
def classify(h5):
	output_array={}
	# duration
	duration=hdf5_getters.get_duration(h5)
	output_array["duration"]=duration	### ADDED VALUE TO ARRAY
	# number of bars
	bars=hdf5_getters.get_bars_start(h5)
	num_bars=len(bars)
	output_array["num_bars"]=num_bars	### ADDED VALUE TO ARRAY
	# mean and variance in bar length
	bar_length=numpy.ediff1d(bars)
	variance_bar_length=numpy.var(bar_length)
	output_array["variance_bar_length"]=variance_bar_length	### ADDED VALUE TO ARRAY
	# number of beats
	beats=hdf5_getters.get_beats_start(h5)
	num_beats=len(beats)
	output_array["num_beats"]=num_beats	### ADDED VALUE TO ARRAY
	# mean and variance in beats length
	beats_length=numpy.ediff1d(beats)
	variance_beats_length=numpy.var(bar_length)
	output_array["variance_beats_length"]=variance_beats_length	### ADDED VALUE TO ARRAY
	# danceability
	danceability=hdf5_getters.get_danceability(h5)
	output_array["danceability"]=danceability	### ADDED VALUE TO ARRAY
	# end of fade in
	end_of_fade_in=hdf5_getters.get_end_of_fade_in(h5)
	output_array["end_of_fade_in"]=end_of_fade_in	### ADDED VALUE TO ARRAY
	# energy
	energy=hdf5_getters.get_energy(h5)
	output_array["energy"]=energy	### ADDED VALUE TO ARRAY
	# key
	key=hdf5_getters.get_key(h5)
	output_array["key"]=int(key)	### ADDED VALUE TO ARRAY
	# loudness
	loudness=hdf5_getters.get_loudness(h5)
	output_array["loudness"]=loudness	### ADDED VALUE TO ARRAY
	# mode
	mode=hdf5_getters.get_mode(h5)
	output_array["mode"]=int(mode)	### ADDED VALUE TO ARRAY
	# number sections
	sections=hdf5_getters.get_sections_start(h5)
	num_sections=len(sections)
	output_array["num_sections"]=num_sections	### ADDED VALUE TO ARRAY
	# mean and variance in sections length
	sections_length=numpy.ediff1d(sections)
	variance_sections_length=numpy.var(sections)
	output_array["variance_sections_length"]=variance_sections_length	### ADDED VALUE TO ARRAY
	# number segments
	segments=hdf5_getters.get_segments_start(h5)
	num_segments=len(segments)
	output_array["num_segments"]=num_segments	### ADDED VALUE TO ARRAY
	# mean and variance in segments length
	segments_length=numpy.ediff1d(segments)
	variance_segments_length=numpy.var(segments)
	output_array["variance_segments_length"]=variance_segments_length	### ADDED VALUE TO ARRAY
	# segment loudness max
	segment_loudness_max_array=hdf5_getters.get_segments_loudness_max(h5)
	segment_loudness_max_time_array=hdf5_getters.get_segments_loudness_max_time(h5)
	segment_loudness_max_index=0
	for i in range(len(segment_loudness_max_array)):
		if segment_loudness_max_array[i]>segment_loudness_max_array[segment_loudness_max_index]:
			segment_loudness_max_index=i
	segment_loudness_max=segment_loudness_max_array[segment_loudness_max_index]
	segment_loudness_max_time=segment_loudness_max_time_array[segment_loudness_max_index]
	output_array["segment_loudness_max"]=segment_loudness_max	### ADDED VALUE TO ARRAY
	output_array["segment_loudness_time"]=segment_loudness_max_time	### ADDED VALUE TO ARRAY
			
	# POSSIBLE TODO: use average function instead and weight by segment length
	# segment loudness mean (start)
	segment_loudness_array=hdf5_getters.get_segments_loudness_start(h5)
	segment_loudness_mean=numpy.mean(segment_loudness_array)
	output_array["segment_loudness_mean"]=segment_loudness_mean	### ADDED VALUE TO ARRAY
	# segment loudness variance (start)
	segment_loudness_variance=numpy.var(segment_loudness_array)
	output_array["segment_loudness_variance"]=segment_loudness_variance	### ADDED VALUE TO ARRAY
	# segment pitches
	segment_pitches_array=hdf5_getters.get_segments_pitches(h5)
	segment_pitches_mean=numpy.mean(segment_pitches_array,axis=0).tolist()
	output_array["segment_pitches_mean"]=segment_pitches_mean
	# segment pitches variance (start)
	segment_pitches_variance=numpy.var(segment_pitches_array,axis=0).tolist()
	output_array["segment_pitches_variance"]=segment_pitches_variance
	# segment timbres
	segment_timbres_array=hdf5_getters.get_segments_timbre(h5)
	segment_timbres_mean=numpy.mean(segment_timbres_array,axis=0).tolist()
	output_array["segment_timbres_mean"]=segment_timbres_mean
	# segment timbres variance (start)
	segment_timbres_variance=numpy.var(segment_timbres_array,axis=0).tolist()
	output_array["segment_timbres_variance"]=segment_timbres_variance
	# hotttnesss
	hottness=hdf5_getters.get_song_hotttnesss(h5,0)
	output_array["hottness"]=hottness	### ADDED VALUE TO ARRAY
	# duration-start of fade out
	start_of_fade_out=hdf5_getters.get_start_of_fade_out(h5)
	fade_out=duration-start_of_fade_out
	output_array["fade_out"]=fade_out	### ADDED VALUE TO ARRAY
	# tatums
	tatums=hdf5_getters.get_tatums_start(h5)
	num_tatums=len(tatums)
	output_array["num_tatums"]=num_tatums	### ADDED VALUE TO ARRAY
	# mean and variance in tatums length
	tatums_length=numpy.ediff1d(tatums)
	variance_tatums_length=numpy.var(tatums_length)
	output_array["variance_tatums_length"]=variance_tatums_length	### ADDED VALUE TO ARRAY
	# tempo
	tempo=hdf5_getters.get_tempo(h5)
	output_array["tempo"]=tempo	### ADDED VALUE TO ARRAY
	# time signature
	time_signature=hdf5_getters.get_time_signature(h5)
	output_array["time_signature"]=int(time_signature)	### ADDED VALUE TO ARRAY
	# year
	year=hdf5_getters.get_year(h5)
	output_array["year"]=int(year)	### ADDED VALUE TO ARRAY
	# artist terms
	artist_terms=hdf5_getters.get_artist_terms(h5,0)
	output_array["artist_terms"]=artist_terms.tolist()
	artist_terms_freq=hdf5_getters.get_artist_terms_freq(h5,0)
	output_array["artist_terms_freq"]=artist_terms_freq.tolist()
	artist_name=hdf5_getters.get_artist_name(h5,0)
	output_array["artist_name"]=artist_name
	artist_id=hdf5_getters.get_artist_id(h5,0)
	output_array["artist_id"]=artist_id
	# title
	title=hdf5_getters.get_title(h5,0)
	output_array["title"]=title

	return output_array
Exemple #16
0
def main(argv):
    if len(argv) != 1:
        print "Specify data directory"
        return
    basedir = argv[0]
    outputFile1 = open('SongCSV.csv', 'w')
    outputFile2 = open('TagsCSV.csv', 'w')
    csvRowString = ""
    csvLabelString = ""
    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #leave the prompt boolean set to True
    #else, set 'prompt' to False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################
    if prompt == True:
        while prompt:

            prompt = False

            csvAttributeString = raw_input("\n\nIn what order would you like the colums of the CSV file?\n" +
                "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"+
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n" +
                "...or exit by typing 'exit'.\n\n")

            csvAttributeList = re.split('\W+', csvAttributeString)
            for i, v in enumerate(csvAttributeList):
                csvAttributeList[i] = csvAttributeList[i].lower()

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"


                if attribute == 'AlbumID'.lower():
                    csvRowString += 'AlbumID'
                elif attribute == 'AlbumName'.lower():
                    csvRowString += 'AlbumName'
                elif attribute == 'ArtistID'.lower():
                    csvRowString += 'ArtistID'
                elif attribute == 'ArtistLatitude'.lower():
                    csvRowString += 'ArtistLatitude'
                elif attribute == 'ArtistLocation'.lower():
                    csvRowString += 'ArtistLocation'
                elif attribute == 'ArtistLongitude'.lower():
                    csvRowString += 'ArtistLongitude'
                elif attribute == 'ArtistName'.lower():
                    csvRowString += 'ArtistName'
                elif attribute == 'Danceability'.lower():
                    csvRowString += 'Danceability'
                elif attribute == 'Duration'.lower():
                    csvRowString += 'Duration'
                elif attribute == 'KeySignature'.lower():
                    csvRowString += 'KeySignature'
                elif attribute == 'KeySignatureConfidence'.lower():
                    csvRowString += 'KeySignatureConfidence'
                elif attribute == 'SongID'.lower():
                    csvRowString += "SongID"
                elif attribute == 'Tempo'.lower():
                    csvRowString += 'Tempo'
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += 'TimeSignature'
                elif attribute == 'TimeSignatureConfidence'.lower():
                    csvRowString += 'TimeSignatureConfidence'
                elif attribute == 'Title'.lower():
                    csvRowString += 'Title'
                elif attribute == 'Year'.lower():
                    csvRowString += 'Year'
                elif attribute == 'Exit'.lower():
                    sys.exit()
                else:
                    prompt = True
                    print "=============="
                    print "I believe there has been an error with the input."
                    print "=============="
                    break

                csvRowString += ","

            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex-1]
            csvRowString += "\n"
            outputFile1.write(csvRowString);
            csvRowString = ""
    #else, if you want to hard code the order of the csv file and not prompt
    #the user,
    else:
        #################################################
        #change the order of the csv file here
        #Default is to list all available attributes (in alphabetical order)
        #csvRowString = ("SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"+
        #    "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,"+
        #    "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"+
        #    "Title,Year")
        csvRowString = ("ArtistFamiliarity,ArtistHotttnesss,"+
            "BarsConfidence,BarsStart,BeatsConfidence,BeatsStart,Duration,"+
            "EndOfFadeIn,Key,KeyConfidence,Loudness,Mode,ModeConfidence,"+
            "SectionsConfidence,SectionsStart,SegmentsConfidence,SegmentsLoudnessMax,"+
            "SegmentsLoudnessMaxTime,SegmentsLoudnessStart,SegmentsStart,"+
            "SongHotttnesss,StartOfFadeOut,TatumsConfidence,TatumsStart,Tempo,TimeSignature,TimeSignatureConfidence,"+
            "SegmentsPitches,SegmentsTimbre,Title,Year,Decade,ArtistMbtags")
        #################################################
        header = str()
        csvAttributeList = re.split('\W+', csvRowString)
        arrayAttributes = ["BarsConfidence","BarsStart","BeatsConfidence","BeatsStart",
                           "SectionsConfidence","SectionsStart","SegmentsConfidence","SegmentsLoudnessMax",
                           "SegmentsLoudnessMaxTime","SegmentsLoudnessStart","SegmentsStart",
                           "TatumsConfidence","TatumsStart"]
        for i, v in enumerate(csvAttributeList):
            csvAttributeList[i] = csvAttributeList[i].lower()
            if(v=="SegmentsPitches"):
                for i in range(90):
                    header = header + "SegmentsPitches" + str(i) + ","
            elif(v=="SegmentsTimbre"):
                for i in range(90):
                    header = header + "SegmentsTimbre" + str(i) + ","
            elif(v in arrayAttributes):
                header = header + v + str(0) + ","
                header = header + v + str(1) + ","
            else:
                header = header + v + ","
        outputFile1.write("SongNumber,");
        #outputFile1.write(csvRowString + "\n");
        outputFile1.write(header + "\n");
        csvRowString = ""

    #################################################


    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    #basedir = "MillionSongSubset/data/A/A/" # "." As the default means the current directory
    ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    #FOR LOOP
    all = sorted(os.walk(basedir))
    for root, dirs, files in all:
        files = sorted(glob.glob(os.path.join(root,'*'+ext)))
        for f in files:
            print f

            songH5File = hdf5_getters.open_h5_file_read(f)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            #testDanceability = hdf5_getters.get_danceability(songH5File)
            # print type(testDanceability)
            # print ("Here is the danceability: ") + str(testDanceability)

            song.analysisSampleRate = str(hdf5_getters.get_analysis_sample_rate(songH5File))
            song.artistFamiliarity = str(hdf5_getters.get_artist_familiarity(songH5File))
            song.artistHotttnesss = str(hdf5_getters.get_artist_hotttnesss(songH5File))
            song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
            song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
            song.artistMbid = str(hdf5_getters.get_artist_mbid(songH5File))
            song.barsConfidence = np.array(hdf5_getters.get_bars_confidence(songH5File))
            song.barsStart = np.array(hdf5_getters.get_bars_start(songH5File))
            song.beatsConfidence = np.array(hdf5_getters.get_beats_confidence(songH5File))
            song.beatsStart = np.array(hdf5_getters.get_beats_start(songH5File))
            song.danceability = str(hdf5_getters.get_danceability(songH5File))
            song.duration = str(hdf5_getters.get_duration(songH5File))
            song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File))
            song.energy = str(hdf5_getters.get_energy(songH5File))
            song.key = str(hdf5_getters.get_key(songH5File))
            song.keyConfidence = str(hdf5_getters.get_key_confidence(songH5File))
            song.loudness = str(hdf5_getters.get_loudness(songH5File))
            song.mode = str(hdf5_getters.get_mode(songH5File))
            song.modeConfidence = str(hdf5_getters.get_mode_confidence(songH5File))
            song.sectionsConfidence = np.array(hdf5_getters.get_sections_confidence(songH5File))
            song.sectionsStart = np.array(hdf5_getters.get_sections_start(songH5File))
            song.segmentsConfidence = np.array(hdf5_getters.get_segments_confidence(songH5File))
            song.segmentsLoudnessMax = np.array(hdf5_getters.get_segments_loudness_max(songH5File))
            song.segmentsLoudnessMaxTime = np.array(hdf5_getters.get_segments_loudness_max_time(songH5File))
            song.segmentsLoudnessStart = np.array(hdf5_getters.get_segments_loudness_start(songH5File))
            song.segmentsPitches = np.array(hdf5_getters.get_segments_pitches(songH5File))
            song.segmentsStart = np.array(hdf5_getters.get_segments_start(songH5File))
            song.segmentsTimbre = np.array(hdf5_getters.get_segments_timbre(songH5File))
            song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
            song.startOfFadeOut = str(hdf5_getters.get_start_of_fade_out(songH5File))
            song.tatumsConfidence = np.array(hdf5_getters.get_tatums_confidence(songH5File))
            song.tatumsStart = np.array(hdf5_getters.get_tatums_start(songH5File))
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File))
            song.songid = str(hdf5_getters.get_song_id(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))
            song.artistMbtags = str(hdf5_getters.get_artist_mbtags(songH5File))
            #print song count
            csvRowString += str(song.songCount) + ","
            csvLabelString += str(song.songCount) + ","
            
            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AnalysisSampleRate'.lower():
                    csvRowString += song.analysisSampleRate
                elif attribute == 'ArtistFamiliarity'.lower():
                    csvRowString += song.artistFamiliarity
                elif attribute == 'ArtistHotttnesss'.lower():
                    csvRowString += song.artistHotttnesss
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    csvRowString += latitude
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    csvRowString += longitude
                elif attribute == 'ArtistMbid'.lower():
                    csvRowString += song.artistMbid
                elif attribute == 'BarsConfidence'.lower():
                    arr = song.barsConfidence
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'BarsStart'.lower():
                    arr = song.barsStart
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'BeatsConfidence'.lower():
                    arr = song.beatsConfidence
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'BeatsStart'.lower():
                    arr = song.beatsStart
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'Danceability'.lower():
                    csvRowString += song.danceability
                elif attribute == 'Duration'.lower():
                    csvRowString += song.duration
                elif attribute == 'EndOfFadeIn'.lower():
                    csvRowString += song.endOfFadeIn
                elif attribute == 'Energy'.lower():
                    csvRowString += song.energy
                elif attribute == 'Key'.lower():
                    csvRowString += song.key
                elif attribute == 'KeyConfidence'.lower():
                    csvRowString += song.keyConfidence
                elif attribute == 'Loudness'.lower():
                    csvRowString += song.loudness
                elif attribute == 'Mode'.lower():
                    csvRowString += song.mode
                elif attribute == 'ModeConfidence'.lower():
                    csvRowString += song.modeConfidence
                elif attribute == 'SectionsConfidence'.lower():
                    arr = song.sectionsConfidence
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SectionsStart'.lower():
                    arr = song.sectionsStart
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SegmentsConfidence'.lower():
                    arr = song.segmentsConfidence
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SegmentsLoudnessMax'.lower():
                    arr = song.segmentsLoudnessMax
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SegmentsLoudnessMaxTime'.lower():
                    arr = song.segmentsLoudnessMaxTime
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SegmentsLoudnessStart'.lower():
                    arr = song.segmentsLoudnessStart
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SegmentsStart'.lower():
                    arr = song.segmentsStart
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'SongHotttnesss'.lower():
                    hotttnesss = song.songHotttnesss
                    if hotttnesss == 'nan':
                        hotttnesss = 'NaN'
                    csvRowString += hotttnesss
                elif attribute == 'StartOfFadeOut'.lower():
                    csvRowString += song.startOfFadeOut
                elif attribute == 'TatumsConfidence'.lower():
                    arr = song.tatumsConfidence
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'TatumsStart'.lower():
                    arr = song.tatumsStart
                    if arr.shape[0] == 0:
                        arrmean = ''
                        arrnorm = ''
                    else:
                        arrmean = np.mean(arr)
                        arrnorm = np.linalg.norm(arr)
                    csvRowString += str(arrmean) + ',' + str(arrnorm)
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    csvRowString += song.tempo
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += song.timeSignature
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.timeSignatureConfidence
                elif attribute == 'SegmentsPitches'.lower():
                    colmean = np.mean(song.segmentsPitches,axis=0)
                    for m in colmean:
                        csvRowString += str(m) + ","
                    cov = np.dot(song.segmentsPitches.T,song.segmentsPitches)
                    utriind = np.triu_indices(cov.shape[0])
                    feats = cov[utriind]
                    for feat in feats:
                        csvRowString += str(feat) + ","
                    lastIndex = len(csvRowString)
                    csvRowString = csvRowString[0:lastIndex-1]
                elif attribute == 'SegmentsTimbre'.lower():
                    colmean = np.mean(song.segmentsTimbre,axis=0)
                    for m in colmean:
                        csvRowString += str(m) + ","
                    cov = np.dot(song.segmentsTimbre.T,song.segmentsTimbre)
                    utriind = np.triu_indices(cov.shape[0])
                    feats = cov[utriind]
                    for feat in feats:
                        csvRowString += str(feat) + ","
                    lastIndex = len(csvRowString)
                    csvRowString = csvRowString[0:lastIndex-1]
                elif attribute == 'SongID'.lower():
                    csvRowString += "\"" + song.id + "\""
                elif attribute == 'Title'.lower():
                    csvRowString += "\"" + song.title + "\""
                elif attribute == 'Year'.lower():
                    csvRowString += song.year
                elif attribute == 'Decade'.lower():
                    yr = song.year
                    if yr > 0:
                        decade = song.year[:-1] + '0'
                    else:
                        decade = '0'
                    csvRowString += decade
                elif attribute == 'ArtistMbtags'.lower():
                    tags = song.artistMbtags[1:-1]
                    tags = "\"" + tags + "\""
                    tags = tags.replace("\n",'')
                    csvRowString += tags
                    tagsarray = shlex.split(tags)
                    for t in tagsarray:
                        csvLabelString += t + ","
                else:
                     csvRowString += "Erm. This didn't work. Error. :( :(\n"

                csvRowString += ","

                '''
                if attribute == 'AlbumID'.lower():
                    csvRowString += song.albumID
                elif attribute == 'AlbumName'.lower():
                    albumName = song.albumName
                    albumName = albumName.replace(',',"")
                    csvRowString += "\"" + albumName + "\""
                elif attribute == 'ArtistID'.lower():
                    csvRowString += "\"" + song.artistID + "\""
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    csvRowString += latitude
                elif attribute == 'ArtistLocation'.lower():
                    location = song.artistLocation
                    location = location.replace(',','')
                    csvRowString += "\"" + location + "\""
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    csvRowString += longitude
                elif attribute == 'ArtistName'.lower():
                    csvRowString += "\"" + song.artistName + "\""
                elif attribute == 'Danceability'.lower():
                    csvRowString += song.danceability
                elif attribute == 'Duration'.lower():
                    csvRowString += song.duration
                elif attribute == 'KeySignature'.lower():
                    csvRowString += song.keySignature
                elif attribute == 'KeySignatureConfidence'.lower():
                    # print "key sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.keySignatureConfidence
                elif attribute == 'SongID'.lower():
                    csvRowString += "\"" + song.id + "\""
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    csvRowString += song.tempo
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += song.timeSignature
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.timeSignatureConfidence
                elif attribute == 'Title'.lower():
                    csvRowString += "\"" + song.title + "\""
                elif attribute == 'Year'.lower():
                    csvRowString += song.year
                else:
                    csvRowString += "Erm. This didn't work. Error. :( :(\n"

                csvRowString += ","
                '''
            #Remove the final comma from each row in the csv
            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex-1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""
            
            lastIndex = len(csvLabelString)
            csvLabelString = csvLabelString[0:lastIndex-1]
            csvLabelString += "\n"
            outputFile2.write(csvLabelString)
            csvLabelString = ""

            songH5File.close()

    outputFile1.close()
    outputFile2.close()
 def get_energy(self):
     if self.h5 == None: self.open()
     return hdf5_getters.get_energy(self.h5)
Exemple #18
0
def get_all_files(basedir,ext='.h5') :
    """
    From a root directory, go through all subdirectories
    and find all files with the given extension.
    Return all absolute paths in a list.
    """
    c=0
    title=[]#get_title(h5)
    name=[]#get_artist_name`
    familiarity=[]#get_artist_familiarity()
    artist_hotness=[]#get_artist_hotttnesss
    song_hotness=[]# get_song_hotttnesss
    danceability=[]#get_danceability
    energy=[]#get_energy
    loudness=[]#get_loudness
    tempo=[]#get_tempo
    mode_confidence=[]#get_mode_confidence()
    time_sig_confidence=[]#get_time_signature_confidence()
    no_segments=[]#len(get_segments_start())
    avg_segment_confidence=[]#np.mean(hdf5_getters.get_segments_confidence(h5))
    avg_segment_pitches=[]#np.mean(hdf5_getters.get_segments_pitches(h51))
    no_sections=[]#len(hdf5_getters.get_sections_start())
    avg_sections_confidence=[]#np.mean(hdf5_getters.get_sections_confidence(h51))
    no_beats_start=[]#len(hdf5_getters.get_beats_start(h51))
    avg_beats_confidence=[]#np.mean(hdf5_getters.get_beats_confidence(h51))
    no_bars=[]#len(hdf5_getters.get_bars_start(h5))
    avg_bar_confidence=[]#np.mean(hdf5_getters.get_bars_confidence((h51))
    no_tatums_start=[]#len(hdf5_getters.get_tatums_start(h5))
    avg_tatums_start=[]#np.mean(get_tatums_confidence())
    billboard_presence=[]#returned value from web scraper
    key=[]
    duration=[]
    mode=[]
    target=pd.read_csv('Billboard.csv')
    j=0
    if(not(os.path.isfile('./data.csv'))):
        for root, dirs, files in os.walk(basedir):
            files = glob.glob(os.path.join(root,'*'+ext))
            for f in files :
                h5 = hdf5_getters.open_h5_file_read(f)
                songnme=hdf5_getters.get_title(h5)
                artst=hdf5_getters.get_artist_name(h5)
                for i in range(len(target)):
                    if(target.Title[i]==songnme and target.Name[i]==artst):
                        billboard_presence.append(target.Presence[i])
                        
                        title.append(songnme)
                        name.append(artst)
                        familiarity.append(hdf5_getters.get_artist_familiarity(h5))
                        artist_hotness.append(hdf5_getters.get_artist_hotttnesss(h5))
                        song_hotness.append(hdf5_getters. get_song_hotttnesss(h5))
                        danceability.append(hdf5_getters.get_danceability(h5))
                        key.append(hdf5_getters.get_key(h5))
                        duration.append(hdf5_getters.get_duration(h5))
                        mode.append(hdf5_getters.get_mode(h5))
                        energy.append(hdf5_getters.get_energy(h5))
                        loudness.append(hdf5_getters.get_loudness(h5))
                        tempo.append(hdf5_getters.get_tempo(h5))
                        mode_confidence.append(hdf5_getters.get_mode_confidence(h5))
                        time_sig_confidence.append(hdf5_getters.get_time_signature_confidence(h5))
                        
                        no_segments.append(len(hdf5_getters.get_segments_start(h5)))
                        avg_segment_confidence.append(np.mean(hdf5_getters.get_segments_confidence(h5)))
                        avg_segment_pitches.append(np.mean(hdf5_getters.get_segments_pitches(h5)))
                        no_sections.append(len(hdf5_getters.get_sections_start(h5)))
                        avg_sections_confidence.append(np.mean(hdf5_getters.get_sections_confidence(h5)))
                        no_beats_start.append(len(hdf5_getters.get_beats_start(h5)))
                        avg_beats_confidence.append(np.mean(hdf5_getters.get_beats_confidence(h5)))
                        no_bars.append(len(hdf5_getters.get_bars_start(h5)))
                        avg_bar_confidence.append(np.mean(hdf5_getters.get_bars_confidence(h5)))
                        no_tatums_start.append(len(hdf5_getters.get_tatums_start(h5)))
                        avg_tatums_start.append(np.mean(hdf5_getters.get_tatums_confidence(h5)))
                        
                        j+=1
                        print j #prints the index number of each song, to keep track of the song being saved to the database, and to identify errors.
                        break;
                        
                h5.close()
        print "Created Arrays"             
        df=pd.DataFrame(title,columns=['Title'])
        df['Artist_Name']=name
        df['Familiarity']=familiarity
        df['Hotness']=artist_hotness
        df['Song_hotness']=song_hotness
        df['Danceability']=danceability
        df['energy']=energy
        df['loudness']=loudness
        df['tempo']=tempo
        df['mode_confidence']=mode_confidence
        df['time_sig_confidence']=time_sig_confidence
        df['no_segments']=no_segments
        df['avg_segment_confidence']=avg_segment_confidence
        df['avg_segment_pitches']=avg_segment_pitches
        df['no_sections']=no_sections
        df['avg_sections_confidence']=avg_sections_confidence
        df['no_beats_start']=no_beats_start
        df['avg_beats_confidence']=avg_beats_confidence
        df['no_bars']=no_bars
        df['avg_bar_confidence']=avg_bar_confidence
        df['no_tatums_start']=no_tatums_start
        df['avg_tatums_start']=avg_tatums_start
        df['key']=key
        df['Mode']=mode
        df['duration']=duration
        df['Presence']=billboard_presence
        print df.head()
        print billboard_presence
        df.to_csv("data.csv")
    else:
        df=pd.read_csv('data.csv',index_col=0)
        print "Number of features in the created dataset:",
        print len(df.keys())
        print
    return df
max_score = -1.0;
print numSongs
for i in range(0, numSongs):
	#Handle each one
	year = h5get.get_year(h5, i)
	if year < 1980 or year > 2010:
		continue;

	song = Song()

	song.year = year

	song.tempo = h5get.get_tempo(h5, i)
	song.duration = h5get.get_duration(h5, i) 
	song.key = h5get.get_key(h5, i)
	song.energy = h5get.get_energy(h5, i)
	song.time_sig = h5get.get_time_signature(h5,i)
	song.mode = h5get.get_mode(h5,i);

	song.hotness = h5get.get_song_hotttnesss(h5, i)
	#print "Hotness: ", song.hotness;
	if math.isnan(song.hotness):
		song.hotness = 0.1;

	song.artist = h5get.get_artist_name(h5, i)

	song.name = h5get.get_title(h5, i)

	if (song.artist.lower(), song.name.lower()) in all_chart_info:
		song.chart_score = float(all_chart_info[(song.artist.lower(), song.name.lower())]);
		print " Got us some data! ", song.artist, " -- ", song.name, ": ", song.chart_score
Exemple #20
0
def data_to_flat_file(basedir,ext='.h5') :
    """This function extract the information from the tables and creates the flat file."""	
    count = 0;	#song counter
    list_to_write= []
    row_to_write = ""
    writer = csv.writer(open("metadata.csv", "wb"))
    for root, dirs, files in os.walk(basedir):
	files = glob.glob(os.path.join(root,'*'+ext))
        for f in files:
	    print f	#the name of the file
            h5 = hdf5_getters.open_h5_file_read(f)
	    title = hdf5_getters.get_title(h5) 
	    title= title.replace('"','') 
	    comma=title.find(',')	#eliminating commas in the title
	    if	comma != -1:
		    print title
		    time.sleep(1)
	    album = hdf5_getters.get_release(h5)
	    album= album.replace('"','')	#eliminating commas in the album	
	    comma=album.find(',')
	    if	comma != -1:
		    print album
		    time.sleep(1)
	    artist_name = hdf5_getters.get_artist_name(h5)
	    comma=artist_name.find(',')
	    if	comma != -1:
		    print artist_name
		    time.sleep(1)
	    artist_name= artist_name.replace('"','')	#eliminating double quotes
	    duration = hdf5_getters.get_duration(h5)
	    samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
	    artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
	    artist_fam = hdf5_getters.get_artist_familiarity(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_fam) == True:
	            artist_fam=-1
	    artist_hotness= hdf5_getters.get_artist_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_hotness) == True:
	            artist_hotness=-1
	    artist_id = hdf5_getters.get_artist_id(h5)
	    artist_lat = hdf5_getters.get_artist_latitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lat) == True:
	            artist_lat=-1
	    artist_loc = hdf5_getters.get_artist_location(h5)
		#checks artist_loc to see if it is a hyperlink if it is set as empty string
	    artist_loc = artist_loc.replace(",", "\,");
	    if artist_loc.startswith("<a"):
                artist_loc = ""
	    if len(artist_loc) > 100:
                artist_loc = ""
	    artist_lon = hdf5_getters.get_artist_longitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lon) == True:
	            artist_lon=-1
	    artist_mbid = hdf5_getters.get_artist_mbid(h5)
	    artist_pmid = hdf5_getters.get_artist_playmeid(h5)
	    audio_md5 = hdf5_getters.get_audio_md5(h5)
	    danceability = hdf5_getters.get_danceability(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(danceability) == True:
	            danceability=-1
	    end_fade_in =hdf5_getters.get_end_of_fade_in(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(end_fade_in) == True:
	            end_fade_in=-1
	    energy = hdf5_getters.get_energy(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(energy) == True:
	            energy=-1
            song_key = hdf5_getters.get_key(h5)
	    key_c = hdf5_getters.get_key_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(key_c) == True:
	            key_c=-1
	    loudness = hdf5_getters.get_loudness(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(loudness) == True:
	            loudness=-1
	    mode = hdf5_getters.get_mode(h5)
	    mode_conf = hdf5_getters.get_mode_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(mode_conf) == True:
	            mode_conf=-1
	    release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
	    song_hot = hdf5_getters.get_song_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(song_hot) == True:
	            song_hot=-1
	    song_id = hdf5_getters.get_song_id(h5)
	    start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
	    tempo = hdf5_getters.get_tempo(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(tempo) == True:
	            tempo=-1
	    time_sig = hdf5_getters.get_time_signature(h5)
	    time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(time_sig_c) == True:
	            time_sig_c=-1
	    track_id = hdf5_getters.get_track_id(h5)
	    track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
	    year = hdf5_getters.get_year(h5)
	    bars_c = hdf5_getters.get_bars_confidence(h5)
	    bars_c_avg= get_avg(bars_c)
	    bars_c_max= get_max(bars_c)
	    bars_c_min = get_min(bars_c)
	    bars_c_stddev= get_stddev(bars_c)
	    bars_c_count = get_count(bars_c)
	    bars_c_sum = get_sum(bars_c)
	    bars_start = hdf5_getters.get_bars_start(h5)
	    bars_start_avg = get_avg(bars_start)
	    bars_start_max= get_max(bars_start)
	    bars_start_min = get_min(bars_start)
	    bars_start_stddev= get_stddev(bars_start)
	    bars_start_count = get_count(bars_start)
	    bars_start_sum = get_sum(bars_start)
            beats_c = hdf5_getters.get_beats_confidence(h5)
            beats_c_avg= get_avg(beats_c)
	    beats_c_max= get_max(beats_c)
	    beats_c_min = get_min(beats_c)
	    beats_c_stddev= get_stddev(beats_c)
	    beats_c_count = get_count(beats_c)
	    beats_c_sum = get_sum(beats_c)
            beats_start = hdf5_getters.get_beats_start(h5)
 	    beats_start_avg = get_avg(beats_start)
	    beats_start_max= get_max(beats_start)
	    beats_start_min = get_min(beats_start)
	    beats_start_stddev= get_stddev(beats_start)
	    beats_start_count = get_count(beats_start)
	    beats_start_sum = get_sum(beats_start)
	    sec_c = hdf5_getters.get_sections_confidence(h5)
            sec_c_avg= get_avg(sec_c)
	    sec_c_max= get_max(sec_c)
	    sec_c_min = get_min(sec_c)
	    sec_c_stddev= get_stddev(sec_c)
	    sec_c_count = get_count(sec_c)
	    sec_c_sum = get_sum(sec_c)
	    sec_start = hdf5_getters.get_sections_start(h5)
            sec_start_avg = get_avg(sec_start)
	    sec_start_max= get_max(sec_start)
	    sec_start_min = get_min(sec_start)
	    sec_start_stddev= get_stddev(sec_start)
	    sec_start_count = get_count(sec_start)
	    sec_start_sum = get_sum(sec_start)
	    seg_c = hdf5_getters.get_segments_confidence(h5)
	    seg_c_avg= get_avg(seg_c)
	    seg_c_max= get_max(seg_c)
	    seg_c_min = get_min(seg_c)
	    seg_c_stddev= get_stddev(seg_c)
	    seg_c_count = get_count(seg_c)
	    seg_c_sum = get_sum(seg_c)
            seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
            seg_loud_max_avg= get_avg(seg_loud_max)
	    seg_loud_max_max= get_max(seg_loud_max)
	    seg_loud_max_min = get_min(seg_loud_max)
	    seg_loud_max_stddev= get_stddev(seg_loud_max)
	    seg_loud_max_count = get_count(seg_loud_max)
	    seg_loud_max_sum = get_sum(seg_loud_max)
	    seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
	    seg_loud_max_time_avg= get_avg(seg_loud_max_time)
	    seg_loud_max_time_max= get_max(seg_loud_max_time)
	    seg_loud_max_time_min = get_min(seg_loud_max_time)
	    seg_loud_max_time_stddev= get_stddev(seg_loud_max_time)
	    seg_loud_max_time_count = get_count(seg_loud_max_time)
	    seg_loud_max_time_sum = get_sum(seg_loud_max_time)
	    seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
	    seg_loud_start_avg= get_avg(seg_loud_start)
	    seg_loud_start_max= get_max(seg_loud_start)
	    seg_loud_start_min = get_min(seg_loud_start)
	    seg_loud_start_stddev= get_stddev(seg_loud_start)
	    seg_loud_start_count = get_count(seg_loud_start)
	    seg_loud_start_sum = get_sum(seg_loud_start)					      
	    seg_pitch = hdf5_getters.get_segments_pitches(h5)
	    pitch_size = len(seg_pitch)
	    seg_start = hdf5_getters.get_segments_start(h5)
	    seg_start_avg= get_avg(seg_start)
	    seg_start_max= get_max(seg_start)
	    seg_start_min = get_min(seg_start)
	    seg_start_stddev= get_stddev(seg_start)
	    seg_start_count = get_count(seg_start)
	    seg_start_sum = get_sum(seg_start)
	    seg_timbre = hdf5_getters.get_segments_timbre(h5)
	    tatms_c = hdf5_getters.get_tatums_confidence(h5)
	    tatms_c_avg= get_avg(tatms_c)
	    tatms_c_max= get_max(tatms_c)
	    tatms_c_min = get_min(tatms_c)
	    tatms_c_stddev= get_stddev(tatms_c)
	    tatms_c_count = get_count(tatms_c)
	    tatms_c_sum = get_sum(tatms_c)
	    tatms_start = hdf5_getters.get_tatums_start(h5)
	    tatms_start_avg= get_avg(tatms_start)
	    tatms_start_max= get_max(tatms_start)
	    tatms_start_min = get_min(tatms_start)
	    tatms_start_stddev= get_stddev(tatms_start)
	    tatms_start_count = get_count(tatms_start)
	    tatms_start_sum = get_sum(tatms_start)
	
	    #Getting the genres
	    genre_set = 0    #flag to see if the genre has been set or not
	    art_trm = hdf5_getters.get_artist_terms(h5)
	    trm_freq = hdf5_getters.get_artist_terms_freq(h5)
	    trn_wght = hdf5_getters.get_artist_terms_weight(h5)
	    a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
	    genre_indexes=get_genre_indexes(trm_freq) #index of the highest freq
	    final_genre=[]
	    genres_so_far=[]
	    for i in range(len(genre_indexes)):
		    genre_tmp=get_genre(art_trm,genre_indexes[i])   #genre that corresponds to the highest freq
		    genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary
		    if len(genres_so_far) != 0:
			    for i in genres_so_far:
				final_genre.append(i)
				genre_set=1				#genre was found in dictionary
				  
		
	    
	    if genre_set == 1:
		    col_num=[]
		   
		    for genre in final_genre:
			    column=int(genre)				#getting the column number of the genre
			    col_num.append(column)

		    genre_array=genre_columns(col_num)	         #genre array
 	    else:
		    genre_array=genre_columns(-1)		#the genre was not found in the dictionary

	    transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_pitch_avg=[]
	    seg_pitch_max=[]
	    seg_pitch_min=[]
            seg_pitch_stddev=[]
            seg_pitch_count=[]
	    seg_pitch_sum=[]
            i=0
	    #Getting the aggregate values in the pitches array
	    for row in transpose_pitch:
		   seg_pitch_avg.append(get_avg(row))
		   seg_pitch_max.append(get_max(row))
	           seg_pitch_min.append(get_min(row))
		   seg_pitch_stddev.append(get_stddev(row))
		   seg_pitch_count.append(get_count(row))
                   seg_pitch_sum.append(get_sum(row))
		   i=i+1

	    #extracting information from the timbre array 
            transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_timbre_avg=[]
	    seg_timbre_max=[]
	    seg_timbre_min=[]
            seg_timbre_stddev=[]
            seg_timbre_count=[]
	    seg_timbre_sum=[]
            i=0
	    for row in transpose_timbre:
		   seg_timbre_avg.append(get_avg(row))
		   seg_timbre_max.append(get_max(row))
	           seg_timbre_min.append(get_min(row))
		   seg_timbre_stddev.append(get_stddev(row))
		   seg_timbre_count.append(get_count(row))
                   seg_timbre_sum.append(get_sum(row))
		   i=i+1
		


		#Writing to the flat file

            writer.writerow([title,album,artist_name,duration,samp_rt,artist_7digitalid,artist_fam,artist_hotness,artist_id,artist_lat,artist_loc,artist_lon,artist_mbid,genre_array[0],genre_array[1],genre_array[2],
genre_array[3],genre_array[4],genre_array[5],genre_array[6],genre_array[7],genre_array[8],genre_array[9],genre_array[10],genre_array[11],genre_array[12],genre_array[13],genre_array[14],genre_array[15],
genre_array[16],genre_array[17],genre_array[18],genre_array[19],genre_array[20],genre_array[21],genre_array[22],genre_array[23],genre_array[24],genre_array[25],genre_array[26],
genre_array[27],genre_array[28],genre_array[29],genre_array[30],genre_array[31],genre_array[32],genre_array[33],genre_array[34],genre_array[35],genre_array[36],genre_array[37],genre_array[38],
genre_array[39],genre_array[40],genre_array[41],genre_array[42],genre_array[43],genre_array[44],genre_array[45],genre_array[46],genre_array[47],genre_array[48],genre_array[49],
genre_array[50],genre_array[51],genre_array[52],genre_array[53],genre_array[54],genre_array[55],genre_array[56],genre_array[57],genre_array[58],genre_array[59],
genre_array[60],genre_array[61],genre_array[62],genre_array[63],genre_array[64],genre_array[65],genre_array[66],genre_array[67],genre_array[68],genre_array[69],
genre_array[70],genre_array[71],genre_array[72],genre_array[73],genre_array[74],genre_array[75],genre_array[76],genre_array[77],genre_array[78],genre_array[79],
genre_array[80],genre_array[81],genre_array[82],genre_array[83],genre_array[84],genre_array[85],genre_array[86],genre_array[87],genre_array[88],genre_array[89],
genre_array[90],genre_array[91],genre_array[92],genre_array[93],genre_array[94],genre_array[95],genre_array[96],genre_array[97],genre_array[98],genre_array[99],genre_array[100],genre_array[101],
genre_array[102],genre_array[103],genre_array[104],genre_array[105],genre_array[106],genre_array[107],genre_array[108],genre_array[109],genre_array[110],genre_array[111],genre_array[112],
genre_array[113],genre_array[114],genre_array[115],genre_array[116],genre_array[117],genre_array[118],genre_array[119],genre_array[120],genre_array[121],genre_array[122],genre_array[123],
genre_array[124],genre_array[125],genre_array[126],genre_array[127],genre_array[128],genre_array[129],genre_array[130],genre_array[131],genre_array[132],
artist_pmid,audio_md5,danceability,end_fade_in,energy,song_key,key_c,loudness,mode,mode_conf,release_7digitalid,song_hot,song_id,start_fade_out,tempo,time_sig,time_sig_c,track_id,track_7digitalid,year,bars_c_avg,bars_c_max,bars_c_min,bars_c_stddev,bars_c_count,bars_c_sum,bars_start_avg,bars_start_max,bars_start_min,bars_start_stddev,bars_start_count,bars_start_sum,beats_c_avg,beats_c_max,beats_c_min,beats_c_stddev,beats_c_count,beats_c_sum,beats_start_avg,beats_start_max,beats_start_min, beats_start_stddev,beats_start_count,beats_start_sum, sec_c_avg,sec_c_max,sec_c_min,sec_c_stddev,sec_c_count,sec_c_sum,sec_start_avg,sec_start_max,sec_start_min,sec_start_stddev,sec_start_count,sec_start_sum,seg_c_avg,seg_c_max,seg_c_min,seg_c_stddev,seg_c_count,seg_c_sum,seg_loud_max_avg,seg_loud_max_max,seg_loud_max_min,seg_loud_max_stddev,seg_loud_max_count,seg_loud_max_sum,seg_loud_max_time_avg,seg_loud_max_time_max,seg_loud_max_time_min,seg_loud_max_time_stddev,seg_loud_max_time_count,seg_loud_max_time_sum,seg_loud_start_avg,seg_loud_start_max,seg_loud_start_min,seg_loud_start_stddev,seg_loud_start_count,seg_loud_start_sum,seg_pitch_avg[0],seg_pitch_max[0],seg_pitch_min[0],seg_pitch_stddev[0],seg_pitch_count[0],seg_pitch_sum[0],seg_pitch_avg[1],seg_pitch_max[1],seg_pitch_min[1],seg_pitch_stddev[1],seg_pitch_count[1],seg_pitch_sum[1],seg_pitch_avg[2],seg_pitch_max[2],seg_pitch_min[2],seg_pitch_stddev[2],seg_pitch_count[2],seg_pitch_sum[2],seg_pitch_avg[3],seg_pitch_max[3],seg_pitch_min[3],seg_pitch_stddev[3],seg_pitch_count[3],seg_pitch_sum[3],seg_pitch_avg[4],seg_pitch_max[4],seg_pitch_min[4],seg_pitch_stddev[4],seg_pitch_count[4],seg_pitch_sum[4],seg_pitch_avg[5],seg_pitch_max[5],seg_pitch_min[5],seg_pitch_stddev[5],seg_pitch_count[5],seg_pitch_sum[5],seg_pitch_avg[6],seg_pitch_max[6],seg_pitch_min[6],seg_pitch_stddev[6],seg_pitch_count[6],seg_pitch_sum[6],seg_pitch_avg[7],seg_pitch_max[7],seg_pitch_min[7],seg_pitch_stddev[7],seg_pitch_count[7],seg_pitch_sum[7],seg_pitch_avg[8],seg_pitch_max[8],seg_pitch_min[8],seg_pitch_stddev[8],seg_pitch_count[8],seg_pitch_sum[8],seg_pitch_avg[9],seg_pitch_max[9],seg_pitch_min[9],seg_pitch_stddev[9],seg_pitch_count[9],seg_pitch_sum[9],seg_pitch_avg[10],seg_pitch_max[10],seg_pitch_min[10],seg_pitch_stddev[10],seg_pitch_count[10],seg_pitch_sum[10],seg_pitch_avg[11],seg_pitch_max[11],seg_pitch_min[11],
seg_pitch_stddev[11],seg_pitch_count[11],seg_pitch_sum[11],seg_start_avg,seg_start_max,seg_start_min,seg_start_stddev, 
seg_start_count,seg_start_sum,seg_timbre_avg[0],seg_timbre_max[0],seg_timbre_min[0],seg_timbre_stddev[0],seg_timbre_count[0],
seg_timbre_sum[0],seg_timbre_avg[1],seg_timbre_max[1],seg_timbre_min[1],seg_timbre_stddev[1],seg_timbre_count[1],
seg_timbre_sum[1],seg_timbre_avg[2],seg_timbre_max[2],seg_timbre_min[2],seg_timbre_stddev[2],seg_timbre_count[2],
seg_timbre_sum[2],seg_timbre_avg[3],seg_timbre_max[3],seg_timbre_min[3],seg_timbre_stddev[3],seg_timbre_count[3],
seg_timbre_sum[3],seg_timbre_avg[4],seg_timbre_max[4],seg_timbre_min[4],seg_timbre_stddev[4],seg_timbre_count[4],
seg_timbre_sum[4],seg_timbre_avg[5],seg_timbre_max[5],seg_timbre_min[5],seg_timbre_stddev[5],seg_timbre_count[5],
seg_timbre_sum[5],seg_timbre_avg[6],seg_timbre_max[6],seg_timbre_min[6],seg_timbre_stddev[6],seg_timbre_count[6],
seg_timbre_sum[6],seg_timbre_avg[7],seg_timbre_max[7],seg_timbre_min[7],seg_timbre_stddev[7],seg_timbre_count[7],
seg_timbre_sum[7],seg_timbre_avg[8],seg_timbre_max[8],seg_timbre_min[8],seg_timbre_stddev[8],seg_timbre_count[8],
seg_timbre_sum[8],seg_timbre_avg[9],seg_timbre_max[9],seg_timbre_min[9],seg_timbre_stddev[9],seg_timbre_count[9],
seg_timbre_sum[9],seg_timbre_avg[10],seg_timbre_max[10],seg_timbre_min[10],seg_timbre_stddev[10],seg_timbre_count[10],
seg_timbre_sum[10],seg_timbre_avg[11],seg_timbre_max[11],seg_timbre_min[11],seg_timbre_stddev[11],seg_timbre_count[11],
seg_timbre_sum[11],tatms_c_avg,tatms_c_max,tatms_c_min,tatms_c_stddev,tatms_c_count,tatms_c_sum,tatms_start_avg,tatms_start_max,tatms_start_min,tatms_start_stddev,tatms_start_count,tatms_start_sum])






	    h5.close()
	    count=count+1;
	    print count;
            temp = hdf5_getters.get_artist_mbtags(songH5File)
            song.artistMBTags = remove_trap_characters(str(list(temp)))
            song.artistMBTagsOuterCount = get_list_length(temp)
            song.artistMBTagsCount = remove_trap_characters(
                str(list(hdf5_getters.get_artist_mbtags_count(songH5File))))
            song.analysisSampleRate = remove_trap_characters(
                str(hdf5_getters.get_analysis_sample_rate(songH5File)))
            song.audioMD5 = remove_trap_characters(
                str(hdf5_getters.get_audio_md5(songH5File)))
            song.endOfFadeIn = remove_trap_characters(
                str(hdf5_getters.get_end_of_fade_in(songH5File)))
            song.startOfFadeOut = remove_trap_characters(
                str(hdf5_getters.get_start_of_fade_out(songH5File)))
            song.energy = remove_trap_characters(
                str(hdf5_getters.get_energy(songH5File)))
            song.release = remove_trap_characters(
                str(hdf5_getters.get_release(songH5File)))
            song.release7digitalid = remove_trap_characters(
                str(hdf5_getters.get_release_7digitalid(songH5File)))
            song.songHotness = remove_trap_characters(
                str(hdf5_getters.get_song_hotttnesss(songH5File)))
            song.track7digitalid = remove_trap_characters(
                str(hdf5_getters.get_track_7digitalid(songH5File)))

            temp = hdf5_getters.get_similar_artists(songH5File)
            song.similarartists = remove_trap_characters(str(list(list(temp))))
            song.similarArtistsCount = get_list_length(temp)
            song.loudness = remove_trap_characters(
                str(hdf5_getters.get_loudness(songH5File)))
            song.mode = remove_trap_characters(
Exemple #22
0
def main():
    outputFileName = sys.argv[2]
    outputFile1 = open(outputFileName, 'w')
    csvRowString = ""

    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #leave the prompt boolean set to True
    #else, set 'prompt' to False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################
    if prompt == True:
        while prompt:

            prompt = False

            csvAttributeString = raw_input(
                "\n\nIn what order would you like the colums of the CSV file?\n"
                + "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"
                +
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo,"
                +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n"
                +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n"
                + "...or exit by typing 'exit'.\n\n")

            csvAttributeList = re.split('\W+', csvAttributeString)
            for i, v in enumerate(csvAttributeList):
                csvAttributeList[i] = csvAttributeList[i].lower()

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += 'AlbumID'
                elif attribute == 'AlbumName'.lower():
                    csvRowString += 'AlbumName'
                elif attribute == 'ArtistID'.lower():
                    csvRowString += 'ArtistID'
                elif attribute == 'ArtistLatitude'.lower():
                    csvRowString += 'ArtistLatitude'
                elif attribute == 'ArtistLocation'.lower():
                    csvRowString += 'ArtistLocation'
                elif attribute == 'ArtistLongitude'.lower():
                    csvRowString += 'ArtistLongitude'
                elif attribute == 'ArtistName'.lower():
                    csvRowString += 'ArtistName'
                elif attribute == 'Danceability'.lower():
                    csvRowString += 'Danceability'
                elif attribute == 'Duration'.lower():
                    csvRowString += 'Duration'
                elif attribute == 'KeySignature'.lower():
                    csvRowString += 'KeySignature'
                elif attribute == 'KeySignatureConfidence'.lower():
                    csvRowString += 'KeySignatureConfidence'
                elif attribute == 'SongID'.lower():
                    csvRowString += "SongID"
                elif attribute == 'Tempo'.lower():
                    csvRowString += 'Tempo'
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += 'TimeSignature'
                elif attribute == 'TimeSignatureConfidence'.lower():
                    csvRowString += 'TimeSignatureConfidence'
                elif attribute == 'Title'.lower():
                    csvRowString += 'Title'
                elif attribute == 'Year'.lower():
                    csvRowString += 'Year'
                elif attribute == 'Exit'.lower():
                    sys.exit()
                else:
                    prompt = True
                    print "=============="
                    print "I believe there has been an error with the input."
                    print "=============="
                    break

                csvRowString += ","

            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex - 1]
            csvRowString += "\n"
            # outputFile1.write(csvRowString);
            csvRowString = ""
    #else, if you want to hard code the order of the csv file and not prompt
    #the user,
    else:
        #################################################
        #change the order of the csv file here
        #Default is to list all available attributes (in alphabetical order)
        csvRowString = (
            "SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"
            +
            "ArtistLongitude,ArtistFamiliarity,ArtistHotttnesss,ArtistName," +
            "ArtistMBTags,ArtistTerms," +
            "Danceability,Energy,Duration,KeySignature," +
            "KeySignatureConfidence,Loudness,Mode,Hotttnesss,Tempo,TimeSignature,TimeSignatureConfidence,"
            + "Title,Year")
        #################################################

        csvAttributeList = re.split('\W+', csvRowString)
        for i, v in enumerate(csvAttributeList):
            csvAttributeList[i] = csvAttributeList[i].lower()
        # outputFile1.write("SongNumber,");
        # outputFile1.write(csvRowString + "\n");
        csvRowString = ""

    #################################################

    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = sys.argv[1]  # "." As the default means the current directory
    ext = ".h5"  #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    #FOR LOOP
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            print f

            songH5File = hdf5_getters.open_h5_file_read(f)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            testDanceability = hdf5_getters.get_danceability(songH5File)
            # print type(testDanceability)
            # print ("Here is the danceability: ") + str(testDanceability)

            song.artistID = str(hdf5_getters.get_artist_id(songH5File))
            song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.albumName = str(hdf5_getters.get_release(songH5File))
            song.artistLatitude = str(
                hdf5_getters.get_artist_latitude(songH5File))
            song.artistLocation = str(
                hdf5_getters.get_artist_location(songH5File))
            song.artistLongitude = str(
                hdf5_getters.get_artist_longitude(songH5File))
            song.artistFamiliarity = str(
                hdf5_getters.get_artist_familiarity(songH5File))
            song.artistHotttnesss = str(
                hdf5_getters.get_artist_hotttnesss(songH5File))
            song.artistName = str(hdf5_getters.get_artist_name(songH5File))
            song.artistMBTags = ','.join(
                hdf5_getters.get_artist_mbtags(songH5File))
            # song.artistMBTagsCount = ','.join(hdf5_getters.get_artist_mbtags_count(songH5File))
            song.artistTerms = ','.join(
                hdf5_getters.get_artist_terms(songH5File))
            song.danceability = str(hdf5_getters.get_danceability(songH5File))
            song.energy = str(hdf5_getters.get_energy(songH5File))
            song.duration = str(hdf5_getters.get_duration(songH5File))
            # song.setGenreList()
            song.keySignature = str(hdf5_getters.get_key(songH5File))
            song.keySignatureConfidence = str(
                hdf5_getters.get_key_confidence(songH5File))
            song.loudness = str(hdf5_getters.get_loudness(songH5File))
            song.mode = str(hdf5_getters.get_mode(songH5File))
            # song.lyrics = None
            # song.popularity = None
            song.hotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.timeSignature = str(
                hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = str(
                hdf5_getters.get_time_signature_confidence(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))

            #print song count
            # csvRowString += str(song.songCount) + ","

            rowString = json.dumps({
                'AlbumID': song.albumID,
                'AlbumName': song.albumName,
                'ArtistID': song.artistID,
                'ArtistLatitude': song.artistLatitude,
                'ArtistLocation': song.artistLocation,
                'ArtistLongitude': song.artistLongitude,
                'ArtistFamiliarity': song.artistFamiliarity,
                'ArtistHotttnesss': song.artistHotttnesss,
                'ArtistName': song.artistName,
                'ArtistMBTags': song.artistMBTags,
                'ArtistTerms': song.artistTerms,
                'Danceability': song.danceability,
                'Energy': song.energy,
                'Duration': song.duration,
                'KeySignature': song.keySignature,
                'KeySignatureConfidence': song.keySignatureConfidence,
                'Loudness': song.loudness,
                'Mode': song.mode,
                'Hotttnesss': song.hotttnesss,
                'Tempo': song.tempo,
                'SongID': song.id,
                'TimeSignature': song.timeSignature,
                'TimeSignatureConfidence': song.timeSignatureConfidence,
                'Title': song.title,
                'Year': song.year,
            })

            #Remove the final comma from each row in the csv
            rowString += "\n"
            outputFile1.write(rowString)

            songH5File.close()

    outputFile1.close()
     for e in GETTERS.get_artist_mbtags_count(h5, i))  # array
 artist_name = GETTERS.get_artist_name(h5, i)
 artist_playmeid = GETTERS.get_artist_playmeid(h5, i)
 artist_terms = ','.join(
     str(e) for e in GETTERS.get_artist_terms(h5, i))  # array
 #artist_terms_freq = ','.join(str(e) for e in GETTERS.get_artist_terms_freq(h5, i)) # array
 #artist_terms_weight = ','.join(str(e) for e in GETTERS.get_artist_terms_weight(h5, i)) # array
 #audio_md5 = GETTERS.get_audio_md5(h5, i)
 #bars_confidence = ','.join(str(e) for e in GETTERS.get_bars_confidence(h5, i)) # array
 #bars_start = ','.join(str(e) for e in GETTERS.get_bars_start(h5, i)) # array
 #beats_confidence = ','.join(str(e) for e in GETTERS.get_beats_confidence(h5, i)) # array
 #beats_start = ','.join(str(e) for e in GETTERS.get_beats_start(h5, i)) # array
 danceability = GETTERS.get_danceability(h5, i)
 duration = GETTERS.get_duration(h5, i)
 end_of_fade_in = GETTERS.get_end_of_fade_in(h5, i)
 energy = GETTERS.get_energy(h5, i)
 key = GETTERS.get_key(h5, i)
 key_confidence = GETTERS.get_key_confidence(h5, i)
 loudness = GETTERS.get_loudness(h5, i)
 mode = GETTERS.get_mode(h5, i)
 mode_confidence = GETTERS.get_mode_confidence(h5, i)
 release = GETTERS.get_release(h5, i)
 release_7digitalid = GETTERS.get_release_7digitalid(h5, i)
 #sections_confidence = ','.join(str(e) for e in GETTERS.get_sections_confidence(h5, i)) # array
 #sections_start = ','.join(str(e) for e in GETTERS.get_sections_start(h5, i)) # array
 #segments_confidence = ','.join(str(e) for e in GETTERS.get_segments_confidence(h5, i)) # array
 #segments_loudness_max = ','.join(str(e) for e in GETTERS.get_segments_loudness_max(h5, i)) # array
 #segments_loudness_max_time = ','.join(str(e) for e in GETTERS.get_segments_loudness_max_time(h5, i)) # array
 #segments_loudness_start = ','.join(str(e) for e in GETTERS.get_segments_loudness_start(h5, i)) # array
 #segments_pitches = ','.join(str(e) for e in GETTERS.get_segments_pitches(h5, i)) # array
 #segments_start = ','.join(str(e) for e in GETTERS.get_segments_start(h5, i)) # array
def get_all_rows(basedir, ext='.h5'):
    rows = []
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            #            print(os.path.join(root, f))
            h5 = hdf5_getters.open_h5_file_read(f)
            num_songs = hdf5_getters.get_num_songs(h5)
            #            print(num_songs)

            for i in range(num_songs):
                print(i)
                obj = {}
                obj['artist_name'] = hdf5_getters.get_artist_name(
                    h5, i).decode('UTF-8')
                obj['artist_familiarity'] = hdf5_getters.get_artist_familiarity(
                    h5, i)
                obj['artist_hotness'] = hdf5_getters.get_artist_hotttnesss(
                    h5, i)
                obj['artist_id'] = hdf5_getters.get_artist_id(
                    h5, i).decode('UTF-8')
                #                obj['artist_mbid']=hdf5_getters.get_artist_mbid(h5,i).decode('UTF-8')
                obj['artist_playmeid'] = hdf5_getters.get_artist_playmeid(
                    h5, i)
                obj['artist_7digitalid'] = hdf5_getters.get_artist_7digitalid(
                    h5, i)
                #                obj['artist_latitude']=hdf5_getters.get_artist_latitude(h5,i)
                #                obj['artist_longitude']=hdf5_getters.get_artist_longitude(h5,i)
                #                obj['artist_location']=hdf5_getters.get_artist_location(h5,i).decode('UTF-8')
                obj['artist_name'] = hdf5_getters.get_artist_name(
                    h5, i).decode('UTF-8')
                obj['release'] = hdf5_getters.get_release(h5,
                                                          i).decode('UTF-8')
                obj['song_hotttnesss'] = hdf5_getters.get_song_hotttnesss(
                    h5, i)
                obj['title'] = hdf5_getters.get_title(h5, i).decode('UTF-8')

                #            obj['artist_terms']=hdf5_getters.get_artist_terms(h5)
                #                obj['artist_terms_freq']=hdf5_getters.get_artist_terms_freq(h5)
                #                obj['artist_terms_weight']=hdf5_getters.get_artist_terms_weight(h5)
                #            obj['audio_md5']=hdf5_getters.get_audio_md5(h5).decode('UTF-8')
                obj['danceability'] = hdf5_getters.get_danceability(h5, i)
                obj['duration'] = hdf5_getters.get_duration(h5, i)
                obj['end_of_fade_in'] = hdf5_getters.get_end_of_fade_in(h5, i)
                obj['energy'] = hdf5_getters.get_energy(h5, i)
                obj['key'] = hdf5_getters.get_key(h5, i)
                obj['key_confidence'] = hdf5_getters.get_key_confidence(h5, i)
                obj['loudness'] = hdf5_getters.get_loudness(h5, i)
                obj['mode'] = hdf5_getters.get_mode(h5, i)
                #            obj['start_of_fade_out']=hdf5_getters.get_start_of_fade_out(h5)
                obj['tempo'] = hdf5_getters.get_tempo(h5, i)
                obj['time_signature'] = hdf5_getters.get_time_signature(h5, i)
                #            obj['time_signature_confidence']=hdf5_getters.get_time_signature_confidence(h5)
                obj['track_id'] = hdf5_getters.get_track_id(h5,
                                                            i).decode('UTF-8')
                #            obj['segments_start']=hdf5_getters.get_segments_start(h5)
                #            obj['segments_confidence']=hdf5_getters.get_segments_confidence(h5)
                #            obj['segments_pitches']=hdf5_getters.get_segments_pitches(h5)
                #            obj['segments_timbre']=hdf5_getters.get_segments_timbre(h5)
                #            obj['segments_loudness_max']=hdf5_getters.get_segments_loudness_max(h5)
                #            obj['segments_loudness_max_time']=hdf5_getters.get_segments_loudness_max_time(h5)
                #            obj['segments_confidence']=hdf5_getters.get_segments_confidence(h5)
                #            obj['segments_loudness_start']=hdf5_getters.get_segments_loudness_start(h5)
                #            obj['sections_start']=hdf5_getters.get_sections_start(h5)
                #            obj['sections_confidence']=hdf5_getters.get_sections_confidence(h5)
                #            obj['beats_start']=hdf5_getters.get_beats_start(h5)
                #            obj['beats_confidence']=hdf5_getters.get_beats_confidence(h5)
                #            obj['bars_start']=hdf5_getters.get_bars_start(h5)
                #            obj['bars_confidence']=hdf5_getters.get_bars_confidence(h5)
                #            obj['tatums_start']=hdf5_getters.get_tatums_start(h5)
                #            obj['artist_mbtags']=hdf5_getters.get_artist_mbtags(h5)
                #            obj['artist_mbtags_count']=hdf5_getters.get_artist_mbtags_count(h5)
                obj['year'] = hdf5_getters.get_year(h5, i)
                rows.append(obj)
        h5.close()
    return rows
Exemple #25
0
def main():
    basedir = "./../songMetaInfo.txt"

    ext = ".h5"

    if len(sys.argv) > 1:
        basedir = sys.argv[1]

    outputfile = 'SongFileMetaData.csv'

    if len(sys.argv) > 2:
        outputfile = sys.argv[2]

    csvWriter = open(outputfile, 'w')

    csvWriter.write(
        "title,songId,artistId,artistfamilarity,artistHotness,songHotness," +
        "songEnfOfFadeIn,startFadeout,energy,loudness,albumID,albumName,artistName,danceability,duration,keySignatureConfidence,tempo,timeSignature,timeSignatureConfidence,year\n"
    )

    with open(basedir) as file:
        for line in file.readlines():
            f = line.strip()
            #newf = f + "text"
            print f
            #print f
            try:
                songH5File = hdf5_getters.open_h5_file_read(f)
                csvStr = ""
                #0
                title = str(hdf5_getters.get_title(songH5File))
                csvStr += title + ","
                #1
                songId = str(hdf5_getters.get_song_id(songH5File))
                csvStr += songId + ","
                #2
                artistId = str(hdf5_getters.get_artist_id(songH5File))
                csvStr += artistId + ","
                #3
                artistfamilarity = str(
                    hdf5_getters.get_artist_familiarity(songH5File))
                csvStr += artistfamilarity + ","
                #4
                artistHotness = str(
                    hdf5_getters.get_artist_hotttnesss(songH5File))
                csvStr += artistHotness + ","
                #5
                songHotness = str(hdf5_getters.get_song_hotttnesss(songH5File))
                csvStr += songHotness + ","
                #6
                songEnfOfFadeIn = str(
                    hdf5_getters.get_end_of_fade_in(songH5File))
                csvStr += songEnfOfFadeIn + ","
                #7
                startFadeOut = str(
                    hdf5_getters.get_start_of_fade_out(songH5File))
                csvStr += startFadeOut + ","
                #8
                energy = str(hdf5_getters.get_energy(songH5File))
                csvStr += energy + ","
                #9
                loudness = str(hdf5_getters.get_loudness(songH5File))
                csvStr += loudness + ","
                #10
                albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
                csvStr += albumID + ","
                #11
                albumName = str(hdf5_getters.get_release(songH5File))
                csvStr += albumName + ","
                #12
                artistName = str(hdf5_getters.get_artist_name(songH5File))
                csvStr += artistName + ","
                #13
                danceability = str(hdf5_getters.get_danceability(songH5File))
                csvStr += danceability + ","
                #14
                duration = str(hdf5_getters.get_duration(songH5File))
                csvStr += duration + ","
                #15
                keySignatureConfidence = str(
                    hdf5_getters.get_key_confidence(songH5File))
                csvStr += keySignatureConfidence + ","
                #16
                tempo = str(hdf5_getters.get_tempo(songH5File))
                csvStr += tempo + ","
                ## 17
                timeSignature = str(
                    hdf5_getters.get_time_signature(songH5File))
                csvStr += timeSignature + ","
                #18
                timeSignatureConfidence = str(
                    hdf5_getters.get_time_signature_confidence(songH5File))
                csvStr += timeSignatureConfidence + ","
                #19
                year = str(hdf5_getters.get_year(songH5File))
                csvStr += year + ","
                #print song count
                csvStr += "\n"
                csvWriter.write(csvStr)
                #print csvStr

                songH5File.close()
            except:
                print "Error in processing file"

        csvWriter.close()
    total_tracks += 1

    # Check existence of the HDF5 file
    if not os.path.isfile(hdf5path):
        continue
        #print('ERROR: file',hdf5path,'does not exist.')
        #sys.exit(0)

    h5 = hdf5_getters.open_h5_file_read(hdf5path)

    # Retrieve features from HDF5
    danceability = hdf5_getters.get_danceability(h5)
    duration = hdf5_getters.get_duration(h5)
    time_of_fade_in = hdf5_getters.get_end_of_fade_in(h5)
    energy = hdf5_getters.get_energy(h5)
    key = hdf5_getters.get_key(h5)
    key_confidence = hdf5_getters.get_key_confidence(h5)
    loudness = hdf5_getters.get_loudness(h5)
    mode = hdf5_getters.get_mode(h5)
    mode_confidence = hdf5_getters.get_mode_confidence(h5)
    sections_start = hdf5_getters.get_sections_start(h5)
    num_sections = len(sections_start)
    if num_sections == 0:
        h5.close()
        continue
    segments_loudness_max = hdf5_getters.get_segments_loudness_max(h5)
    segments_loudness_start = hdf5_getters.get_segments_loudness_start(h5)
    num_segments = len(hdf5_getters.get_segments_start(h5))
    num_tatums = len(hdf5_getters.get_tatums_start(h5))
    time_of_fade_out = duration - hdf5_getters.get_start_of_fade_out(h5)
def data_to_flat_file(basedir,ext='.h5') :
    """ This function extracts the information from the tables and creates the flat file. """
    count = 0; #song counter
    list_to_write= []
    group_index=0
    row_to_write = ""
    writer = csv.writer(open("complete.csv", "wb"))
    for root, dirs, files in os.walk(basedir):
	files = glob.glob(os.path.join(root,'*'+ext))
        for f in files:
	    row=[]
	    print f
            h5 = hdf5_getters.open_h5_file_read(f)
	    title = hdf5_getters.get_title(h5) 
	    title= title.replace('"','') 
            row.append(title)
	    comma=title.find(',')
	    if	comma != -1:
		    print title
		    time.sleep(1)
	    album = hdf5_getters.get_release(h5)
	    album= album.replace('"','')
            row.append(album)
	    comma=album.find(',')
	    if	comma != -1:
		    print album
		    time.sleep(1)
	    artist_name = hdf5_getters.get_artist_name(h5)
	    comma=artist_name.find(',')
	    if	comma != -1:
		    print artist_name
		    time.sleep(1)
	    artist_name= artist_name.replace('"','')
            row.append(artist_name)
	    duration = hdf5_getters.get_duration(h5)
            row.append(duration)
	    samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
            row.append(samp_rt)
	    artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
            row.append(artist_7digitalid)
	    artist_fam = hdf5_getters.get_artist_familiarity(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_fam) == True:
	            artist_fam=-1
            row.append(artist_fam)
	    artist_hotness= hdf5_getters.get_artist_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_hotness) == True:
	             artist_hotness=-1
            row.append(artist_hotness)
	    artist_id = hdf5_getters.get_artist_id(h5)
            row.append(artist_id)           
	    artist_lat = hdf5_getters.get_artist_latitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lat) == True:
	            artist_lat=-1
            row.append(artist_lat)
	    artist_loc = hdf5_getters.get_artist_location(h5)
            row.append(artist_loc)
	    artist_lon = hdf5_getters.get_artist_longitude(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(artist_lon) == True:
	            artist_lon=-1
            row.append(artist_lon)
	    artist_mbid = hdf5_getters.get_artist_mbid(h5)
            row.append(artist_mbid)

	    #Getting the genre				       
            art_trm = hdf5_getters.get_artist_terms(h5)
            trm_freq = hdf5_getters.get_artist_terms_freq(h5)
	    trn_wght = hdf5_getters.get_artist_terms_weight(h5)
	    a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
	    genre_indexes=get_genre_indexes(trm_freq) 		    #index of the highest freq
	    genre_set=0					            #flag to see if the genre has been set or not
	    final_genre=[]
	    genres_so_far=[]
	    for i in range(len(genre_indexes)):
		    genre_tmp=get_genre(art_trm,genre_indexes[i])   #genre that corresponds to the highest freq
		    genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary
		    if len(genres_so_far) != 0:
			for i in genres_so_far:
				final_genre.append(i)
			    	genre_set=1
			
			
	    if genre_set == 1:
		col_num=[]
		for i in final_genre:
			column=int(i)				#getting the column number of the genre
			col_num.append(column)
	
		genre_array=genre_columns(col_num)	                #genre array 
	        for i in range(len(genre_array)):                   	#appending the genre_array to the row 
			row.append(genre_array[i])
	    else:
		genre_array=genre_columns(-1)				#when there is no genre matched, return an array of [0...0]
	        for i in range(len(genre_array)):                   	#appending the genre_array to the row 
			row.append(genre_array[i])
					

	    artist_pmid = hdf5_getters.get_artist_playmeid(h5)
            row.append(artist_pmid)
	    audio_md5 = hdf5_getters.get_audio_md5(h5)
            row.append(audio_md5)
	    danceability = hdf5_getters.get_danceability(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(danceability) == True:
	            danceability=-1
            row.append(danceability)
	    end_fade_in =hdf5_getters.get_end_of_fade_in(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(end_fade_in) == True:
	            end_fade_in=-1
            row.append(end_fade_in)
	    energy = hdf5_getters.get_energy(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(energy) == True:
	            energy=-1
            row.append(energy)
            song_key = hdf5_getters.get_key(h5)
            row.append(song_key)
	    key_c = hdf5_getters.get_key_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(key_c) == True:
	            key_c=-1
            row.append(key_c)
	    loudness = hdf5_getters.get_loudness(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(loudness) == True:
	            loudness=-1
            row.append(loudness)
	    mode = hdf5_getters.get_mode(h5)
            row.append(mode)
	    mode_conf = hdf5_getters.get_mode_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(mode_conf) == True:
	            mode_conf=-1
            row.append(mode_conf)
	    release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
            row.append(release_7digitalid)
	    song_hot = hdf5_getters.get_song_hotttnesss(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(song_hot) == True:
	            song_hot=-1
            row.append(song_hot)
	    song_id = hdf5_getters.get_song_id(h5)
            row.append(song_id)
	    start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
            row.append(start_fade_out)
	    tempo = hdf5_getters.get_tempo(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(tempo) == True:
	            tempo=-1
            row.append(tempo)
	    time_sig = hdf5_getters.get_time_signature(h5)
            row.append(time_sig)
	    time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
	    #checking if we get a "nan" if we do we change it to -1
	    if numpy.isnan(time_sig_c) == True:
	            time_sig_c=-1
            row.append(time_sig_c)
	    track_id = hdf5_getters.get_track_id(h5)
            row.append(track_id)
	    track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
            row.append(track_7digitalid)
	    year = hdf5_getters.get_year(h5)
            row.append(year)
	    bars_c = hdf5_getters.get_bars_confidence(h5)
            bars_start = hdf5_getters.get_bars_start(h5)
	    row_bars_padding=padding(245)   #this is the array that will be attached at the end of th row

	    #--------------bars---------------"
	    gral_info=[]
	    gral_info=row[:]
	    empty=[]
	    for i,item in enumerate(bars_c):
                row.append(group_index)
                row.append(i)
                row.append(bars_c[i])
	        bars_c_avg= get_avg(bars_c)
                row.append(bars_c_avg)
	        bars_c_max= get_max(bars_c)	
                row.append(bars_c_max)
	        bars_c_min = get_min(bars_c)
                row.append(bars_c_min)
	        bars_c_stddev= get_stddev(bars_c)
                row.append(bars_c_stddev)
	        bars_c_count = get_count(bars_c)
                row.append(bars_c_count)
	        bars_c_sum = get_sum(bars_c)
                row.append(bars_c_sum)
                row.append(bars_start[i])	         
	        bars_start_avg = get_avg(bars_start)
                row.append(bars_start_avg)	         
	        bars_start_max= get_max(bars_start)
                row.append(bars_start_max)	         
	        bars_start_min = get_min(bars_start)
                row.append(bars_start_min)	         
	        bars_start_stddev= get_stddev(bars_start)
                row.append(bars_start_stddev)	         
	        bars_start_count = get_count(bars_start)
                row.append(bars_start_count)	         
	        bars_start_sum = get_sum(bars_start)
                row.append(bars_start_sum)	         
		for i in row_bars_padding:
			row.append(i)

                writer.writerow(row)
		row=[]
		row=gral_info[:]
	 

            #--------beats---------------"
	    beats_c = hdf5_getters.get_beats_confidence(h5)
	    group_index=1
	    row=[]
	    row=gral_info[:]
	    row_front=padding(14)  	#blanks left in front of the row(empty spaces for bars)
	    row_beats_padding=padding(231)
	    for i,item in enumerate(beats_c):
	   	row.append(group_index)
		row.append(i)
		for index in row_front:  #padding blanks in front of the beats
			row.append(index)
		
		row.append(beats_c[i])
	        beats_c_avg= get_avg(beats_c)
		row.append(beats_c_avg)
	        beats_c_max= get_max(beats_c)
		row.append(beats_c_max)
                beats_c_min = get_min(beats_c)
		row.append(beats_c_min)
	        beats_c_stddev= get_stddev(beats_c)
		row.append(beats_c_stddev)
	        beats_c_count = get_count(beats_c)
		row.append(beats_c_count)
	        beats_c_sum = get_sum(beats_c)
		row.append(beats_c_sum)
                beats_start = hdf5_getters.get_beats_start(h5)
		row.append(beats_start[i])
 	        beats_start_avg = get_avg(beats_start)
		row.append(beats_start_avg)
	        beats_start_max= get_max(beats_start)
		row.append(beats_start_max)
	        beats_start_min = get_min(beats_start)
		row.append(beats_start_min)
	        beats_start_stddev= get_stddev(beats_start)
		row.append(beats_start_stddev)
	        beats_start_count = get_count(beats_start)
		row.append(beats_start_count)
	        beats_start_sum = get_sum(beats_start)
		row.append(beats_start_sum)
		for i in row_beats_padding:
			row.append(i)
                
		writer.writerow(row)
		row=[]
		row=gral_info[:]

            # "--------sections---------------"
	    row_sec_padding=padding(217)	#blank spaces left at the end of the row
	    sec_c = hdf5_getters.get_sections_confidence(h5)
	    group_index=2
	    row=[]
	    row=gral_info[:]
	    row_front=padding(28)		#blank spaces left in front(empty spaces for bars,beats)
	    for i,item in enumerate(sec_c):
		row.append(group_index)
		row.append(i)
		for index in row_front:  	#padding blanks in front of the sections
			row.append(index)

		row.append(sec_c[i])
                sec_c_avg= get_avg(sec_c)
		row.append(sec_c_avg)
	        sec_c_max= get_max(sec_c)
		row.append(sec_c_max)
	        sec_c_min = get_min(sec_c)
		row.append(sec_c_min)
	        sec_c_stddev= get_stddev(sec_c)
		row.append(sec_c_stddev)
	        sec_c_count = get_count(sec_c)
		row.append(sec_c_count)
	        sec_c_sum = get_sum(sec_c)
		row.append(sec_c_sum)
	        sec_start = hdf5_getters.get_sections_start(h5)
		row.append(sec_start[i])	   
                sec_start_avg = get_avg(sec_start)
		row.append(sec_start_avg)
	        sec_start_max= get_max(sec_start)
		row.append(sec_start_max)
	        sec_start_min = get_min(sec_start)
		row.append(sec_start_min)
	        sec_start_stddev= get_stddev(sec_start)
		row.append(sec_start_stddev)
	        sec_start_count = get_count(sec_start)
		row.append(sec_start_count)
	        sec_start_sum = get_sum(sec_start)
		row.append(sec_start_sum)
		for i in row_sec_padding:	#appending the blank spaces at the end of the row
			row.append(i)
                

		writer.writerow(row)
		row=[]
		row=gral_info[:]


            #--------segments---------------"
	    row_seg_padding=padding(182)	#blank spaces at the end of the row
 	    row_front=padding(42)		#blank spaces left in front of segments
	    seg_c = hdf5_getters.get_segments_confidence(h5)
	    group_index=3
	    row=[]
	    row=gral_info[:]
	    for i,item in enumerate(seg_c):
		row.append(group_index)
		row.append(i)
		for index in row_front:  	#padding blanks in front of the segments
			row.append(index)

		row.append(seg_c[i])
                seg_c_avg= get_avg(seg_c)
		row.append(seg_c_avg)
	        seg_c_max= get_max(seg_c)
		row.append(seg_c_max)
	        seg_c_min = get_min(seg_c)
		row.append(seg_c_min)
	        seg_c_stddev= get_stddev(seg_c)
		row.append(seg_c_stddev)
	        seg_c_count = get_count(seg_c)
		row.append(seg_c_count)
	        seg_c_sum = get_sum(seg_c)
		row.append(seg_c_sum)
                seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
		row.append(seg_loud_max[i])
                seg_loud_max_avg= get_avg(seg_loud_max)
		row.append(seg_loud_max_avg)
	        seg_loud_max_max= get_max(seg_loud_max)
		row.append(seg_loud_max_max)
	        seg_loud_max_min = get_min(seg_loud_max)
		row.append(seg_loud_max_min)
	        seg_loud_max_stddev= get_stddev(seg_loud_max)
		row.append(seg_loud_max_stddev)
	        seg_loud_max_count = get_count(seg_loud_max)
		row.append(seg_loud_max_count)
	        seg_loud_max_sum = get_sum(seg_loud_max)
		row.append(seg_loud_max_sum)
	        seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
		row.append(seg_loud_max_time[i])
	        seg_loud_max_time_avg= get_avg(seg_loud_max_time)
		row.append(seg_loud_max_time_avg)
	        seg_loud_max_time_max= get_max(seg_loud_max_time)
		row.append(seg_loud_max_time_max)
	        seg_loud_max_time_min = get_min(seg_loud_max_time)
		row.append(seg_loud_max_time_min)
	        seg_loud_max_time_stddev= get_stddev(seg_loud_max_time)
		row.append(seg_loud_max_time_stddev)
	        seg_loud_max_time_count = get_count(seg_loud_max_time)
		row.append(seg_loud_max_time_count)
	        seg_loud_max_time_sum = get_sum(seg_loud_max_time)
		row.append(seg_loud_max_time_sum)
	        seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
		row.append(seg_loud_start[i])
	        seg_loud_start_avg= get_avg(seg_loud_start)
		row.append(seg_loud_start_avg)
	        seg_loud_start_max= get_max(seg_loud_start)
		row.append(seg_loud_start_max)
	        seg_loud_start_min = get_min(seg_loud_start)
		row.append(seg_loud_start_min)
	        seg_loud_start_stddev= get_stddev(seg_loud_start)
		row.append(seg_loud_start_stddev)
	        seg_loud_start_count = get_count(seg_loud_start)
		row.append(seg_loud_start_count)
	        seg_loud_start_sum = get_sum(seg_loud_start)					      
		row.append(seg_loud_start_sum)
	        seg_start = hdf5_getters.get_segments_start(h5)
		row.append(seg_start[i])
	        seg_start_avg= get_avg(seg_start)
		row.append(seg_start_avg)
	        seg_start_max= get_max(seg_start)
		row.append(seg_start_max)
	        seg_start_min = get_min(seg_start)
		row.append(seg_start_min)
	        seg_start_stddev= get_stddev(seg_start)
		row.append(seg_start_stddev)
	        seg_start_count = get_count(seg_start)
		row.append(seg_start_count)
	        seg_start_sum = get_sum(seg_start)
		row.append(seg_start_sum)
		for i in row_seg_padding:	#appending blank spaces at the end of the row
			row.append(i)
                
		writer.writerow(row)
		row=[]
		row=gral_info[:]

	    #----------segments pitch and timbre---------------"
	    row_seg2_padding=padding(14)	#blank spaces left at the end of the row
	    row_front=padding(77)		#blank spaces left at the front of the segments and timbre
	    seg_pitch = hdf5_getters.get_segments_pitches(h5)
	    transpose_pitch= seg_pitch.transpose()          #this is to tranpose the matrix,so we can have 12 rows
	    group_index=4
	    row=[]
	    row=gral_info[:]
	    for i,item in enumerate(transpose_pitch[0]):
		row.append(group_index)
		row.append(i)
		for index in row_front:  	#padding blanks in front of segments and timbre
			row.append(index)
	   
		row.append(transpose_pitch[0][i])
  		seg_pitch_avg= get_avg(transpose_pitch[0])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[0])	
		row.append(seg_pitch_max)
		seg_pitch_min = get_min(transpose_pitch[0])
		row.append(seg_pitch_min)
		seg_pitch_stddev= get_stddev(transpose_pitch[0])
		row.append(seg_pitch_stddev)
		seg_pitch_count = get_count(transpose_pitch[0])
		row.append(seg_pitch_count)
		seg_pitch_sum = get_sum(transpose_pitch[0])
		row.append(seg_pitch_sum)   
 		row.append(transpose_pitch[1][i])
 		seg_pitch_avg= get_avg(transpose_pitch[1])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[1])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[1])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[1])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[1])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[1])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[2][i])
 		seg_pitch_avg= get_avg(transpose_pitch[2])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[2])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[2])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[2])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[2])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[2])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[3][i])
 		seg_pitch_avg= get_avg(transpose_pitch[3])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[3])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[3])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[3])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[3])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[3])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[4][i])
 		seg_pitch_avg= get_avg(transpose_pitch[4])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[4])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[4])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[4])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[4])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[4])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[5][i])
 		seg_pitch_avg= get_avg(transpose_pitch[5])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[5])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[5])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[5])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[5])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[5])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[6][i])
 		seg_pitch_avg= get_avg(transpose_pitch[6])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[6])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[6])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[6])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[6])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[6])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[7][i])
 		seg_pitch_avg= get_avg(transpose_pitch[7])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[7])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[7])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[7])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[7])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[7])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[8][i])
 		seg_pitch_avg= get_avg(transpose_pitch[8])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[8])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[8])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[8])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[8])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[8])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[9][i])
 		seg_pitch_avg= get_avg(transpose_pitch[9])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[9])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[9])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[9])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[9])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[9])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[10][i])
 		seg_pitch_avg= get_avg(transpose_pitch[10])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[10])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[10])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[10])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[10])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[10])
		row.append(seg_pitch_sum)   
		row.append(transpose_pitch[11][i])
 		seg_pitch_avg= get_avg(transpose_pitch[11])
		row.append(seg_pitch_avg)
		seg_pitch_max= get_max(transpose_pitch[11])	
		row.append(seg_pitch_max)
	        seg_pitch_min = get_min(transpose_pitch[11])
		row.append(seg_pitch_min)
	        seg_pitch_stddev= get_stddev(transpose_pitch[11])
		row.append(seg_pitch_stddev)
	        seg_pitch_count = get_count(transpose_pitch[11])
		row.append(seg_pitch_count)
	        seg_pitch_sum = get_sum(transpose_pitch[11])
		row.append(seg_pitch_sum)   
		#timbre arrays
	        seg_timbre = hdf5_getters.get_segments_timbre(h5)
                transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows
		row.append(transpose_timbre[0][i])
  		seg_timbre_avg= get_avg(transpose_timbre[0])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[0])	
		row.append(seg_timbre_max)
		seg_timbre_min = get_min(transpose_timbre[0])
		row.append(seg_timbre_min)
		seg_timbre_stddev=get_stddev(transpose_timbre[0])
		row.append(seg_timbre_stddev)
		seg_timbre_count = get_count(transpose_timbre[0])
		row.append(seg_timbre_count)
		seg_timbre_sum = get_sum(transpose_timbre[0])
		row.append(seg_timbre_sum)   
 		row.append(transpose_timbre[1][i])
 		seg_timbre_avg= get_avg(transpose_timbre[1])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[1])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[1])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[1])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[1])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[1])
		row.append(seg_timbre_sum)   
		row.append(transpose_timbre[2][i])
 		seg_timbre_avg= get_avg(transpose_timbre[2])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[2])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[2])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[2])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[2])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[2])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[3][i])
 		seg_timbre_avg= get_avg(transpose_timbre[3])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[3])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[3])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[3])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[3])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[3])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[4][i])
 		seg_timbre_avg= get_avg(transpose_timbre[4])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[4])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[4])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[4])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[4])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[4])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[5][i])
 		seg_timbre_avg= get_avg(transpose_timbre[5])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[5])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[5])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[5])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[5])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[5])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[6][i])
 		seg_timbre_avg= get_avg(transpose_timbre[6])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[6])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[6])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[6])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[6])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[6])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[7][i])
 		seg_timbre_avg= get_avg(transpose_timbre[7])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[7])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[7])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[7])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[7])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[7])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[8][i])
 		seg_timbre_avg= get_avg(transpose_timbre[8])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[8])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[8])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[8])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[8])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[8])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[9][i])
 		seg_timbre_avg= get_avg(transpose_timbre[9])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[9])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[9])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[9])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[9])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[9])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[10][i])
 		seg_timbre_avg= get_avg(transpose_timbre[10])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[10])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[10])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[10])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[10])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[10])
		row.append(seg_timbre_sum)   
		
		row.append(transpose_timbre[11][i])
 		seg_timbre_avg= get_avg(transpose_timbre[11])
		row.append(seg_timbre_avg)
		seg_timbre_max= get_max(transpose_timbre[11])	
		row.append(seg_timbre_max)
	        seg_timbre_min = get_min(transpose_timbre[11])
		row.append(seg_timbre_min)
	        seg_timbre_stddev= get_stddev(transpose_timbre[11])
		row.append(seg_timbre_stddev)
	        seg_timbre_count = get_count(transpose_timbre[11])
		row.append(seg_timbre_count)
	        seg_timbre_sum = get_sum(transpose_timbre[11])
		row.append(seg_timbre_sum)
	        for item in row_seg2_padding:
			row.append(item)
		writer.writerow(row)
		row=[]
		row=gral_info[:]


            # "--------tatums---------------"
	    tatms_c = hdf5_getters.get_tatums_confidence(h5)
	    group_index=5
	    row_front=padding(245)	#blank spaces left in front of tatums
	    row=[]
	    row=gral_info[:]
	    for i,item in enumerate(tatms_c):
		row.append(group_index)
		row.append(i)
		for item in row_front:	#appending blank spaces at the front of the row
			row.append(item)

		row.append(tatms_c[i])
		tatms_c_avg= get_avg(tatms_c)
		row.append(tatms_c_avg)
	 	tatms_c_max= get_max(tatms_c)
		row.append(tatms_c_max)
	        tatms_c_min = get_min(tatms_c)
		row.append(tatms_c_min)
	        tatms_c_stddev= get_stddev(tatms_c)
		row.append(tatms_c_stddev)
                tatms_c_count = get_count(tatms_c)
		row.append(tatms_c_count)
                tatms_c_sum = get_sum(tatms_c)
		row.append(tatms_c_sum)
                tatms_start = hdf5_getters.get_tatums_start(h5)
		row.append(tatms_start[i])
	        tatms_start_avg= get_avg(tatms_start)
		row.append(tatms_start_avg)
	        tatms_start_max= get_max(tatms_start)
		row.append(tatms_start_max)
	        tatms_start_min = get_min(tatms_start)
		row.append(tatms_start_min)
	        tatms_start_stddev= get_stddev(tatms_start)
		row.append(tatms_start_stddev)
	        tatms_start_count = get_count(tatms_start)
		row.append(tatms_start_count)
	        tatms_start_sum = get_sum(tatms_start)				   
		row.append(tatms_start_sum)
		writer.writerow(row)
		row=[]
		row=gral_info[:]


 
	    transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_pitch_avg=[]
	    seg_pitch_max=[]
	    seg_pitch_min=[]
            seg_pitch_stddev=[]
            seg_pitch_count=[]
	    seg_pitch_sum=[]
            i=0
	    #Getting the aggregate values in the pitches array
	    for row in transpose_pitch:
		   seg_pitch_avg.append(get_avg(row))
		   seg_pitch_max.append(get_max(row))
	           seg_pitch_min.append(get_min(row))
		   seg_pitch_stddev.append(get_stddev(row))
		   seg_pitch_count.append(get_count(row))
                   seg_pitch_sum.append(get_sum(row))
		   i=i+1

	    #extracting information from the timbre array 
            transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows
	    #arrays containing the aggregate values of the 12 rows
	    seg_timbre_avg=[]
	    seg_timbre_max=[]
	    seg_timbre_min=[]
            seg_timbre_stddev=[]
            seg_timbre_count=[]
	    seg_timbre_sum=[]
            i=0
	    for row in transpose_timbre:
		   seg_timbre_avg.append(get_avg(row))
		   seg_timbre_max.append(get_max(row))
	           seg_timbre_min.append(get_min(row))
		   seg_timbre_stddev.append(get_stddev(row))
		   seg_timbre_count.append(get_count(row))
                   seg_timbre_sum.append(get_sum(row))
		   i=i+1








	    h5.close()
	    count=count+1;
	    print count;
Exemple #28
0
def main():
    outputFile1 = open('SongCSV.csv', 'w')
    csvRowString = ""

    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #leave the prompt boolean set to True
    #else, set 'prompt' to False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################
    if prompt == True:
        while prompt:

            prompt = False

            csvAttributeString = raw_input(
                "\n\nIn what order would you like the colums of the CSV file?\n"
                + "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"
                +
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo,"
                +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n"
                +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n"
                + "...or exit by typing 'exit'.\n\n")

            csvAttributeList = re.split('\W+', csvAttributeString)
            for i, v in enumerate(csvAttributeList):
                csvAttributeList[i] = csvAttributeList[i].lower()

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += 'AlbumID'
                elif attribute == 'AlbumName'.lower():
                    csvRowString += 'AlbumName'
                elif attribute == 'ArtistID'.lower():
                    csvRowString += 'ArtistID'
                elif attribute == 'ArtistLatitude'.lower():
                    csvRowString += 'ArtistLatitude'
                elif attribute == 'ArtistLocation'.lower():
                    csvRowString += 'ArtistLocation'
                elif attribute == 'ArtistLongitude'.lower():
                    csvRowString += 'ArtistLongitude'
                elif attribute == 'ArtistName'.lower():
                    csvRowString += 'ArtistName'
                elif attribute == 'Danceability'.lower():
                    csvRowString += 'Danceability'
                elif attribute == 'Duration'.lower():
                    csvRowString += 'Duration'
                elif attribute == 'KeySignature'.lower():
                    csvRowString += 'KeySignature'
                elif attribute == 'KeySignatureConfidence'.lower():
                    csvRowString += 'KeySignatureConfidence'
                elif attribute == 'SongID'.lower():
                    csvRowString += "SongID"
                elif attribute == 'Tempo'.lower():
                    csvRowString += 'Tempo'
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += 'TimeSignature'
                elif attribute == 'TimeSignatureConfidence'.lower():
                    csvRowString += 'TimeSignatureConfidence'
                elif attribute == 'Title'.lower():
                    csvRowString += 'Title'
                elif attribute == 'Year'.lower():
                    csvRowString += 'Year'
                elif attribute == 'Familiarity'.lower():  ####Added by us!
                    csvRowString += song.familiarity
                elif attribute == 'artist_mbid'.lower():
                    csvRowString += song.artist_mbid
                elif attribute == 'artist_playmeid'.lower():
                    csvRowString += song.artist_playmeid
                elif attribute == 'artist_7digid'.lower():
                    csvRowString += song.artist_7digid
                elif attribute == 'hottness'.lower():
                    csvRowString += song.hottness
                elif attribute == 'song_hottness'.lower():
                    csvRowString += song.song_hottness
                elif attribute == 'digitalid7'.lower():
                    csvRowString += song.digitalid7
                elif attribute == 'similar_artists'.lower():
                    csvRowString += song.similar_artists
                elif attribute == 'artist_terms'.lower():
                    csvRowString += song.artist_terms
                elif attribute == 'art_terms_freq'.lower():
                    csvRowString += song.art_terms_freq
                elif attribute == 'art_terms_weight'.lower():
                    csvRowString += song.art_terms_weight
                elif attribute == 'a_sample_rate'.lower():
                    csvRowString += song.a_sample_rate
                elif attribute == 'audio_md5'.lower():
                    csvRowString += song.audio_md5
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += song.end_of_fade_in
                elif attribute == 'energy'.lower():
                    csvRowString += song.energy
                elif attribute == 'loudness'.lower():
                    csvRowString += song.loudness
                elif attribute == 'mode'.lower():
                    csvRowString += song.mode
                elif attribute == 'mode_conf'.lower():
                    csvRowString += song.mode_conf
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += song.start_of_fade_out
                elif attribute == 'trackid'.lower():
                    csvRowString += song.trackid
                elif attribute == 'segm_start'.lower():
                    csvRowString += song.segm_start
                elif attribute == 'segm_conf'.lower():
                    csvRowString += song.segm_conf
                elif attribute == 'segm_pitch'.lower():
                    csvRowString += song.segm_pitch
                elif attribute == 'segm_timbre'.lower():
                    csvRowString += song.segm_timbre
                elif attribute == 'segm_max_loud'.lower():
                    csvRowString += song.segm_max_loud
                elif attribute == 'segm_max_loud_time'.lower():
                    csvRowString += song.segm_max_loud_time
                elif attribute == 'segm_loud_start'.lower():
                    csvRowString += song.segm_loud_start
                elif attribute == 'sect_start'.lower():
                    csvRowString += song.sect_start
                elif attribute == 'sect_conf'.lower():
                    csvRowString += song.sect_conf
                elif attribute == 'beats_start'.lower():
                    csvRowString += song.beats_start
                elif attribute == 'beats_conf'.lower():
                    csvRowString += song.beats_conf
                elif attribute == 'bars_start'.lower():
                    csvRowString += song.bars_start
                elif attribute == 'bars_conf'.lower():
                    csvRowString += song.bars_conf
                elif attribute == 'tatums_start'.lower():
                    csvRowString += song.tatums_start
                elif attribute == 'tatums_conf'.lower():
                    csvRowString += song.tatums_conf
                elif attribute == 'artist_mbtags'.lower():
                    csvRowString += song.artist_mbtags
                elif attribute == 'artist_mbtags_count'.lower():
                    csvRowString += song.artist_mbtags_count
                elif attribute == 'Exit'.lower():
                    sys.exit()
                else:
                    prompt = True
                    print("==============")
                    print("I believe there has been an error with the input.")
                    print("==============")
                    break

                csvRowString += ","

            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex - 1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""
    #else, if you want to hard code the order of the csv file and not prompt
    #the user,
    else:
        #################################################
        #change the order of the csv file here
        #Default is to list all available attributes (in alphabetical order)
        csvRowString = "SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,Title,Year,Familiarity,Artist_Mbid,Artist_PlaymeId,Artist_7didId,Hottness,Song_Hottness,7digitalid,A_Sample_Rate,Audio_Md5,End_Of_Fade_In,Energy,Loudness,Mode,Mode_Conf,Start_Of_Fade_Out,TrackId"
        #################################################

        csvAttributeList = re.split(',', csvRowString)
        for i, v in enumerate(csvAttributeList):
            csvAttributeList[i] = csvAttributeList[i].lower()
        csvRowString += "\n"
        outputFile1.write(csvRowString)
        csvRowString = ""

    #################################################

    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = "/home/bigdata/smalltest/"  # "." As the default means the current directory
    ext = ".h5"  #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    #FOR LOOP
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            print(f)

            songH5File = hdf5_getters.open_h5_file_read(f)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            # testDanceability = hdf5_getters.get_danceability(songH5File)
            # print type(testDanceability)
            # print ("Here is the danceability: ") + str(testDanceability)

            song.artistID = str(hdf5_getters.get_artist_id(songH5File))
            song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.albumName = str(hdf5_getters.get_release(songH5File))
            song.artistLatitude = str(
                hdf5_getters.get_artist_latitude(songH5File))
            song.artistLocation = str(
                hdf5_getters.get_artist_location(songH5File))
            song.artistLongitude = str(
                hdf5_getters.get_artist_longitude(songH5File))
            song.artistName = str(hdf5_getters.get_artist_name(songH5File))
            song.danceability = str(hdf5_getters.get_danceability(songH5File))
            song.duration = str(hdf5_getters.get_duration(songH5File))
            # song.setGenreList()
            song.keySignature = str(hdf5_getters.get_key(songH5File))
            song.keySignatureConfidence = str(
                hdf5_getters.get_key_confidence(songH5File))
            # song.lyrics = None
            # song.popularity = None
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.timeSignature = str(
                hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = str(
                hdf5_getters.get_time_signature_confidence(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))

            #########Added by us!
            song.familiarity = str(
                hdf5_getters.get_artist_familiarity(songH5File))
            song.artist_mbid = str(hdf5_getters.get_artist_mbid(songH5File))
            song.artist_playmeid = str(
                hdf5_getters.get_artist_playmeid(songH5File))
            song.artist_7digid = str(
                hdf5_getters.get_artist_7digitalid(songH5File))
            song.hottness = str(hdf5_getters.get_artist_hotttnesss(songH5File))
            song.song_hottness = str(
                hdf5_getters.get_song_hotttnesss(songH5File))
            song.digitalid7 = str(
                hdf5_getters.get_track_7digitalid(songH5File))
            #song.similar_artists = str(hdf5_getters.get_similar_artists(songH5File))
            #song.artist_terms = str(hdf5_getters.get_artist_terms(songH5File))
            #song.art_terms_freq = str(hdf5_getters.get_artist_terms_freq(songH5File))
            #song.art_terms_weight = str(hdf5_getters.get_artist_terms_weight(songH5File))
            song.a_sample_rate = str(
                hdf5_getters.get_analysis_sample_rate(songH5File))
            song.audio_md5 = str(hdf5_getters.get_audio_md5(songH5File))
            song.end_of_fade_in = str(
                hdf5_getters.get_end_of_fade_in(songH5File))
            song.energy = str(hdf5_getters.get_energy(songH5File))
            song.loudness = str(hdf5_getters.get_loudness(songH5File))
            song.mode = str(hdf5_getters.get_mode(songH5File))
            song.mode_conf = str(hdf5_getters.get_mode_confidence(songH5File))
            song.start_of_fade_out = str(
                hdf5_getters.get_start_of_fade_out(songH5File))
            song.trackid = str(hdf5_getters.get_track_id(songH5File))
            #song.segm_start = str(hdf5_getters.get_segments_start(songH5File))
            #song.segm_conf = str(hdf5_getters.get_segments_confidence(songH5File))
            #song.segm_pitch = str(hdf5_getters.get_segments_pitches(songH5File))
            #song.segm_timbre = str(hdf5_getters.get_segments_timbre(songH5File))
            #song.segm_max_loud = str(hdf5_getters.get_segments_loudness_max(songH5File))
            #song.segm_max_loud_time = str(hdf5_getters.get_segments_loudness_max_time(songH5File))
            #song.segm_loud_start = str(hdf5_getters.get_segments_loudness_start(songH5File))
            #song.sect_start = str(hdf5_getters.get_sections_start(songH5File))
            #song.sect_conf = str(hdf5_getters.get_sections_confidence(songH5File))
            #song.beats_start = str(hdf5_getters.get_beats_start(songH5File))
            #song.beats_conf = str(hdf5_getters.get_beats_confidence(songH5File))
            #song.bars_start = str(hdf5_getters.get_bars_start(songH5File))
            #song.bars_conf = str(hdf5_getters.get_bars_confidence(songH5File))
            #song.tatums_start = str(hdf5_getters.get_tatums_start(songH5File))
            #song.tatums_conf = str(hdf5_getters.get_tatums_confidence(songH5File))
            #song.artist_mbtags = str(hdf5_getters.get_artist_mbtags(songH5File))
            #song.artist_mbtags_count = str(hdf5_getters.get_artist_mbtags_count(songH5File))

            #print song count
            #csvRowString += str(song.songCount) + ","

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += song.albumID
                elif attribute == 'AlbumName'.lower():
                    albumName = song.albumName
                    albumName = albumName.replace("b\"", "")
                    albumName = albumName.replace("\"", "")
                    albumName = albumName.replace(',', "")
                    csvRowString += "\"" + albumName + "\""
                elif attribute == 'ArtistID'.lower():
                    csvRowString += "\"" + song.artistID + "\""
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    csvRowString += latitude
                elif attribute == 'ArtistLocation'.lower():
                    location = song.artistLocation
                    location = location.replace(',', '')
                    location = location.replace("b\"", "")
                    location = location.replace("\"", "")
                    csvRowString += "\"" + location + "\""
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    csvRowString += longitude
                elif attribute == 'ArtistName'.lower():
                    artistName = song.artistName
                    artistName = artistName.replace("b\"", "")
                    artistName = artistName.replace("\"", "")
                    csvRowString += "\"" + artistName + "\""
                elif attribute == 'Danceability'.lower():
                    csvRowString += song.danceability
                elif attribute == 'Duration'.lower():
                    csvRowString += song.duration
                elif attribute == 'KeySignature'.lower():
                    csvRowString += song.keySignature
                elif attribute == 'KeySignatureConfidence'.lower():
                    # print "key sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.keySignatureConfidence
                elif attribute == 'SongID'.lower():
                    csvRowString += "\"" + song.id + "\""
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    csvRowString += song.tempo
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += song.timeSignature
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.timeSignatureConfidence
                elif attribute == 'Title'.lower():
                    t = song.title
                    t = t.replace("b\"", "")
                    t = t.replace("\"", "")
                    csvRowString += "\"" + t + "\""
                elif attribute == 'Year'.lower():
                    csvRowString += song.year
                elif attribute == 'Familiarity'.lower():  ####Added by us!
                    csvRowString += song.familiarity
                elif attribute == 'artist_mbid'.lower():
                    csvRowString += "\"" + song.artist_mbid + "\""
                elif attribute == 'artist_playmeid'.lower():
                    csvRowString += song.artist_playmeid
                elif attribute == 'artist_7digid'.lower():
                    csvRowString += song.artist_7digid
                elif attribute == 'hottness'.lower():
                    csvRowString += song.hottness
                elif attribute == 'song_hottness'.lower():
                    csvRowString += song.song_hottness
                elif attribute == 'digitalid7'.lower():
                    csvRowString += song.digitalid7
                elif attribute == 'similar_artists'.lower():
                    csvRowString += song.similar_artists
                elif attribute == 'artist_terms'.lower():
                    csvRowString += song.artist_terms
                elif attribute == 'art_terms_freq'.lower():
                    csvRowString += song.art_terms_freq
                elif attribute == 'art_terms_weight'.lower():
                    csvRowString += song.art_terms_weight
                elif attribute == 'a_sample_rate'.lower():
                    csvRowString += song.a_sample_rate
                elif attribute == 'audio_md5'.lower():
                    csvRowString += "\"" + song.audio_md5 + "\""
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += song.end_of_fade_in
                elif attribute == 'energy'.lower():
                    csvRowString += song.energy
                elif attribute == 'loudness'.lower():
                    csvRowString += song.loudness
                elif attribute == 'mode'.lower():
                    csvRowString += song.mode
                elif attribute == 'mode_conf'.lower():
                    csvRowString += song.mode_conf
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += song.start_of_fade_out
                elif attribute == 'trackid'.lower():
                    csvRowString += "\"" + song.trackid + "\""
                elif attribute == 'segm_start'.lower():
                    csvRowString += song.segm_start
                elif attribute == 'segm_conf'.lower():
                    csvRowString += song.segm_conf
                elif attribute == 'segm_pitch'.lower():
                    csvRowString += song.segm_pitch
                elif attribute == 'segm_timbre'.lower():
                    csvRowString += song.segm_timbre
                elif attribute == 'segm_max_loud'.lower():
                    csvRowString += song.segm_max_loud
                elif attribute == 'segm_max_loud_time'.lower():
                    csvRowString += song.segm_max_loud_time
                elif attribute == 'segm_loud_start'.lower():
                    csvRowString += song.segm_loud_start
                elif attribute == 'sect_start'.lower():
                    csvRowString += song.sect_start
                elif attribute == 'sect_conf'.lower():
                    csvRowString += song.sect_conf
                elif attribute == 'beats_start'.lower():
                    csvRowString += song.beats_start
                elif attribute == 'beats_conf'.lower():
                    csvRowString += song.beats_conf
                elif attribute == 'bars_start'.lower():
                    csvRowString += song.bars_start
                elif attribute == 'bars_conf'.lower():
                    csvRowString += song.bars_conf
                elif attribute == 'tatums_start'.lower():
                    csvRowString += song.tatums_start
                elif attribute == 'tatums_conf'.lower():
                    csvRowString += song.tatums_conf
                elif attribute == 'artist_mbtags'.lower():
                    csvRowString += song.artist_mbtags
                elif attribute == 'artist_mbtags_count'.lower():
                    csvRowString += song.artist_mbtags_count
                else:
                    csvRowString += "\"ERR\""

                csvRowString += ","

            #Remove the final comma from each row in the csv
            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex - 1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""

            songH5File.close()

    outputFile1.close()
def main():
    outputFile1 = open('SongCSV.csv', 'w')
    csvRowString = ""

    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #leave the prompt boolean set to True
    #else, set 'prompt' to False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################
    if prompt == True:
        while prompt:

            prompt = False

            csvAttributeString = raw_input("\n\nIn what order would you like the colums of the CSV file?\n" +
                "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"+
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n" +
                "...or exit by typing 'exit'.\n\n")

            csvAttributeList = re.split('\W+', csvAttributeString)
            for i, v in enumerate(csvAttributeList):
                csvAttributeList[i] = csvAttributeList[i].lower()

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"


                if attribute == 'AlbumID'.lower():
                    csvRowString += 'AlbumID'
                elif attribute == 'AlbumName'.lower():
                    csvRowString += 'AlbumName'
                elif attribute == 'ArtistID'.lower():
                    csvRowString += 'ArtistID'
                elif attribute == 'ArtistLatitude'.lower():
                    csvRowString += 'ArtistLatitude'
                elif attribute == 'ArtistLocation'.lower():
                    csvRowString += 'ArtistLocation'
                elif attribute == 'ArtistLongitude'.lower():
                    csvRowString += 'ArtistLongitude'
                elif attribute == 'ArtistName'.lower():
                    csvRowString += 'ArtistName'
                elif attribute == 'Danceability'.lower():
                    csvRowString += 'Danceability'
                elif attribute == 'Duration'.lower():
                    csvRowString += 'Duration'
                elif attribute == 'KeySignature'.lower():
                    csvRowString += 'KeySignature'
                elif attribute == 'KeySignatureConfidence'.lower():
                    csvRowString += 'KeySignatureConfidence'
                elif attribute == 'SongID'.lower():
                    csvRowString += "SongID"
                elif attribute == 'Tempo'.lower():
                    csvRowString += 'Tempo'
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += 'TimeSignature'
                elif attribute == 'TimeSignatureConfidence'.lower():
                    csvRowString += 'TimeSignatureConfidence'
                elif attribute == 'Title'.lower():
                    csvRowString += 'Title'
                elif attribute == 'Year'.lower():
                    csvRowString += 'Year'
                elif attribute == 'track_id'.lower():
                    csvRowString += 'track_id' 
                elif attribute == 'artist_familiarity'.lower():
                    csvRowString += 'artist_familiarity' 
                elif attribute == 'artist_hotttnesss'.lower():
                    csvRowString += 'artist_hotttnesss' 
                elif attribute == 'artist_mbid'.lower():
                    csvRowString += 'artist_mbid'
                elif attribute == 'artist_playmeid'.lower():
                    csvRowString += 'artist_playmeid'
                elif attribute == 'artist_7digitalid'.lower():
                    csvRowString += 'artist_7digitalid'
                elif attribute == 'release'.lower():
                    csvRowString += 'release' 
                elif attribute == 'release_7digitalid'.lower():
                    csvRowString += 'release_7digitalid' 
                elif attribute == 'song_hotttnesss'.lower():
                    csvRowString += 'song_hotttnesss'
                elif attribute == 'track_7digitalid'.lower():
                    csvRowString += 'track_7digitalid' 
                elif attribute == 'analysis_sample_rate'.lower():
                    csvRowString += 'analysis_sample_rate' 
                elif attribute == 'audio_md5'.lower():
                    csvRowString += 'audio_md5' 
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += 'end_of_fade_in'
                elif attribute == 'energy'.lower():
                    csvRowString += 'energy'  
                elif attribute == 'key'.lower():
                    csvRowString += 'key'  
                elif attribute == 'key_confidence'.lower():
                    csvRowString += 'key_confidence' 
                elif attribute == 'loudness'.lower():
                    csvRowString += 'loudness' 
                elif attribute == 'mode'.lower():
                    csvRowString += 'mode'  
                elif attribute == 'mode_confidence'.lower():
                    csvRowString += 'mode_confidence'   
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += 'start_of_fade_out'                                                                                
                elif attribute == 'Exit'.lower():
                    sys.exit()
                else:
                    prompt = True
                    print "=============="
                    print "I believe there has been an error with the input."
                    print "=============="
                    break

                csvRowString += ","

            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex-1]
            csvRowString += "\n"
            outputFile1.write(csvRowString);
            csvRowString = ""
    #else, if you want to hard code the order of the csv file and not prompt
    #the user, 
    else:
        #################################################
        #change the order of the csv file here
        #Default is to list all available attributes (in alphabetical order)
        csvRowString = ("SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"+
            "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,"+
            "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"+
            "Title,Year,track_id,artist_hotttnesss,artist_mbid,artist_playmeid,artist_7digitalid,"+
            "release,release_7digitalid,song_hotttnesss,track_7digitalid,analysis_sample_rate,audio_md5,"+
            "end_of_fade_in,energy,key,key_confidence,loudness,mode,mode_confidence,start_of_fade_out")
        #################################################

        csvAttributeList = re.split('\W+', csvRowString)
        for i, v in enumerate(csvAttributeList):
            csvAttributeList[i] = csvAttributeList[i].lower()
        outputFile1.write("SongNumber,");
        outputFile1.write(csvRowString + "\n");
        csvRowString = ""  

    #################################################


    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = "/vagrant/genrepython/MillionSongSubset" # "." As the default means the current directory
    ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    #FOR LOOP
    for root, dirs, files in os.walk(basedir):        
        files = glob.glob(os.path.join(root,'*'+ext))
        for f in files:
            print f

            songH5File = hdf5_getters.open_h5_file_read(f)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            testDanceability = hdf5_getters.get_danceability(songH5File)
            # print type(testDanceability)
            # print ("Here is the danceability: ") + str(testDanceability)

            song.artistID = str(hdf5_getters.get_artist_id(songH5File))
            song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.albumName = str(hdf5_getters.get_release(songH5File))
            song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
            song.artistLocation = str(hdf5_getters.get_artist_location(songH5File))
            song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
            song.artistName = str(hdf5_getters.get_artist_name(songH5File))
            song.danceability = str(hdf5_getters.get_danceability(songH5File))
            song.duration = str(hdf5_getters.get_duration(songH5File))
            # song.setGenreList()
            song.keySignature = str(hdf5_getters.get_key(songH5File))
            song.keySignatureConfidence = str(hdf5_getters.get_key_confidence(songH5File))
            # song.lyrics = None
            # song.popularity = None
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))
            song.track_id = str(hdf5_getters.get_track_id(songH5File))
            song.artist_familiarity = str(hdf5_getters.get_artist_familiarity(songH5File))
            song.artist_hotttnesss = str(hdf5_getters.get_artist_hotttnesss(songH5File))
            song.artist_mbid = str(hdf5_getters.get_artist_mbid(songH5File))
            song.artist_playmeid = str(hdf5_getters.get_artist_playmeid(songH5File))
            song.artist_7digitalid = str(hdf5_getters.get_artist_7digitalid(songH5File))
            song.release = str(hdf5_getters.get_release(songH5File))
            song.release_7digitalid = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.song_hotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
            song.track_7digitalid = str(hdf5_getters.get_track_7digitalid(songH5File))
            song.analysis_sample_rate = str(hdf5_getters.get_analysis_sample_rate(songH5File))
            song.audio_md5 = str(hdf5_getters.get_audio_md5(songH5File))
            song.end_of_fade_in = str(hdf5_getters.get_end_of_fade_in(songH5File))
            song.energy = str(hdf5_getters.get_energy(songH5File))
            song.key = str(hdf5_getters.get_key(songH5File))
            song.key_confidence = str(hdf5_getters.get_key_confidence(songH5File))
            song.loudness = str(hdf5_getters.get_loudness(songH5File))
            song.mode = str(hdf5_getters.get_mode(songH5File))
            song.mode_confidence = str(hdf5_getters.get_mode_confidence(songH5File))
            song.start_of_fade_out = str(hdf5_getters.get_start_of_fade_out(songH5File))

            #print song count
            csvRowString += str(song.songCount) + ","

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += song.albumID
                elif attribute == 'AlbumName'.lower():
                    albumName = song.albumName
                    albumName = albumName.replace(',',"")
                    csvRowString += "\"" + albumName + "\""
                elif attribute == 'ArtistID'.lower():
                    csvRowString += "\"" + song.artistID + "\""
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    csvRowString += latitude
                elif attribute == 'ArtistLocation'.lower():
                    location = song.artistLocation
                    location = location.replace(',','')
                    csvRowString += "\"" + location + "\""
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    csvRowString += longitude                
                elif attribute == 'ArtistName'.lower():
                    csvRowString += "\"" + song.artistName + "\""                
                elif attribute == 'Danceability'.lower():
                    csvRowString += song.danceability
                elif attribute == 'Duration'.lower():
                    csvRowString += song.duration
                elif attribute == 'KeySignature'.lower():
                    csvRowString += song.keySignature
                elif attribute == 'KeySignatureConfidence'.lower():
                    # print "key sig conf: " + song.timeSignatureConfidence                                 
                    csvRowString += song.keySignatureConfidence
                elif attribute == 'SongID'.lower():
                    csvRowString += "\"" + song.id + "\""
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    csvRowString += song.tempo
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += song.timeSignature
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence                                   
                    csvRowString += song.timeSignatureConfidence
                elif attribute == 'Title'.lower():
                    csvRowString += "\"" + song.title + "\""
                elif attribute == 'Year'.lower():
                    csvRowString += song.year
                elif attribute == 'track_id'.lower():
                    csvRowString += song.track_id 
                elif attribute == 'artist_familiarity'.lower():
                    csvRowString += song.artist_familiarity  
                elif attribute == 'artist_hotttnesss'.lower():
                    csvRowString += song.artist_hotttnesss 
                elif attribute == 'artist_mbid'.lower():
                    csvRowString += song.artist_mbid
                elif attribute == 'artist_playmeid'.lower():
                    csvRowString += song.artist_playmeid 
                elif attribute == 'artist_7digitalid'.lower():
                    csvRowString += song.artist_7digitalid 
                elif attribute == 'release'.lower():
                    csvRowString += song.release
                elif attribute == 'release_7digitalid'.lower():
                    csvRowString += song.release_7digitalid 
                elif attribute == 'song_hotttnesss'.lower():
                    csvRowString += song.song_hotttnesss  
                elif attribute == 'track_7digitalid'.lower():
                    csvRowString += song.track_7digitalid 
                elif attribute == 'analysis_sample_rate'.lower():
                    csvRowString += song.analysis_sample_rate  
                elif attribute == 'audio_md5'.lower():
                    csvRowString += song.audio_md5 
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += song.end_of_fade_in  
                elif attribute == 'energy'.lower():
                    csvRowString += song.energy 
                elif attribute == 'key'.lower():
                    csvRowString += song.key 
                elif attribute == 'key_confidence'.lower():
                    csvRowString += song.key_confidence 
                elif attribute == 'loudness'.lower():
                    csvRowString += song.loudness
                elif attribute == 'mode'.lower():
                    csvRowString += song.mode   
                elif attribute == 'mode_confidence'.lower():
                    csvRowString += song.mode_confidence    
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += song.start_of_fade_out                                                                              
                else:
                    csvRowString += "Erm. This didn't work. Error. :( :(\n"

                csvRowString += ","

            #Remove the final comma from each row in the csv
            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex-1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""

            songH5File.close()

    outputFile1.close()
 artist_mbtags = ','.join(str(e) for e in GETTERS.get_artist_mbtags(h5, i)) # array
 artist_mbtags_count = ','.join(str(e) for e in GETTERS.get_artist_mbtags_count(h5, i)) # array
 artist_name = GETTERS.get_artist_name(h5, i)
 artist_playmeid = GETTERS.get_artist_playmeid(h5, i)
 artist_terms = ','.join(str(e) for e in GETTERS.get_artist_terms(h5, i)) # array
 #artist_terms_freq = ','.join(str(e) for e in GETTERS.get_artist_terms_freq(h5, i)) # array
 #artist_terms_weight = ','.join(str(e) for e in GETTERS.get_artist_terms_weight(h5, i)) # array
 #audio_md5 = GETTERS.get_audio_md5(h5, i)
 #bars_confidence = ','.join(str(e) for e in GETTERS.get_bars_confidence(h5, i)) # array
 #bars_start = ','.join(str(e) for e in GETTERS.get_bars_start(h5, i)) # array
 #beats_confidence = ','.join(str(e) for e in GETTERS.get_beats_confidence(h5, i)) # array
 #beats_start = ','.join(str(e) for e in GETTERS.get_beats_start(h5, i)) # array
 danceability = GETTERS.get_danceability(h5, i)
 duration = GETTERS.get_duration(h5, i)
 end_of_fade_in = GETTERS.get_end_of_fade_in(h5, i)
 energy = GETTERS.get_energy(h5, i)
 key = GETTERS.get_key(h5, i)
 key_confidence = GETTERS.get_key_confidence(h5, i)
 loudness = GETTERS.get_loudness(h5, i)
 mode = GETTERS.get_mode(h5, i)
 mode_confidence = GETTERS.get_mode_confidence(h5, i)
 release = GETTERS.get_release(h5, i)
 release_7digitalid = GETTERS.get_release_7digitalid(h5, i)
 #sections_confidence = ','.join(str(e) for e in GETTERS.get_sections_confidence(h5, i)) # array
 #sections_start = ','.join(str(e) for e in GETTERS.get_sections_start(h5, i)) # array
 #segments_confidence = ','.join(str(e) for e in GETTERS.get_segments_confidence(h5, i)) # array
 #segments_loudness_max = ','.join(str(e) for e in GETTERS.get_segments_loudness_max(h5, i)) # array
 #segments_loudness_max_time = ','.join(str(e) for e in GETTERS.get_segments_loudness_max_time(h5, i)) # array
 #segments_loudness_start = ','.join(str(e) for e in GETTERS.get_segments_loudness_start(h5, i)) # array
 #segments_pitches = ','.join(str(e) for e in GETTERS.get_segments_pitches(h5, i)) # array
 #segments_start = ','.join(str(e) for e in GETTERS.get_segments_start(h5, i)) # array
Exemple #31
0
def main():
    outputFile = open('songs.csv', 'w')
    writer = csv.writer(outputFile)

    csvRowString = "song_number,artist_familiarity,artist_hotttnesss,artist_id,artist_mbid,artist_playmeid,artist_7digitalid,artist_latitude,artist_longitude,artist_location,artist_name,release,release_7digitalid,song_id,song_hotttnesss,title,track_7digitalid,analysis_sample_rate,audio_md5,danceability,duration,end_of_fade_in,energy,key,key_confidence,loudness,mode,mode_confidence,start_of_fade_out,tempo,time_signature,time_signature_confidence,track_id,year"

    outputFile.write(csvRowString + "\n")
    csvRowString = ""

    #################################################
    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = "."  # "." As the default means the current directory
    ext = ".H5"  #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    #FOR LOOP
    songCount = 0
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            print(f)

            songH5File = hdf5_getters.open_h5_file_read(f)

            values = [
                songCount,
                hdf5_getters.get_artist_familiarity(songH5File),
                hdf5_getters.get_artist_hotttnesss(songH5File),
                hdf5_getters.get_artist_id(songH5File),
                hdf5_getters.get_artist_mbid(songH5File),
                hdf5_getters.get_artist_playmeid(songH5File),
                hdf5_getters.get_artist_7digitalid(songH5File),
                hdf5_getters.get_artist_latitude(songH5File),
                hdf5_getters.get_artist_longitude(songH5File),
                hdf5_getters.get_artist_location(songH5File),
                hdf5_getters.get_artist_name(songH5File),
                hdf5_getters.get_release(songH5File),
                hdf5_getters.get_release_7digitalid(songH5File),
                hdf5_getters.get_song_id(songH5File),
                hdf5_getters.get_song_hotttnesss(songH5File),
                hdf5_getters.get_title(songH5File),
                hdf5_getters.get_track_7digitalid(songH5File),
                hdf5_getters.get_analysis_sample_rate(songH5File),
                hdf5_getters.get_audio_md5(songH5File),
                hdf5_getters.get_danceability(songH5File),
                hdf5_getters.get_duration(songH5File),
                hdf5_getters.get_end_of_fade_in(songH5File),
                hdf5_getters.get_energy(songH5File),
                hdf5_getters.get_key(songH5File),
                hdf5_getters.get_key_confidence(songH5File),
                hdf5_getters.get_loudness(songH5File),
                hdf5_getters.get_mode(songH5File),
                hdf5_getters.get_mode_confidence(songH5File),
                hdf5_getters.get_start_of_fade_out(songH5File),
                hdf5_getters.get_tempo(songH5File),
                hdf5_getters.get_time_signature(songH5File),
                hdf5_getters.get_time_signature_confidence(songH5File),
                hdf5_getters.get_track_id(songH5File),
                hdf5_getters.get_year(songH5File)
            ]
            songH5File.close()
            songCount = songCount + 1

            writer.writerow(values)

    outputFile.close()
def complete_hd5_to_csv(basedir):
    ext = '.h5'  # Get all files with extension .h5

    # Header title. Essentially it is a schema for all the following songs
    header = [
        'Title', 'Artist familiarity', 'Artist hotness', 'Artist ID',
        'Artist mbID', 'Artist playmeid', 'Artist 7DigitalID',
        'Artist latitude', 'Artist longitude', 'Artist location',
        'Artist Name', 'Release', 'Release 7DigitalID', 'Song ID',
        'Song Hotness', 'Track 7Digital', 'Analysis sample rate', 'Audio md5',
        'Danceability', 'Duration', 'End of Fade', 'Energy', 'Key',
        'Key Confidence', 'Loudness', 'Mode', 'Mode Confidence',
        'Start of fade out', 'Tempo', 'Time signature',
        'Time signature confidence', 'Track ID', 'Year'
    ]

    with open('Tester2.csv', 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=';')

        # writing the header line. This line contains the schema of the data
        csv_writer.writerow(header)

        # Read all files from the given directories
        for root, dirs, files in os.walk(basedir):
            files = glob.glob(os.path.join(root, '*' + ext))
            print(files)

            for f in files:
                h5 = hdf5_getters.open_h5_file_read(f)

                # Write as row all elements. NOTE: Only the serialized elements are parsed and not arrays
                csv_writer.writerow([
                    hdf5_getters.get_title(h5),
                    hdf5_getters.get_artist_familiarity(h5),
                    hdf5_getters.get_artist_hotttnesss(h5),
                    hdf5_getters.get_artist_id(h5),
                    hdf5_getters.get_artist_mbid(h5),
                    hdf5_getters.get_artist_playmeid(h5),
                    hdf5_getters.get_artist_7digitalid(h5),
                    hdf5_getters.get_artist_latitude(h5),
                    hdf5_getters.get_artist_longitude(h5),
                    hdf5_getters.get_artist_location(h5),
                    hdf5_getters.get_artist_name(h5),
                    hdf5_getters.get_release(h5),
                    hdf5_getters.get_release_7digitalid(h5),
                    hdf5_getters.get_song_id(h5),
                    hdf5_getters.get_song_hotttnesss(h5),
                    hdf5_getters.get_track_7digitalid(h5),
                    hdf5_getters.get_analysis_sample_rate(h5),
                    hdf5_getters.get_audio_md5(h5),
                    hdf5_getters.get_danceability(h5),
                    hdf5_getters.get_duration(h5),
                    hdf5_getters.get_end_of_fade_in(h5),
                    hdf5_getters.get_energy(h5),
                    hdf5_getters.get_key(h5),
                    hdf5_getters.get_key_confidence(h5),
                    hdf5_getters.get_loudness(h5),
                    hdf5_getters.get_mode(h5),
                    hdf5_getters.get_mode_confidence(h5),
                    hdf5_getters.get_start_of_fade_out(h5),
                    hdf5_getters.get_tempo(h5),
                    hdf5_getters.get_time_signature(h5),
                    hdf5_getters.get_time_signature_confidence(h5),
                    hdf5_getters.get_track_id(h5),
                    hdf5_getters.get_year(h5)
                ])

                # For debugging purposes. Everything as expected
                # print()
                # print("Num of songs -- ", hdf5_getters.get_num_songs(h5))  # One song per file
                # print("Title -- ", hdf5_getters.get_title(h5))  # Print the title of a specific h5 file
                # print("Artist familiarity -- ", hdf5_getters.get_artist_familiarity(h5))
                # print("Artist hotness -- ", hdf5_getters.get_artist_hotttnesss(h5))
                # print("Artist ID -- ", hdf5_getters.get_artist_id(h5))
                # print("Artist mbID -- ", hdf5_getters.get_artist_mbid(h5))
                # print("Artist playmeid -- ", hdf5_getters.get_artist_playmeid(h5))
                # print("Artist 7DigitalID -- ", hdf5_getters.get_artist_7digitalid(h5))
                # print("Artist latitude -- ", hdf5_getters.get_artist_latitude(h5))
                # print("Artist longitude -- ", hdf5_getters.get_artist_longitude(h5))
                # print("Artist location -- ", hdf5_getters.get_artist_location(h5))
                # print("Artist Name -- ", hdf5_getters.get_artist_name(h5))
                # print("Release -- ", hdf5_getters.get_release(h5))
                # print("Release 7DigitalID -- ", hdf5_getters.get_release_7digitalid(h5))
                # print("Song ID -- ", hdf5_getters.get_song_id(h5))
                # print("Song Hotness -- ", hdf5_getters.get_song_hotttnesss(h5))
                # print("Track 7Digital -- ", hdf5_getters.get_track_7digitalid(h5))
                # print("Analysis sample rate -- ", hdf5_getters.get_analysis_sample_rate(h5))
                # print("Audio md5 -- ", hdf5_getters.get_audio_md5(h5))
                # print("Danceability -- ", hdf5_getters.get_danceability(h5))
                # print("Duration -- ", hdf5_getters.get_duration(h5))
                # print("End of Fade -- ", hdf5_getters.get_end_of_fade_in(h5))
                # print("Energy -- ", hdf5_getters.get_energy(h5))
                # print("Key -- ", hdf5_getters.get_key(h5))
                # print("Key Confidence -- ", hdf5_getters.get_key_confidence(h5))
                # print("Loudness -- ", hdf5_getters.get_loudness(h5))
                # print("Mode -- ", hdf5_getters.get_mode(h5))
                # print("Mode Confidence -- ", hdf5_getters.get_mode_confidence(h5))
                # print("Start of fade out -- ", hdf5_getters.get_start_of_fade_out(h5))
                # print("Tempo -- ", hdf5_getters.get_tempo(h5))
                # print("Time signature -- ", hdf5_getters.get_time_signature(h5))
                # print("Time signature confidence -- ", hdf5_getters.get_time_signature_confidence(h5))
                # print("Track ID -- ", hdf5_getters.get_track_id(h5))
                # # print("Artist mbtags -- ", hdf5_getters.get_artist_mbtags(h5))
                # # print("Artist mbtags count -- ", hdf5_getters.get_artist_mbtags_count(h5))
                # print("Year -- ", hdf5_getters.get_year(h5))

                h5.close()
def writeSingleHDF5FileToTxtFile(songHDF5FileName):
    global maximumArtistNameLen
    global maximumArtistTagLen
    global maximumSongNameLen
    global maximumAlbumNameLen
    """
    This function does 3 simple things:
    - open the song file
    - get artist ID and put it
    - close the file
    """
    songHDF5File = GETTERS.open_h5_file_read(songHDF5FileName)

    songID = GETTERS.get_song_id(songHDF5File)
    songName = GETTERS.get_title(songHDF5File)
    artistID = GETTERS.get_artist_id(songHDF5File)
    songAlbum = GETTERS.get_release(songHDF5File)
    songYear = GETTERS.get_year(songHDF5File)
    songTempo = GETTERS.get_tempo(songHDF5File)
    songDanceability = GETTERS.get_danceability(songHDF5File)
    songDuration = GETTERS.get_duration(songHDF5File)
    songEnergy = GETTERS.get_energy(songHDF5File)
    songKey = GETTERS.get_key(songHDF5File)
    songLoudness = GETTERS.get_loudness(songHDF5File)
    songMode = GETTERS.get_mode(songHDF5File)
    songTimeSignature = GETTERS.get_time_signature(songHDF5File)

    songsTableFile.write(songID + "\t" + songName + "\t" + artistID + "\t" +
                         songAlbum + "\t" + str(songYear) + "\t" +
                         str(songTempo) + "\t" + str(songDanceability) + "\t" +
                         str(songDuration) + "\t" + str(songEnergy) + "\t" +
                         str(songKey) + "\t" + str(songLoudness) + "\t" +
                         str(songMode) + "\t" + str(songTimeSignature) +
                         "\t\n")

    artistName = GETTERS.get_artist_name(songHDF5File)
    artistFamiliarity = GETTERS.get_artist_familiarity(songHDF5File)
    artistTagsArray = GETTERS.get_artist_mbtags(songHDF5File)

    artistsTableFile.write(artistID + "\t" + artistName + "\t" +
                           str(artistFamiliarity) + "\t\n")

    if len(songName) > maximumSongNameLen:
        maximumSongNameLen = len(songName)

    if len(songAlbum) > maximumAlbumNameLen:
        maximumAlbumNameLen = len(songAlbum)

    if len(artistName) > maximumArtistNameLen:
        maximumArtistNameLen = len(artistName)

    for artistTag in artistTagsArray:
        if artistTag in allowedTagsSet:

            artistsTagsTableFile.write(artistID + "\t" + artistTag + "\t\n")
            if artistTag not in tagsSet:
                tagsTableFile.write(artistTag + "\t\n")
                tagsSet.add(artistTag)
            if len(artistTag) > maximumArtistTagLen:
                maximumArtistTagLen = len(artistTag)

    similarArtists = GETTERS.get_similar_artists(songHDF5File)

    for similarArtist in similarArtists:
        similarArtistsPairsList.add((artistID, similarArtist))

    artistsIDsSet.add(artistID)
    artistsNamesSet.add(artistName)

    songHDF5File.close()
def parse_aggregate_songs(file_name,file_name2,artist_map):
    """
    Given an aggregate filename and artist_map in the format
    {artist_name: {data pertaining to artist}}
    """
    """
    TODO: 
    -this function goes through each song, if artist not in there,
    add all data necesary and add first song info.
    else update any specific song info

    -song info is a map from attributename:[values]
    """
    #artist_map = {}
    h5 = hdf5_getters.open_h5_file_read(file_name)
    numSongs = hdf5_getters.get_num_songs(h5)
    print 'Parsing song file...'
    for i in range(numSongs):
        artist_name = hdf5_getters.get_artist_name(h5,i)

        #Filter location
        longi = hdf5_getters.get_artist_longitude(h5,i)
        lat = hdf5_getters.get_artist_latitude(h5,i)
        loc = hdf5_getters.get_artist_location(h5,i)
        if math.isnan(lat) or math.isnan(longi):
            #skip if no location
            continue

        #filter year
        yr = hdf5_getters.get_year(h5,i)
        if yr == 0:
            #skip if no year
            continue

        #filter hotttness and familiarity
        familiarity = hdf5_getters.get_artist_familiarity(h5,i)
        hotttness = hdf5_getters.get_artist_hotttnesss(h5,i)
        if familiarity<=0.0 or hotttness<=0.0:
            #skip if no hotttness or familiarity computations
            continue

        #TODO:MAYBE filter on dance and energy
        timbre = hdf5_getters.get_segments_timbre(h5,i)
        #timbre[#] gives len 12 array so for each arr in timbre, add up to get segment and add to corresponding 12 features and avg across each
        if not artist_name in artist_map:
            #have not encountered the artist yet, so populate new map
            sub_map = {}
            sub_map['artist_familiarity'] = familiarity
            sub_map['artist_hotttnesss'] = hotttness
            sub_map['artist_id'] = hdf5_getters.get_artist_id(h5,i)
            #longi = hdf5_getters.get_artist_longitude(h5,i)
            #lat = hdf5_getters.get_artist_latitude(h5,i)
            #longi = None if math.isnan(longi) else longi
            #lat = None if math.isnan(lat) else lat
            sub_map['artist_latitude'] = lat
            sub_map['artist_longitude'] = longi
            sub_map['artist_location'] = loc
            sub_map['artist_terms'] = hdf5_getters.get_artist_terms(h5,i)
            #TODO:see if should weight by freq or weight for if the term matches one of the feature terms
            sub_map['artist_terms_freq'] = list(hdf5_getters.get_artist_terms_freq(h5,i))
            sub_map['artist_terms_weight'] = list(hdf5_getters.get_artist_terms_weight(h5,i))

            #song-sepcific data
            #TODO COMPUTE AN AVG TIMBRE FOR A SONG BY IDEA:
            #SUMMING DOWN EACH 12 VECTOR FOR EACH PT IN SONG AND AVG THIS ACROSS SONG
            dance = hdf5_getters.get_danceability(h5,i)
            dance = None if dance == 0.0 else dance
            energy = hdf5_getters.get_energy(h5,i)
            energy = None if energy == 0.0 else energy
            sub_map['danceability'] = [dance]
            sub_map['duration'] = [hdf5_getters.get_duration(h5,i)]
            sub_map['end_of_fade_in'] = [hdf5_getters.get_end_of_fade_in(h5,i)]
            sub_map['energy'] = [energy]
            #since each song has a key, ask if feature for keys should be num of songs that appear in that key or
            #just binary if any of their songs has that key or just be avg of songs with that key
            #same for mode, since its either major or minor...should it be count or avg.?
            sub_map['key'] = [hdf5_getters.get_key(h5,i)]
            sub_map['loudness'] = [hdf5_getters.get_loudness(h5,i)]
            sub_map['mode'] = [hdf5_getters.get_mode(h5,i)] #major or minor 0/1
            s_hot = hdf5_getters.get_song_hotttnesss(h5,i)
            s_hot = None if math.isnan(s_hot) else s_hot
            sub_map['song_hotttnesss'] = [s_hot]
            sub_map['start_of_fade_out'] = [hdf5_getters.get_start_of_fade_out(h5,i)]
            sub_map['tempo'] = [hdf5_getters.get_tempo(h5,i)]
            #should time signature be count as well? binary?
            sub_map['time_signature'] = [hdf5_getters.get_time_signature(h5,i)]
            sub_map['track_id'] = [hdf5_getters.get_track_id(h5,i)]
            #should year be binary since they can have many songs across years and should it be year:count
            sub_map['year'] = [yr]

            artist_map[artist_name] = sub_map
        else:
            #artist already exists, so get its map and update song fields
            dance = hdf5_getters.get_danceability(h5,i)
            dance = None if dance == 0.0 else dance
            energy = hdf5_getters.get_energy(h5,i)
            energy = None if energy == 0.0 else energy
            artist_map[artist_name]['danceability'].append(dance)
            artist_map[artist_name]['duration'].append(hdf5_getters.get_duration(h5,i))
            artist_map[artist_name]['end_of_fade_in'].append(hdf5_getters.get_end_of_fade_in(h5,i))
            artist_map[artist_name]['energy'].append(energy)
            artist_map[artist_name]['key'].append(hdf5_getters.get_key(h5,i))
            artist_map[artist_name]['loudness'].append(hdf5_getters.get_loudness(h5,i))
            artist_map[artist_name]['mode'].append(hdf5_getters.get_mode(h5,i)) #major or minor 0/1
            s_hot = hdf5_getters.get_song_hotttnesss(h5,i)
            s_hot = None if math.isnan(s_hot) else s_hot
            artist_map[artist_name]['song_hotttnesss'].append(s_hot)
            artist_map[artist_name]['start_of_fade_out'].append(hdf5_getters.get_start_of_fade_out(h5,i))
            artist_map[artist_name]['tempo'].append(hdf5_getters.get_tempo(h5,i))
            #should time signature be count as well? binary?
            artist_map[artist_name]['time_signature'].append(hdf5_getters.get_time_signature(h5,i))
            artist_map[artist_name]['track_id'].append(hdf5_getters.get_track_id(h5,i))
            #should year be binary since they can have many songs across years and should it be year:count
            artist_map[artist_name]['year'].append(yr)

    h5 = hdf5_getters.open_h5_file_read(file_name2)
    numSongs = hdf5_getters.get_num_songs(h5)
    print 'Parsing song file2...'
    for i in range(numSongs):
        song_id = hdf5_getters.get_track_id(h5,i)
        artist_name = hdf5_getters.get_artist_name(h5,i)
        if artist_name in artist_map and song_id in artist_map[artist_name]['track_id']:
            continue

        #Filter location
        longi = hdf5_getters.get_artist_longitude(h5,i)
        lat = hdf5_getters.get_artist_latitude(h5,i)
        loc = hdf5_getters.get_artist_location(h5,i)
        if math.isnan(lat) or math.isnan(longi):
            #skip if no location
            continue

        #filter year
        yr = hdf5_getters.get_year(h5,i)
        if yr == 0:
            #skip if no year
            continue

        #filter hotttness and familiarity
        familiarity = hdf5_getters.get_artist_familiarity(h5,i)
        hotttness = hdf5_getters.get_artist_hotttnesss(h5,i)
        if familiarity<=0.0 or hotttness<=0.0:
            #skip if no hotttness or familiarity computations
            continue

        #TODO:MAYBE filter on dance and energy
        timbre = hdf5_getters.get_segments_timbre(h5,i)
        #timbre[#] gives len 12 array so for each arr in timbre, add up to get segment and add to corresponding 12 features and avg across each
        if not artist_name in artist_map:
            #have not encountered the artist yet, so populate new map
            sub_map = {}
            sub_map['artist_familiarity'] = familiarity
            sub_map['artist_hotttnesss'] = hotttness
            sub_map['artist_id'] = hdf5_getters.get_artist_id(h5,i)
            #longi = hdf5_getters.get_artist_longitude(h5,i)
            #lat = hdf5_getters.get_artist_latitude(h5,i)
            #longi = None if math.isnan(longi) else longi
            #lat = None if math.isnan(lat) else lat
            sub_map['artist_latitude'] = lat
            sub_map['artist_longitude'] = longi
            sub_map['artist_location'] = loc
            sub_map['artist_terms'] = hdf5_getters.get_artist_terms(h5,i)
            #TODO:see if should weight by freq or weight for if the term matches one of the feature terms
            sub_map['artist_terms_freq'] = list(hdf5_getters.get_artist_terms_freq(h5,i))
            sub_map['artist_terms_weight'] = list(hdf5_getters.get_artist_terms_weight(h5,i))

            #song-sepcific data
            #TODO COMPUTE AN AVG TIMBRE FOR A SONG BY IDEA:
            #SUMMING DOWN EACH 12 VECTOR FOR EACH PT IN SONG AND AVG THIS ACROSS SONG
            dance = hdf5_getters.get_danceability(h5,i)
            dance = None if dance == 0.0 else dance
            energy = hdf5_getters.get_energy(h5,i)
            energy = None if energy == 0.0 else energy
            sub_map['danceability'] = [dance]
            sub_map['duration'] = [hdf5_getters.get_duration(h5,i)]
            sub_map['end_of_fade_in'] = [hdf5_getters.get_end_of_fade_in(h5,i)]
            sub_map['energy'] = [energy]
            #since each song has a key, ask if feature for keys should be num of songs that appear in that key or
            #just binary if any of their songs has that key or just be avg of songs with that key
            #same for mode, since its either major or minor...should it be count or avg.?
            sub_map['key'] = [hdf5_getters.get_key(h5,i)]
            sub_map['loudness'] = [hdf5_getters.get_loudness(h5,i)]
            sub_map['mode'] = [hdf5_getters.get_mode(h5,i)] #major or minor 0/1
            s_hot = hdf5_getters.get_song_hotttnesss(h5,i)
            s_hot = None if math.isnan(s_hot) else s_hot
            sub_map['song_hotttnesss'] = [s_hot]
            sub_map['start_of_fade_out'] = [hdf5_getters.get_start_of_fade_out(h5,i)]
            sub_map['tempo'] = [hdf5_getters.get_tempo(h5,i)]
            #should time signature be count as well? binary?
            sub_map['time_signature'] = [hdf5_getters.get_time_signature(h5,i)]
            sub_map['track_id'] = [hdf5_getters.get_track_id(h5,i)]
            #should year be binary since they can have many songs across years and should it be year:count
            sub_map['year'] = [yr]

            artist_map[artist_name] = sub_map
        else:
            #artist already exists, so get its map and update song fields
            dance = hdf5_getters.get_danceability(h5,i)
            dance = None if dance == 0.0 else dance
            energy = hdf5_getters.get_energy(h5,i)
            energy = None if energy == 0.0 else energy
            artist_map[artist_name]['danceability'].append(dance)
            artist_map[artist_name]['duration'].append(hdf5_getters.get_duration(h5,i))
            artist_map[artist_name]['end_of_fade_in'].append(hdf5_getters.get_end_of_fade_in(h5,i))
            artist_map[artist_name]['energy'].append(energy)
            artist_map[artist_name]['key'].append(hdf5_getters.get_key(h5,i))
            artist_map[artist_name]['loudness'].append(hdf5_getters.get_loudness(h5,i))
            artist_map[artist_name]['mode'].append(hdf5_getters.get_mode(h5,i)) #major or minor 0/1
            s_hot = hdf5_getters.get_song_hotttnesss(h5,i)
            s_hot = None if math.isnan(s_hot) else s_hot
            artist_map[artist_name]['song_hotttnesss'].append(s_hot)
            artist_map[artist_name]['start_of_fade_out'].append(hdf5_getters.get_start_of_fade_out(h5,i))
            artist_map[artist_name]['tempo'].append(hdf5_getters.get_tempo(h5,i))
            #should time signature be count as well? binary?
            artist_map[artist_name]['time_signature'].append(hdf5_getters.get_time_signature(h5,i))
            artist_map[artist_name]['track_id'].append(hdf5_getters.get_track_id(h5,i))
            #should year be binary since they can have many songs across years and should it be year:count
            artist_map[artist_name]['year'].append(yr)
Exemple #35
0
def main():
    outputFile1 = open('SongCSVFinal.csv', 'w')
    csvRowString = ""

    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #leave the prompt boolean set to True
    #else, set 'prompt' to False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################
    if prompt == True:
        while prompt:

            prompt = False

            csvAttributeString = raw_input(
                "\n\nIn what order would you like the colums of the CSV file?\n"
                + "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"
                +
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo,"
                +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n"
                +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n"
                + "...or exit by typing 'exit'.\n\n")

            csvAttributeList = re.split('\W+', csvAttributeString)
            for i, v in enumerate(csvAttributeList):
                csvAttributeList[i] = csvAttributeList[i].lower()

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += 'AlbumID'
                elif attribute == 'AlbumName'.lower():
                    csvRowString += 'AlbumName'
                elif attribute == 'ArtistID'.lower():
                    csvRowString += 'ArtistID'
                elif attribute == 'ArtistLatitude'.lower():
                    csvRowString += 'ArtistLatitude'
                elif attribute == 'ArtistLocation'.lower():
                    csvRowString += 'ArtistLocation'
                elif attribute == 'ArtistLongitude'.lower():
                    csvRowString += 'ArtistLongitude'
                elif attribute == 'ArtistName'.lower():
                    csvRowString += 'ArtistName'
                elif attribute == 'Danceability'.lower():
                    csvRowString += 'Danceability'
                elif attribute == 'Duration'.lower():
                    csvRowString += 'Duration'
                elif attribute == 'KeySignature'.lower():
                    csvRowString += 'KeySignature'
                elif attribute == 'KeySignatureConfidence'.lower():
                    csvRowString += 'KeySignatureConfidence'
                elif attribute == 'SongID'.lower():
                    csvRowString += "SongID"
                elif attribute == 'Tempo'.lower():
                    csvRowString += 'Tempo'
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += 'TimeSignature'
                elif attribute == 'TimeSignatureConfidence'.lower():
                    csvRowString += 'TimeSignatureConfidence'
                elif attribute == 'Title'.lower():
                    csvRowString += 'Title'
                elif attribute == 'Year'.lower():
                    csvRowString += 'Year'
                elif attribute == 'Exit'.lower():
                    sys.exit()
                else:
                    prompt = True
                    print "=============="
                    print "I believe there has been an error with the input."
                    print "=============="
                    break

                csvRowString += ","

            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex - 1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""
    #else, if you want to hard code the order of the csv file and not prompt
    #the user,
    else:
        #################################################
        #change the order of the csv file here
        #Default is to list all available attributes (in alphabetical order)
        csvRowString = (
            "SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"
            +
            "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature," +
            "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"
            +
            "Title,Year,mbID,Energy,ArtistFamiliarity,Hotness,end_of_fade_in,key,keyConfidence,Loudness,"
            + "mode,mode_confidence,start_of_fade_out")
        #################################################

        csvAttributeList = re.split('\W+', csvRowString)
        for i, v in enumerate(csvAttributeList):
            csvAttributeList[i] = csvAttributeList[i].lower()
        outputFile1.write("SongNumber,")
        outputFile1.write(csvRowString + "\n")
        csvRowString = ""

    #################################################

    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = "."  # "." As the default means the current directory
    ext = ".h5"  #Set the extension here. H5 is the extension for HDF5 files.
    #################################################
    #files = glob.glob(os.path.join(root,'*'+ext))
    #FOR LOOP
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        print(files)
        for f in files:
            songH5File = hdf5_getters.open_h5_file_read(f)
            print('hello 1')
            print(songH5File)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            testDanceability = hdf5_getters.get_danceability(songH5File)
            # print type(testDanceability)
            # print ("Here is the danceability: ") + str(testDanceability)

            song.artistID = str(hdf5_getters.get_artist_id(songH5File))
            song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.albumName = str(hdf5_getters.get_release(songH5File))
            song.artistLatitude = str(
                hdf5_getters.get_artist_latitude(songH5File))
            song.artistLocation = str(
                hdf5_getters.get_artist_location(songH5File))
            song.artistLongitude = str(
                hdf5_getters.get_artist_longitude(songH5File))
            song.artistName = str(hdf5_getters.get_artist_name(songH5File))
            song.danceability = str(hdf5_getters.get_danceability(songH5File))
            song.duration = str(hdf5_getters.get_duration(songH5File))
            # song.setGenreList()
            song.keySignature = str(hdf5_getters.get_key(songH5File))
            song.keySignatureConfidence = str(
                hdf5_getters.get_key_confidence(songH5File))
            # song.lyrics = None
            # song.popularity = None
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.timeSignature = str(
                hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = str(
                hdf5_getters.get_time_signature_confidence(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))
            song.mbID = str(hdf5_getters.get_artist_mbid(songH5File))
            song.energy = str(hdf5_getters.get_energy(songH5File))
            #song.beatConfidence = str(hdf5_getters.get_beats_confidence(songH5File))
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.loudness = str(hdf5_getters.get_loudness(songH5File))
            song.key = str(hdf5_getters.get_key(songH5File))
            song.artistFamiliarity = str(
                hdf5_getters.get_artist_familiarity(songH5File))
            song.keyConfidence = str(
                hdf5_getters.get_key_confidence(songH5File))
            song.hotness = str(hdf5_getters.get_artist_hotttnesss(songH5File))
            #song.sampleRate= str(hdf5_getters.get_analysis_sample_rate(songH5File))
            song.end_of_fade_in = str(
                hdf5_getters.get_end_of_fade_in(songH5File))
            song.mode = str(hdf5_getters.get_mode(songH5File))
            song.mode_confidence = str(
                hdf5_getters.get_mode_confidence(songH5File))
            song.start_of_fade_out = str(
                hdf5_getters.get_start_of_fade_out(songH5File))

            #print song count
            csvRowString += str(song.songCount) + ","

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += song.albumID
                elif attribute == 'AlbumName'.lower():
                    albumName = song.albumName
                    albumName = albumName.replace(',', "")
                    csvRowString += "\"" + albumName + "\""
                elif attribute == 'ArtistID'.lower():
                    csvRowString += "\"" + song.artistID + "\""
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    csvRowString += latitude
                elif attribute == 'ArtistLocation'.lower():
                    location = song.artistLocation
                    location = location.replace(',', '')
                    csvRowString += "\"" + location + "\""
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    csvRowString += longitude
                elif attribute == 'ArtistName'.lower():
                    csvRowString += "\"" + song.artistName + "\""
                elif attribute == 'Danceability'.lower():
                    csvRowString += song.danceability
                elif attribute == 'Duration'.lower():
                    csvRowString += song.duration
                elif attribute == 'KeySignature'.lower():
                    csvRowString += song.keySignature
                elif attribute == 'KeySignatureConfidence'.lower():
                    # print "key sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.keySignatureConfidence
                elif attribute == 'SongID'.lower():
                    csvRowString += "\"" + song.id + "\""
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    csvRowString += song.tempo
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += song.timeSignature
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence
                    csvRowString += song.timeSignatureConfidence
                elif attribute == 'Title'.lower():
                    csvRowString += "\"" + song.title + "\""
                elif attribute == 'Year'.lower():
                    csvRowString += song.year
                elif attribute == 'mbID'.lower():
                    csvRowString += song.mbID
                elif attribute == 'Energy'.lower():
                    csvRowString += song.energy
                elif attribute == 'ArtistFamiliarity'.lower():
                    csvRowString += song.artistFamiliarity
                elif attribute == 'Hotness'.lower():
                    csvRowString += song.hotness

#elif attribute == 'SampleRate'.lower():
#csvRowString += song.sampleRate
                elif attribute == 'end_of_fade_in'.lower():
                    csvRowString += song.end_of_fade_in
                elif attribute == 'key'.lower():
                    csvRowString += song.key
                elif attribute == 'keyConfidence'.lower():
                    csvRowString += song.keyConfidence
                elif attribute == 'Loudness'.lower():
                    csvRowString += song.loudness
                elif attribute == 'mode'.lower():
                    csvRowString += song.mode
                elif attribute == 'mode_confidence'.lower():
                    csvRowString += song.mode_confidence
                elif attribute == 'start_of_fade_out'.lower():
                    csvRowString += song.start_of_fade_out
                else:
                    csvRowString += "Erm. This didn't work. Error. :( :(\n"

                csvRowString += ","

            #Remove the final comma from each row in the csv
            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex - 1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""

            songH5File.close()

    outputFile1.close()
def apply_to_all_files_mod(basedir,
                           song_list,
                           filename='songs.npy',
                           func=lambda x: x,
                           ext='.h5'):
    """
    From a base directory, goes through all subdirectories, finds all files with the given ext,
    and reads each song from each file. For each song in song_list, gets the title, artist, tempo, 
    familiarity, hottness, terms, dancebility, duration, energy, loudness, and the timbre matrix. 
    Tab delimits terms, flattens the timbre matrix, adds them all to a np array, and saves the array 
    with the information from each song to filename. 
    """
    #Initial list of desired song info
    csv_data = []
    count = 0
    done_gg = False
    song_dict = construct_song_dict(song_list)
    # iterate over all files in all subdirectories
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        #Iterates through each file in files
        for filename in files:
            count += 1
            if count % 1000 == 0:
                print count
            h5 = GETTERS.open_h5_file_read(filename)
            #Scrapes desired data
            title = GETTERS.get_title(h5)
            artist = GETTERS.get_artist_name(h5)
            tempo = GETTERS.get_tempo(h5)
            familiarity = GETTERS.get_artist_familiarity(h5)
            hotness = GETTERS.get_artist_hotttnesss(h5)
            terms = GETTERS.get_artist_terms(h5)
            danceability = GETTERS.get_danceability(h5)
            duration = GETTERS.get_duration(h5)
            energy = GETTERS.get_energy(h5)
            loudness = GETTERS.get_loudness(h5)
            timbre = GETTERS.get_segments_timbre(h5)
            #Tab delimits terms
            terms_tabs = "\t".join(terms)
            #Flattens timbre
            timbre_flattened = timbre.flatten()
            #Creates np array of everything but timbre matrix
            everything_but_timbre = np.array([
                title, artist, tempo, familiarity, hotness, terms_tabs,
                danceability, duration, energy, loudness
            ])
            #Combines everything else with timbre matrix
            row = np.concatenate((everything_but_timbre, timbre_flattened))
            #Checks if artist, song combination was in the list and, if so, adds it.
            if artist in song_dict:
                if title in song_dict[artist]:
                    print("Adding {} by {}. Song ID is: {}".format(
                        title, artist, GETTERS.get_song_id(h5)))
                    csv_data.append(row)
                    #Prevents duplicates
                    song_dict[artist][song_dict[artist].index(title)] = ''
            h5.close()

    print("Number of songs: {}, artists {}".format(len(csv_data),
                                                   len(song_dict)))
    csv_array = np.array(csv_data)
    #Saves data
    np.save(filename, csv_array)
Exemple #37
0
def fill_attributes(song, songH5File):

    #----------------------------non array attributes-------------------------------
    song.analysisSampleRate = str(
        hdf5_getters.get_analysis_sample_rate(songH5File))
    song.artistDigitalID = str(hdf5_getters.get_artist_7digitalid(songH5File))
    song.artistFamiliarity = str(
        hdf5_getters.get_artist_familiarity(songH5File))
    song.artistHotness = str(hdf5_getters.get_artist_hottness(songH5File))
    song.artistID = str(hdf5_getters.get_artist_id(songH5File))
    song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
    song.artistLocation = str(hdf5_getters.get_artist_location(songH5File))
    song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
    song.artistmbID = str(hdf5_getters.get_artist_mbid(songH5File))
    song.artistName = str(hdf5_getters.get_artist_name(songH5File))
    song.artistPlayMeID = str(hdf5_getters.get_artist_playmeid(songH5File))
    song.audioMD5 = str(hdf5_getters.get_audio_md5(songH5File))
    song.danceability = str(hdf5_getters.get_danceability(songH5File))
    song.duration = str(hdf5_getters.get_duration(songH5File))
    song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File))
    song.energy = str(hdf5_getters.get_energy(songH5File))
    song.key = str(hdf5_getters.get_key(songH5File))
    song.keyConfidence = str(hdf5_getters.get_key_confidence(songH5File))
    song.segementsConfidence = str(
        hdf5_getters.get_segments_confidence(songH5File))
    song.segementsConfidence = str(
        hdf5_getters.get_sections_confidence(songH5File))
    song.loudness = str(hdf5_getters.get_loudness(songH5File))
    song.mode = str(hdf5_getters.get_mode(songH5File))
    song.modeConfidence = str(hdf5_getters.get_mode_confidence(songH5File))
    song.release = str(hdf5_getters.get_release(songH5File))
    song.releaseDigitalID = str(
        hdf5_getters.get_release_7digitalid(songH5File))
    song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
    song.startOfFadeOut = str(hdf5_getters.get_start_of_fade_out(songH5File))
    song.tempo = str(hdf5_getters.get_tempo(songH5File))
    song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
    song.timeSignatureConfidence = str(
        hdf5_getters.get_time_signature_confidence(songH5File))
    song.title = str(hdf5_getters.get_title(songH5File))
    song.trackID = str(hdf5_getters.get_track_id(songH5File))
    song.trackDigitalID = str(hdf5_getters.get_track_7digitalid(songH5File))
    song.year = str(hdf5_getters.get_year(songH5File))

    #-------------------------------array attributes--------------------------------------
    #array float
    song.beatsStart_mean, song.beatsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_beats_start(songH5File))
    #array float
    song.artistTermsFreq_mean, song.artistTermsFreq_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_terms_freq(songH5File))
    #array float
    song.artistTermsWeight_mean, song.artistTermsWeight_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_terms_weight(songH5File))
    #array int
    song.artistmbTagsCount_mean, song.artistmbTagsCount_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_mbtags_count(songH5File))
    #array float
    song.barsConfidence_mean, song.barsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_bars_confidence(songH5File))
    #array float
    song.barsStart_mean, song.barsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_bars_start(songH5File))
    #array float
    song.beatsConfidence_mean, song.beatsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_beats_confidence(songH5File))
    #array float
    song.sectionsConfidence_mean, song.sectionsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_sections_confidence(songH5File))
    #array float
    song.sectionsStart_mean, song.sectionsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_sections_start(songH5File))
    #array float
    song.segmentsConfidence_mean, song.segmentsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_confidence(songH5File))
    #array float
    song.segmentsLoudness_mean, song.segmentsLoudness_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_max(songH5File))
    #array float
    song.segmentsLoudnessMaxTime_mean, song.segmentsLoudnessMaxTime_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_max_time(songH5File))
    #array float
    song.segmentsLoudnessMaxStart_mean, song.segmentsLoudnessMaxStart_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_start(songH5File))
    #array float
    song.segmentsStart_mean, song.segmentsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_start(songH5File))
    #array float
    song.tatumsConfidence_mean, song.tatumsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_tatums_confidence(songH5File))
    #array float
    song.tatumsStart_mean, song.tatumsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_tatums_start(songH5File))
    #array2d float
    song.segmentsTimbre_mean, song.segmentsTimbre_var = covert_2darray_to_meanvar(
        hdf5_getters.get_segments_timbre(songH5File))
    #array2d float
    song.segmentsPitches_mean, song.segmentsPitches_var = covert_2darray_to_meanvar(
        hdf5_getters.get_segments_pitches(songH5File))

    #------------------------array string attributes------------------------
    song.similarArtists = convert_array_to_string(
        hdf5_getters.get_similar_artists(songH5File))  #array string
    song.artistTerms = convert_array_to_string(
        hdf5_getters.get_artist_terms(songH5File))  #array string
    song.artistmbTags = convert_array_to_string(
        hdf5_getters.get_artist_mbtags(songH5File))  #array string

    return song
def data_to_flat_file(basedir, ext='.h5'):
    """ This function extracts the information from the tables and creates the flat file. """
    count = 0
    #song counter
    list_to_write = []
    group_index = 0
    row_to_write = ""
    writer = csv.writer(open("complete.csv", "wb"))
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            row = []
            print f
            h5 = hdf5_getters.open_h5_file_read(f)
            title = hdf5_getters.get_title(h5)
            title = title.replace('"', '')
            row.append(title)
            comma = title.find(',')
            if comma != -1:
                print title
                time.sleep(1)
            album = hdf5_getters.get_release(h5)
            album = album.replace('"', '')
            row.append(album)
            comma = album.find(',')
            if comma != -1:
                print album
                time.sleep(1)
            artist_name = hdf5_getters.get_artist_name(h5)
            comma = artist_name.find(',')
            if comma != -1:
                print artist_name
                time.sleep(1)
            artist_name = artist_name.replace('"', '')
            row.append(artist_name)
            duration = hdf5_getters.get_duration(h5)
            row.append(duration)
            samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
            row.append(samp_rt)
            artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
            row.append(artist_7digitalid)
            artist_fam = hdf5_getters.get_artist_familiarity(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_fam) == True:
                artist_fam = -1
            row.append(artist_fam)
            artist_hotness = hdf5_getters.get_artist_hotttnesss(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_hotness) == True:
                artist_hotness = -1
            row.append(artist_hotness)
            artist_id = hdf5_getters.get_artist_id(h5)
            row.append(artist_id)
            artist_lat = hdf5_getters.get_artist_latitude(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_lat) == True:
                artist_lat = -1
            row.append(artist_lat)
            artist_loc = hdf5_getters.get_artist_location(h5)
            row.append(artist_loc)
            artist_lon = hdf5_getters.get_artist_longitude(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_lon) == True:
                artist_lon = -1
            row.append(artist_lon)
            artist_mbid = hdf5_getters.get_artist_mbid(h5)
            row.append(artist_mbid)

            #Getting the genre
            art_trm = hdf5_getters.get_artist_terms(h5)
            trm_freq = hdf5_getters.get_artist_terms_freq(h5)
            trn_wght = hdf5_getters.get_artist_terms_weight(h5)
            a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
            genre_indexes = get_genre_indexes(
                trm_freq)  #index of the highest freq
            genre_set = 0  #flag to see if the genre has been set or not
            final_genre = []
            genres_so_far = []
            for i in range(len(genre_indexes)):
                genre_tmp = get_genre(
                    art_trm, genre_indexes[i]
                )  #genre that corresponds to the highest freq
                genres_so_far = genre_dict.get_genre_in_dict(
                    genre_tmp)  #getting the genre from the dictionary
                if len(genres_so_far) != 0:
                    for i in genres_so_far:
                        final_genre.append(i)
                        genre_set = 1

            if genre_set == 1:
                col_num = []
                for i in final_genre:
                    column = int(i)  #getting the column number of the genre
                    col_num.append(column)

                genre_array = genre_columns(col_num)  #genre array
                for i in range(len(
                        genre_array)):  #appending the genre_array to the row
                    row.append(genre_array[i])
            else:
                genre_array = genre_columns(
                    -1
                )  #when there is no genre matched, return an array of [0...0]
                for i in range(len(
                        genre_array)):  #appending the genre_array to the row
                    row.append(genre_array[i])

            artist_pmid = hdf5_getters.get_artist_playmeid(h5)
            row.append(artist_pmid)
            audio_md5 = hdf5_getters.get_audio_md5(h5)
            row.append(audio_md5)
            danceability = hdf5_getters.get_danceability(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(danceability) == True:
                danceability = -1
            row.append(danceability)
            end_fade_in = hdf5_getters.get_end_of_fade_in(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(end_fade_in) == True:
                end_fade_in = -1
            row.append(end_fade_in)
            energy = hdf5_getters.get_energy(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(energy) == True:
                energy = -1
            row.append(energy)
            song_key = hdf5_getters.get_key(h5)
            row.append(song_key)
            key_c = hdf5_getters.get_key_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(key_c) == True:
                key_c = -1
            row.append(key_c)
            loudness = hdf5_getters.get_loudness(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(loudness) == True:
                loudness = -1
            row.append(loudness)
            mode = hdf5_getters.get_mode(h5)
            row.append(mode)
            mode_conf = hdf5_getters.get_mode_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(mode_conf) == True:
                mode_conf = -1
            row.append(mode_conf)
            release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
            row.append(release_7digitalid)
            song_hot = hdf5_getters.get_song_hotttnesss(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(song_hot) == True:
                song_hot = -1
            row.append(song_hot)
            song_id = hdf5_getters.get_song_id(h5)
            row.append(song_id)
            start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
            row.append(start_fade_out)
            tempo = hdf5_getters.get_tempo(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(tempo) == True:
                tempo = -1
            row.append(tempo)
            time_sig = hdf5_getters.get_time_signature(h5)
            row.append(time_sig)
            time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(time_sig_c) == True:
                time_sig_c = -1
            row.append(time_sig_c)
            track_id = hdf5_getters.get_track_id(h5)
            row.append(track_id)
            track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
            row.append(track_7digitalid)
            year = hdf5_getters.get_year(h5)
            row.append(year)
            bars_c = hdf5_getters.get_bars_confidence(h5)
            bars_start = hdf5_getters.get_bars_start(h5)
            row_bars_padding = padding(
                245
            )  #this is the array that will be attached at the end of th row

            #--------------bars---------------"
            gral_info = []
            gral_info = row[:]
            empty = []
            for i, item in enumerate(bars_c):
                row.append(group_index)
                row.append(i)
                row.append(bars_c[i])
                bars_c_avg = get_avg(bars_c)
                row.append(bars_c_avg)
                bars_c_max = get_max(bars_c)
                row.append(bars_c_max)
                bars_c_min = get_min(bars_c)
                row.append(bars_c_min)
                bars_c_stddev = get_stddev(bars_c)
                row.append(bars_c_stddev)
                bars_c_count = get_count(bars_c)
                row.append(bars_c_count)
                bars_c_sum = get_sum(bars_c)
                row.append(bars_c_sum)
                row.append(bars_start[i])
                bars_start_avg = get_avg(bars_start)
                row.append(bars_start_avg)
                bars_start_max = get_max(bars_start)
                row.append(bars_start_max)
                bars_start_min = get_min(bars_start)
                row.append(bars_start_min)
                bars_start_stddev = get_stddev(bars_start)
                row.append(bars_start_stddev)
                bars_start_count = get_count(bars_start)
                row.append(bars_start_count)
                bars_start_sum = get_sum(bars_start)
                row.append(bars_start_sum)
                for i in row_bars_padding:
                    row.append(i)

                writer.writerow(row)
                row = []
                row = gral_info[:]

    #--------beats---------------"
            beats_c = hdf5_getters.get_beats_confidence(h5)
            group_index = 1
            row = []
            row = gral_info[:]
            row_front = padding(
                14)  #blanks left in front of the row(empty spaces for bars)
            row_beats_padding = padding(231)
            for i, item in enumerate(beats_c):
                row.append(group_index)
                row.append(i)
                for index in row_front:  #padding blanks in front of the beats
                    row.append(index)

                row.append(beats_c[i])
                beats_c_avg = get_avg(beats_c)
                row.append(beats_c_avg)
                beats_c_max = get_max(beats_c)
                row.append(beats_c_max)
                beats_c_min = get_min(beats_c)
                row.append(beats_c_min)
                beats_c_stddev = get_stddev(beats_c)
                row.append(beats_c_stddev)
                beats_c_count = get_count(beats_c)
                row.append(beats_c_count)
                beats_c_sum = get_sum(beats_c)
                row.append(beats_c_sum)
                beats_start = hdf5_getters.get_beats_start(h5)
                row.append(beats_start[i])
                beats_start_avg = get_avg(beats_start)
                row.append(beats_start_avg)
                beats_start_max = get_max(beats_start)
                row.append(beats_start_max)
                beats_start_min = get_min(beats_start)
                row.append(beats_start_min)
                beats_start_stddev = get_stddev(beats_start)
                row.append(beats_start_stddev)
                beats_start_count = get_count(beats_start)
                row.append(beats_start_count)
                beats_start_sum = get_sum(beats_start)
                row.append(beats_start_sum)
                for i in row_beats_padding:
                    row.append(i)

                writer.writerow(row)
                row = []
                row = gral_info[:]

    # "--------sections---------------"
            row_sec_padding = padding(
                217)  #blank spaces left at the end of the row
            sec_c = hdf5_getters.get_sections_confidence(h5)
            group_index = 2
            row = []
            row = gral_info[:]
            row_front = padding(
                28)  #blank spaces left in front(empty spaces for bars,beats)
            for i, item in enumerate(sec_c):
                row.append(group_index)
                row.append(i)
                for index in row_front:  #padding blanks in front of the sections
                    row.append(index)

                row.append(sec_c[i])
                sec_c_avg = get_avg(sec_c)
                row.append(sec_c_avg)
                sec_c_max = get_max(sec_c)
                row.append(sec_c_max)
                sec_c_min = get_min(sec_c)
                row.append(sec_c_min)
                sec_c_stddev = get_stddev(sec_c)
                row.append(sec_c_stddev)
                sec_c_count = get_count(sec_c)
                row.append(sec_c_count)
                sec_c_sum = get_sum(sec_c)
                row.append(sec_c_sum)
                sec_start = hdf5_getters.get_sections_start(h5)
                row.append(sec_start[i])
                sec_start_avg = get_avg(sec_start)
                row.append(sec_start_avg)
                sec_start_max = get_max(sec_start)
                row.append(sec_start_max)
                sec_start_min = get_min(sec_start)
                row.append(sec_start_min)
                sec_start_stddev = get_stddev(sec_start)
                row.append(sec_start_stddev)
                sec_start_count = get_count(sec_start)
                row.append(sec_start_count)
                sec_start_sum = get_sum(sec_start)
                row.append(sec_start_sum)
                for i in row_sec_padding:  #appending the blank spaces at the end of the row
                    row.append(i)

                writer.writerow(row)
                row = []
                row = gral_info[:]

    #--------segments---------------"
            row_seg_padding = padding(182)  #blank spaces at the end of the row
            row_front = padding(42)  #blank spaces left in front of segments
            seg_c = hdf5_getters.get_segments_confidence(h5)
            group_index = 3
            row = []
            row = gral_info[:]
            for i, item in enumerate(seg_c):
                row.append(group_index)
                row.append(i)
                for index in row_front:  #padding blanks in front of the segments
                    row.append(index)

                row.append(seg_c[i])
                seg_c_avg = get_avg(seg_c)
                row.append(seg_c_avg)
                seg_c_max = get_max(seg_c)
                row.append(seg_c_max)
                seg_c_min = get_min(seg_c)
                row.append(seg_c_min)
                seg_c_stddev = get_stddev(seg_c)
                row.append(seg_c_stddev)
                seg_c_count = get_count(seg_c)
                row.append(seg_c_count)
                seg_c_sum = get_sum(seg_c)
                row.append(seg_c_sum)
                seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
                row.append(seg_loud_max[i])
                seg_loud_max_avg = get_avg(seg_loud_max)
                row.append(seg_loud_max_avg)
                seg_loud_max_max = get_max(seg_loud_max)
                row.append(seg_loud_max_max)
                seg_loud_max_min = get_min(seg_loud_max)
                row.append(seg_loud_max_min)
                seg_loud_max_stddev = get_stddev(seg_loud_max)
                row.append(seg_loud_max_stddev)
                seg_loud_max_count = get_count(seg_loud_max)
                row.append(seg_loud_max_count)
                seg_loud_max_sum = get_sum(seg_loud_max)
                row.append(seg_loud_max_sum)
                seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(
                    h5)
                row.append(seg_loud_max_time[i])
                seg_loud_max_time_avg = get_avg(seg_loud_max_time)
                row.append(seg_loud_max_time_avg)
                seg_loud_max_time_max = get_max(seg_loud_max_time)
                row.append(seg_loud_max_time_max)
                seg_loud_max_time_min = get_min(seg_loud_max_time)
                row.append(seg_loud_max_time_min)
                seg_loud_max_time_stddev = get_stddev(seg_loud_max_time)
                row.append(seg_loud_max_time_stddev)
                seg_loud_max_time_count = get_count(seg_loud_max_time)
                row.append(seg_loud_max_time_count)
                seg_loud_max_time_sum = get_sum(seg_loud_max_time)
                row.append(seg_loud_max_time_sum)
                seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
                row.append(seg_loud_start[i])
                seg_loud_start_avg = get_avg(seg_loud_start)
                row.append(seg_loud_start_avg)
                seg_loud_start_max = get_max(seg_loud_start)
                row.append(seg_loud_start_max)
                seg_loud_start_min = get_min(seg_loud_start)
                row.append(seg_loud_start_min)
                seg_loud_start_stddev = get_stddev(seg_loud_start)
                row.append(seg_loud_start_stddev)
                seg_loud_start_count = get_count(seg_loud_start)
                row.append(seg_loud_start_count)
                seg_loud_start_sum = get_sum(seg_loud_start)
                row.append(seg_loud_start_sum)
                seg_start = hdf5_getters.get_segments_start(h5)
                row.append(seg_start[i])
                seg_start_avg = get_avg(seg_start)
                row.append(seg_start_avg)
                seg_start_max = get_max(seg_start)
                row.append(seg_start_max)
                seg_start_min = get_min(seg_start)
                row.append(seg_start_min)
                seg_start_stddev = get_stddev(seg_start)
                row.append(seg_start_stddev)
                seg_start_count = get_count(seg_start)
                row.append(seg_start_count)
                seg_start_sum = get_sum(seg_start)
                row.append(seg_start_sum)
                for i in row_seg_padding:  #appending blank spaces at the end of the row
                    row.append(i)

                writer.writerow(row)
                row = []
                row = gral_info[:]

            #----------segments pitch and timbre---------------"
            row_seg2_padding = padding(
                14)  #blank spaces left at the end of the row
            row_front = padding(
                77)  #blank spaces left at the front of the segments and timbre
            seg_pitch = hdf5_getters.get_segments_pitches(h5)
            transpose_pitch = seg_pitch.transpose(
            )  #this is to tranpose the matrix,so we can have 12 rows
            group_index = 4
            row = []
            row = gral_info[:]
            for i, item in enumerate(transpose_pitch[0]):
                row.append(group_index)
                row.append(i)
                for index in row_front:  #padding blanks in front of segments and timbre
                    row.append(index)

                row.append(transpose_pitch[0][i])
                seg_pitch_avg = get_avg(transpose_pitch[0])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[0])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[0])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[0])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[0])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[0])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[1][i])
                seg_pitch_avg = get_avg(transpose_pitch[1])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[1])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[1])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[1])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[1])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[1])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[2][i])
                seg_pitch_avg = get_avg(transpose_pitch[2])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[2])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[2])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[2])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[2])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[2])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[3][i])
                seg_pitch_avg = get_avg(transpose_pitch[3])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[3])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[3])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[3])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[3])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[3])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[4][i])
                seg_pitch_avg = get_avg(transpose_pitch[4])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[4])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[4])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[4])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[4])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[4])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[5][i])
                seg_pitch_avg = get_avg(transpose_pitch[5])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[5])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[5])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[5])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[5])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[5])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[6][i])
                seg_pitch_avg = get_avg(transpose_pitch[6])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[6])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[6])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[6])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[6])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[6])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[7][i])
                seg_pitch_avg = get_avg(transpose_pitch[7])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[7])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[7])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[7])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[7])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[7])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[8][i])
                seg_pitch_avg = get_avg(transpose_pitch[8])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[8])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[8])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[8])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[8])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[8])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[9][i])
                seg_pitch_avg = get_avg(transpose_pitch[9])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[9])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[9])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[9])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[9])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[9])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[10][i])
                seg_pitch_avg = get_avg(transpose_pitch[10])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[10])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[10])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[10])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[10])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[10])
                row.append(seg_pitch_sum)
                row.append(transpose_pitch[11][i])
                seg_pitch_avg = get_avg(transpose_pitch[11])
                row.append(seg_pitch_avg)
                seg_pitch_max = get_max(transpose_pitch[11])
                row.append(seg_pitch_max)
                seg_pitch_min = get_min(transpose_pitch[11])
                row.append(seg_pitch_min)
                seg_pitch_stddev = get_stddev(transpose_pitch[11])
                row.append(seg_pitch_stddev)
                seg_pitch_count = get_count(transpose_pitch[11])
                row.append(seg_pitch_count)
                seg_pitch_sum = get_sum(transpose_pitch[11])
                row.append(seg_pitch_sum)
                #timbre arrays
                seg_timbre = hdf5_getters.get_segments_timbre(h5)
                transpose_timbre = seg_pitch.transpose(
                )  #tranposing matrix, to have 12 rows
                row.append(transpose_timbre[0][i])
                seg_timbre_avg = get_avg(transpose_timbre[0])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[0])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[0])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[0])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[0])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[0])
                row.append(seg_timbre_sum)
                row.append(transpose_timbre[1][i])
                seg_timbre_avg = get_avg(transpose_timbre[1])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[1])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[1])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[1])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[1])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[1])
                row.append(seg_timbre_sum)
                row.append(transpose_timbre[2][i])
                seg_timbre_avg = get_avg(transpose_timbre[2])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[2])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[2])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[2])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[2])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[2])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[3][i])
                seg_timbre_avg = get_avg(transpose_timbre[3])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[3])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[3])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[3])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[3])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[3])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[4][i])
                seg_timbre_avg = get_avg(transpose_timbre[4])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[4])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[4])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[4])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[4])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[4])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[5][i])
                seg_timbre_avg = get_avg(transpose_timbre[5])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[5])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[5])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[5])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[5])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[5])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[6][i])
                seg_timbre_avg = get_avg(transpose_timbre[6])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[6])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[6])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[6])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[6])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[6])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[7][i])
                seg_timbre_avg = get_avg(transpose_timbre[7])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[7])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[7])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[7])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[7])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[7])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[8][i])
                seg_timbre_avg = get_avg(transpose_timbre[8])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[8])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[8])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[8])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[8])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[8])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[9][i])
                seg_timbre_avg = get_avg(transpose_timbre[9])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[9])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[9])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[9])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[9])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[9])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[10][i])
                seg_timbre_avg = get_avg(transpose_timbre[10])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[10])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[10])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[10])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[10])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[10])
                row.append(seg_timbre_sum)

                row.append(transpose_timbre[11][i])
                seg_timbre_avg = get_avg(transpose_timbre[11])
                row.append(seg_timbre_avg)
                seg_timbre_max = get_max(transpose_timbre[11])
                row.append(seg_timbre_max)
                seg_timbre_min = get_min(transpose_timbre[11])
                row.append(seg_timbre_min)
                seg_timbre_stddev = get_stddev(transpose_timbre[11])
                row.append(seg_timbre_stddev)
                seg_timbre_count = get_count(transpose_timbre[11])
                row.append(seg_timbre_count)
                seg_timbre_sum = get_sum(transpose_timbre[11])
                row.append(seg_timbre_sum)
                for item in row_seg2_padding:
                    row.append(item)
                writer.writerow(row)
                row = []
                row = gral_info[:]

    # "--------tatums---------------"
            tatms_c = hdf5_getters.get_tatums_confidence(h5)
            group_index = 5
            row_front = padding(245)  #blank spaces left in front of tatums
            row = []
            row = gral_info[:]
            for i, item in enumerate(tatms_c):
                row.append(group_index)
                row.append(i)
                for item in row_front:  #appending blank spaces at the front of the row
                    row.append(item)

                row.append(tatms_c[i])
                tatms_c_avg = get_avg(tatms_c)
                row.append(tatms_c_avg)
                tatms_c_max = get_max(tatms_c)
                row.append(tatms_c_max)
                tatms_c_min = get_min(tatms_c)
                row.append(tatms_c_min)
                tatms_c_stddev = get_stddev(tatms_c)
                row.append(tatms_c_stddev)
                tatms_c_count = get_count(tatms_c)
                row.append(tatms_c_count)
                tatms_c_sum = get_sum(tatms_c)
                row.append(tatms_c_sum)
                tatms_start = hdf5_getters.get_tatums_start(h5)
                row.append(tatms_start[i])
                tatms_start_avg = get_avg(tatms_start)
                row.append(tatms_start_avg)
                tatms_start_max = get_max(tatms_start)
                row.append(tatms_start_max)
                tatms_start_min = get_min(tatms_start)
                row.append(tatms_start_min)
                tatms_start_stddev = get_stddev(tatms_start)
                row.append(tatms_start_stddev)
                tatms_start_count = get_count(tatms_start)
                row.append(tatms_start_count)
                tatms_start_sum = get_sum(tatms_start)
                row.append(tatms_start_sum)
                writer.writerow(row)
                row = []
                row = gral_info[:]

            transpose_pitch = seg_pitch.transpose(
            )  #this is to tranpose the matrix,so we can have 12 rows
            #arrays containing the aggregate values of the 12 rows
            seg_pitch_avg = []
            seg_pitch_max = []
            seg_pitch_min = []
            seg_pitch_stddev = []
            seg_pitch_count = []
            seg_pitch_sum = []
            i = 0
            #Getting the aggregate values in the pitches array
            for row in transpose_pitch:
                seg_pitch_avg.append(get_avg(row))
                seg_pitch_max.append(get_max(row))
                seg_pitch_min.append(get_min(row))
                seg_pitch_stddev.append(get_stddev(row))
                seg_pitch_count.append(get_count(row))
                seg_pitch_sum.append(get_sum(row))
                i = i + 1

            #extracting information from the timbre array
            transpose_timbre = seg_pitch.transpose(
            )  #tranposing matrix, to have 12 rows
            #arrays containing the aggregate values of the 12 rows
            seg_timbre_avg = []
            seg_timbre_max = []
            seg_timbre_min = []
            seg_timbre_stddev = []
            seg_timbre_count = []
            seg_timbre_sum = []
            i = 0
            for row in transpose_timbre:
                seg_timbre_avg.append(get_avg(row))
                seg_timbre_max.append(get_max(row))
                seg_timbre_min.append(get_min(row))
                seg_timbre_stddev.append(get_stddev(row))
                seg_timbre_count.append(get_count(row))
                seg_timbre_sum.append(get_sum(row))
                i = i + 1

            h5.close()
            count = count + 1
            print count
def data_to_flat_file(basedir, ext='.h5'):
    """This function extract the information from the tables and creates the flat file."""
    count = 0
    #song counter
    list_to_write = []
    row_to_write = ""
    writer = csv.writer(open("metadata_wholeA.csv", "wb"))
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            print f  #the name of the file
            h5 = hdf5_getters.open_h5_file_read(f)
            title = hdf5_getters.get_title(h5)
            title = title.replace('"', '')
            comma = title.find(',')  #eliminating commas in the title
            if comma != -1:
                print title
                time.sleep(1)
            album = hdf5_getters.get_release(h5)
            album = album.replace('"', '')  #eliminating commas in the album
            comma = album.find(',')
            if comma != -1:
                print album
                time.sleep(1)
            artist_name = hdf5_getters.get_artist_name(h5)
            comma = artist_name.find(',')
            if comma != -1:
                print artist_name
                time.sleep(1)
            artist_name = artist_name.replace('"',
                                              '')  #eliminating double quotes
            duration = hdf5_getters.get_duration(h5)
            samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
            artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
            artist_fam = hdf5_getters.get_artist_familiarity(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_fam) == True:
                artist_fam = -1
            artist_hotness = hdf5_getters.get_artist_hotttnesss(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_hotness) == True:
                artist_hotness = -1
            artist_id = hdf5_getters.get_artist_id(h5)
            artist_lat = hdf5_getters.get_artist_latitude(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_lat) == True:
                artist_lat = -1
            artist_loc = hdf5_getters.get_artist_location(h5)
            #checks artist_loc to see if it is a hyperlink if it is set as empty string
            artist_loc = artist_loc.replace(",", "\,")
            if artist_loc.startswith("<a"):
                artist_loc = ""
            if len(artist_loc) > 100:
                artist_loc = ""
            artist_lon = hdf5_getters.get_artist_longitude(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(artist_lon) == True:
                artist_lon = -1
            artist_mbid = hdf5_getters.get_artist_mbid(h5)
            artist_pmid = hdf5_getters.get_artist_playmeid(h5)
            audio_md5 = hdf5_getters.get_audio_md5(h5)
            danceability = hdf5_getters.get_danceability(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(danceability) == True:
                danceability = -1
            end_fade_in = hdf5_getters.get_end_of_fade_in(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(end_fade_in) == True:
                end_fade_in = -1
            energy = hdf5_getters.get_energy(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(energy) == True:
                energy = -1
            song_key = hdf5_getters.get_key(h5)
            key_c = hdf5_getters.get_key_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(key_c) == True:
                key_c = -1
            loudness = hdf5_getters.get_loudness(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(loudness) == True:
                loudness = -1
            mode = hdf5_getters.get_mode(h5)
            mode_conf = hdf5_getters.get_mode_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(mode_conf) == True:
                mode_conf = -1
            release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
            song_hot = hdf5_getters.get_song_hotttnesss(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(song_hot) == True:
                song_hot = -1
            song_id = hdf5_getters.get_song_id(h5)
            start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
            tempo = hdf5_getters.get_tempo(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(tempo) == True:
                tempo = -1
            time_sig = hdf5_getters.get_time_signature(h5)
            time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
            #checking if we get a "nan" if we do we change it to -1
            if numpy.isnan(time_sig_c) == True:
                time_sig_c = -1
            track_id = hdf5_getters.get_track_id(h5)
            track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
            year = hdf5_getters.get_year(h5)
            bars_c = hdf5_getters.get_bars_confidence(h5)
            bars_c_avg = get_avg(bars_c)
            bars_c_max = get_max(bars_c)
            bars_c_min = get_min(bars_c)
            bars_c_stddev = get_stddev(bars_c)
            bars_c_count = get_count(bars_c)
            bars_c_sum = get_sum(bars_c)
            bars_start = hdf5_getters.get_bars_start(h5)
            bars_start_avg = get_avg(bars_start)
            bars_start_max = get_max(bars_start)
            bars_start_min = get_min(bars_start)
            bars_start_stddev = get_stddev(bars_start)
            bars_start_count = get_count(bars_start)
            bars_start_sum = get_sum(bars_start)
            beats_c = hdf5_getters.get_beats_confidence(h5)
            beats_c_avg = get_avg(beats_c)
            beats_c_max = get_max(beats_c)
            beats_c_min = get_min(beats_c)
            beats_c_stddev = get_stddev(beats_c)
            beats_c_count = get_count(beats_c)
            beats_c_sum = get_sum(beats_c)
            beats_start = hdf5_getters.get_beats_start(h5)
            beats_start_avg = get_avg(beats_start)
            beats_start_max = get_max(beats_start)
            beats_start_min = get_min(beats_start)
            beats_start_stddev = get_stddev(beats_start)
            beats_start_count = get_count(beats_start)
            beats_start_sum = get_sum(beats_start)
            sec_c = hdf5_getters.get_sections_confidence(h5)
            sec_c_avg = get_avg(sec_c)
            sec_c_max = get_max(sec_c)
            sec_c_min = get_min(sec_c)
            sec_c_stddev = get_stddev(sec_c)
            sec_c_count = get_count(sec_c)
            sec_c_sum = get_sum(sec_c)
            sec_start = hdf5_getters.get_sections_start(h5)
            sec_start_avg = get_avg(sec_start)
            sec_start_max = get_max(sec_start)
            sec_start_min = get_min(sec_start)
            sec_start_stddev = get_stddev(sec_start)
            sec_start_count = get_count(sec_start)
            sec_start_sum = get_sum(sec_start)
            seg_c = hdf5_getters.get_segments_confidence(h5)
            seg_c_avg = get_avg(seg_c)
            seg_c_max = get_max(seg_c)
            seg_c_min = get_min(seg_c)
            seg_c_stddev = get_stddev(seg_c)
            seg_c_count = get_count(seg_c)
            seg_c_sum = get_sum(seg_c)
            seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
            seg_loud_max_avg = get_avg(seg_loud_max)
            seg_loud_max_max = get_max(seg_loud_max)
            seg_loud_max_min = get_min(seg_loud_max)
            seg_loud_max_stddev = get_stddev(seg_loud_max)
            seg_loud_max_count = get_count(seg_loud_max)
            seg_loud_max_sum = get_sum(seg_loud_max)
            seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
            seg_loud_max_time_avg = get_avg(seg_loud_max_time)
            seg_loud_max_time_max = get_max(seg_loud_max_time)
            seg_loud_max_time_min = get_min(seg_loud_max_time)
            seg_loud_max_time_stddev = get_stddev(seg_loud_max_time)
            seg_loud_max_time_count = get_count(seg_loud_max_time)
            seg_loud_max_time_sum = get_sum(seg_loud_max_time)
            seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
            seg_loud_start_avg = get_avg(seg_loud_start)
            seg_loud_start_max = get_max(seg_loud_start)
            seg_loud_start_min = get_min(seg_loud_start)
            seg_loud_start_stddev = get_stddev(seg_loud_start)
            seg_loud_start_count = get_count(seg_loud_start)
            seg_loud_start_sum = get_sum(seg_loud_start)
            seg_pitch = hdf5_getters.get_segments_pitches(h5)
            pitch_size = len(seg_pitch)
            seg_start = hdf5_getters.get_segments_start(h5)
            seg_start_avg = get_avg(seg_start)
            seg_start_max = get_max(seg_start)
            seg_start_min = get_min(seg_start)
            seg_start_stddev = get_stddev(seg_start)
            seg_start_count = get_count(seg_start)
            seg_start_sum = get_sum(seg_start)
            seg_timbre = hdf5_getters.get_segments_timbre(h5)
            tatms_c = hdf5_getters.get_tatums_confidence(h5)
            tatms_c_avg = get_avg(tatms_c)
            tatms_c_max = get_max(tatms_c)
            tatms_c_min = get_min(tatms_c)
            tatms_c_stddev = get_stddev(tatms_c)
            tatms_c_count = get_count(tatms_c)
            tatms_c_sum = get_sum(tatms_c)
            tatms_start = hdf5_getters.get_tatums_start(h5)
            tatms_start_avg = get_avg(tatms_start)
            tatms_start_max = get_max(tatms_start)
            tatms_start_min = get_min(tatms_start)
            tatms_start_stddev = get_stddev(tatms_start)
            tatms_start_count = get_count(tatms_start)
            tatms_start_sum = get_sum(tatms_start)

            #Getting the genres
            genre_set = 0  #flag to see if the genre has been set or not
            art_trm = hdf5_getters.get_artist_terms(h5)
            trm_freq = hdf5_getters.get_artist_terms_freq(h5)
            trn_wght = hdf5_getters.get_artist_terms_weight(h5)
            a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
            genre_indexes = get_genre_indexes(
                trm_freq)  #index of the highest freq
            final_genre = []
            genres_so_far = []
            for i in range(len(genre_indexes)):
                genre_tmp = get_genre(
                    art_trm, genre_indexes[i]
                )  #genre that corresponds to the highest freq
                genres_so_far = genre_dict.get_genre_in_dict(
                    genre_tmp)  #getting the genre from the dictionary
                if len(genres_so_far) != 0:
                    for i in genres_so_far:
                        final_genre.append(i)
                        genre_set = 1  #genre was found in dictionary

            if genre_set == 1:
                col_num = []

                for genre in final_genre:
                    column = int(
                        genre)  #getting the column number of the genre
                    col_num.append(column)

                genre_array = genre_columns(col_num)  #genre array
            else:
                genre_array = genre_columns(
                    -1)  #the genre was not found in the dictionary

            transpose_pitch = seg_pitch.transpose(
            )  #this is to tranpose the matrix,so we can have 12 rows
            #arrays containing the aggregate values of the 12 rows
            seg_pitch_avg = []
            seg_pitch_max = []
            seg_pitch_min = []
            seg_pitch_stddev = []
            seg_pitch_count = []
            seg_pitch_sum = []
            i = 0
            #Getting the aggregate values in the pitches array
            for row in transpose_pitch:
                seg_pitch_avg.append(get_avg(row))
                seg_pitch_max.append(get_max(row))
                seg_pitch_min.append(get_min(row))
                seg_pitch_stddev.append(get_stddev(row))
                seg_pitch_count.append(get_count(row))
                seg_pitch_sum.append(get_sum(row))
                i = i + 1

            #extracting information from the timbre array
            transpose_timbre = seg_pitch.transpose(
            )  #tranposing matrix, to have 12 rows
            #arrays containing the aggregate values of the 12 rows
            seg_timbre_avg = []
            seg_timbre_max = []
            seg_timbre_min = []
            seg_timbre_stddev = []
            seg_timbre_count = []
            seg_timbre_sum = []
            i = 0
            for row in transpose_timbre:
                seg_timbre_avg.append(get_avg(row))
                seg_timbre_max.append(get_max(row))
                seg_timbre_min.append(get_min(row))
                seg_timbre_stddev.append(get_stddev(row))
                seg_timbre_count.append(get_count(row))
                seg_timbre_sum.append(get_sum(row))
                i = i + 1

        #Writing to the flat file
            writer.writerow([
                title, album, artist_name, year, duration, seg_start_count,
                tempo
            ])

            h5.close()
            count = count + 1
            print count
def classify(h5):
    output_array = {}
    # duration
    duration = hdf5_getters.get_duration(h5)
    output_array["duration"] = duration  ### ADDED VALUE TO ARRAY
    # number of bars
    bars = hdf5_getters.get_bars_start(h5)
    num_bars = len(bars)
    output_array["num_bars"] = num_bars  ### ADDED VALUE TO ARRAY
    # mean and variance in bar length
    bar_length = numpy.ediff1d(bars)
    variance_bar_length = numpy.var(bar_length)
    output_array[
        "variance_bar_length"] = variance_bar_length  ### ADDED VALUE TO ARRAY
    # number of beats
    beats = hdf5_getters.get_beats_start(h5)
    num_beats = len(beats)
    output_array["num_beats"] = num_beats  ### ADDED VALUE TO ARRAY
    # mean and variance in beats length
    beats_length = numpy.ediff1d(beats)
    variance_beats_length = numpy.var(bar_length)
    output_array[
        "variance_beats_length"] = variance_beats_length  ### ADDED VALUE TO ARRAY
    # danceability
    danceability = hdf5_getters.get_danceability(h5)
    output_array["danceability"] = danceability  ### ADDED VALUE TO ARRAY
    # end of fade in
    end_of_fade_in = hdf5_getters.get_end_of_fade_in(h5)
    output_array["end_of_fade_in"] = end_of_fade_in  ### ADDED VALUE TO ARRAY
    # energy
    energy = hdf5_getters.get_energy(h5)
    output_array["energy"] = energy  ### ADDED VALUE TO ARRAY
    # key
    key = hdf5_getters.get_key(h5)
    output_array["key"] = int(key)  ### ADDED VALUE TO ARRAY
    # loudness
    loudness = hdf5_getters.get_loudness(h5)
    output_array["loudness"] = loudness  ### ADDED VALUE TO ARRAY
    # mode
    mode = hdf5_getters.get_mode(h5)
    output_array["mode"] = int(mode)  ### ADDED VALUE TO ARRAY
    # number sections
    sections = hdf5_getters.get_sections_start(h5)
    num_sections = len(sections)
    output_array["num_sections"] = num_sections  ### ADDED VALUE TO ARRAY
    # mean and variance in sections length
    sections_length = numpy.ediff1d(sections)
    variance_sections_length = numpy.var(sections)
    output_array[
        "variance_sections_length"] = variance_sections_length  ### ADDED VALUE TO ARRAY
    # number segments
    segments = hdf5_getters.get_segments_start(h5)
    num_segments = len(segments)
    output_array["num_segments"] = num_segments  ### ADDED VALUE TO ARRAY
    # mean and variance in segments length
    segments_length = numpy.ediff1d(segments)
    variance_segments_length = numpy.var(segments)
    output_array[
        "variance_segments_length"] = variance_segments_length  ### ADDED VALUE TO ARRAY
    # segment loudness max
    segment_loudness_max_array = hdf5_getters.get_segments_loudness_max(h5)
    segment_loudness_max_time_array = hdf5_getters.get_segments_loudness_max_time(
        h5)
    segment_loudness_max_index = 0
    for i in range(len(segment_loudness_max_array)):
        if segment_loudness_max_array[i] > segment_loudness_max_array[
                segment_loudness_max_index]:
            segment_loudness_max_index = i
    segment_loudness_max = segment_loudness_max_array[
        segment_loudness_max_index]
    segment_loudness_max_time = segment_loudness_max_time_array[
        segment_loudness_max_index]
    output_array[
        "segment_loudness_max"] = segment_loudness_max  ### ADDED VALUE TO ARRAY
    output_array[
        "segment_loudness_time"] = segment_loudness_max_time  ### ADDED VALUE TO ARRAY

    # POSSIBLE TODO: use average function instead and weight by segment length
    # segment loudness mean (start)
    segment_loudness_array = hdf5_getters.get_segments_loudness_start(h5)
    segment_loudness_mean = numpy.mean(segment_loudness_array)
    output_array[
        "segment_loudness_mean"] = segment_loudness_mean  ### ADDED VALUE TO ARRAY
    # segment loudness variance (start)
    segment_loudness_variance = numpy.var(segment_loudness_array)
    output_array[
        "segment_loudness_variance"] = segment_loudness_variance  ### ADDED VALUE TO ARRAY
    # segment pitches
    segment_pitches_array = hdf5_getters.get_segments_pitches(h5)
    segment_pitches_mean = numpy.mean(segment_pitches_array, axis=0).tolist()
    output_array["segment_pitches_mean"] = segment_pitches_mean
    # segment pitches variance (start)
    segment_pitches_variance = numpy.var(segment_pitches_array,
                                         axis=0).tolist()
    output_array["segment_pitches_variance"] = segment_pitches_variance
    # segment timbres
    segment_timbres_array = hdf5_getters.get_segments_timbre(h5)
    segment_timbres_mean = numpy.mean(segment_timbres_array, axis=0).tolist()
    output_array["segment_timbres_mean"] = segment_timbres_mean
    # segment timbres variance (start)
    segment_timbres_variance = numpy.var(segment_timbres_array,
                                         axis=0).tolist()
    output_array["segment_timbres_variance"] = segment_timbres_variance
    # hotttnesss
    hottness = hdf5_getters.get_song_hotttnesss(h5, 0)
    output_array["hottness"] = hottness  ### ADDED VALUE TO ARRAY
    # duration-start of fade out
    start_of_fade_out = hdf5_getters.get_start_of_fade_out(h5)
    fade_out = duration - start_of_fade_out
    output_array["fade_out"] = fade_out  ### ADDED VALUE TO ARRAY
    # tatums
    tatums = hdf5_getters.get_tatums_start(h5)
    num_tatums = len(tatums)
    output_array["num_tatums"] = num_tatums  ### ADDED VALUE TO ARRAY
    # mean and variance in tatums length
    tatums_length = numpy.ediff1d(tatums)
    variance_tatums_length = numpy.var(tatums_length)
    output_array[
        "variance_tatums_length"] = variance_tatums_length  ### ADDED VALUE TO ARRAY
    # tempo
    tempo = hdf5_getters.get_tempo(h5)
    output_array["tempo"] = tempo  ### ADDED VALUE TO ARRAY
    # time signature
    time_signature = hdf5_getters.get_time_signature(h5)
    output_array["time_signature"] = int(
        time_signature)  ### ADDED VALUE TO ARRAY
    # year
    year = hdf5_getters.get_year(h5)
    output_array["year"] = int(year)  ### ADDED VALUE TO ARRAY
    # artist terms
    artist_terms = hdf5_getters.get_artist_terms(h5, 0)
    output_array["artist_terms"] = artist_terms.tolist()
    artist_terms_freq = hdf5_getters.get_artist_terms_freq(h5, 0)
    output_array["artist_terms_freq"] = artist_terms_freq.tolist()
    artist_name = hdf5_getters.get_artist_name(h5, 0)
    output_array["artist_name"] = artist_name
    artist_id = hdf5_getters.get_artist_id(h5, 0)
    output_array["artist_id"] = artist_id
    # title
    title = hdf5_getters.get_title(h5, 0)
    output_array["title"] = title

    return output_array
Exemple #41
0
            # Get artist HOTTTNESSSSSS
            hotttness = hdf5_getters.get_artist_hotttnesss(h5)

            # Get artist familiarity
            familiarity = hdf5_getters.get_artist_familiarity(h5)

            # Get danceability
            danceability = hdf5_getters.get_danceability(h5)

            # Get duration
            duration = hdf5_getters.get_duration(h5)

            # Get energy
            #*****useless... column is filled with 0's?
            energy = hdf5_getters.get_energy(h5)

            # Get loudness
            loudness = hdf5_getters.get_loudness(h5)

            # Get year
            year = hdf5_getters.get_year(h5)

            # Get tempo
            tempo = hdf5_getters.get_tempo(h5)

            #########################################################

            # Get analysis sample rate
            analysis_rate = hdf5_getters.get_analysis_sample_rate(h5)
Exemple #42
0
def getInfo(files):
    data = []
    build_str = ''
    with open(sys.argv[1], 'r') as f:
        contents = f.read()
        c = contents.split()
    f.close()
    print("creating csv with following fields:" + contents)
    for i in c:
        build_str = build_str + i + ','
    build_str = build_str[:-1]
    build_str = build_str + '\n'
    for fil in files:
        curFile = getters.open_h5_file_read(fil)
        d2 = {}
        get_table = {'track_id': getters.get_track_id(curFile), 'segments_pitches': getters.get_segments_pitches(curFile), 'time_signature_confidence': getters.get_time_signature_confidence(curFile), 'song_hotttnesss': getters.get_song_hotttnesss(curFile), 'artist_longitude': getters.get_artist_longitude(curFile), 'tatums_confidence': getters.get_tatums_confidence(curFile), 'num_songs': getters.get_num_songs(curFile), 'duration': getters.get_duration(curFile), 'start_of_fade_out': getters.get_start_of_fade_out(curFile), 'artist_name': getters.get_artist_name(curFile), 'similar_artists': getters.get_similar_artists(curFile), 'artist_mbtags': getters.get_artist_mbtags(curFile), 'artist_terms_freq': getters.get_artist_terms_freq(curFile), 'release': getters.get_release(curFile), 'song_id': getters.get_song_id(curFile), 'track_7digitalid': getters.get_track_7digitalid(curFile), 'title': getters.get_title(curFile), 'artist_latitude': getters.get_artist_latitude(curFile), 'energy': getters.get_energy(curFile), 'key': getters.get_key(curFile), 'release_7digitalid': getters.get_release_7digitalid(curFile), 'artist_mbid': getters.get_artist_mbid(curFile), 'segments_confidence': getters.get_segments_confidence(curFile), 'artist_hotttnesss': getters.get_artist_hotttnesss(curFile), 'time_signature': getters.get_time_signature(curFile), 'segments_loudness_max_time': getters.get_segments_loudness_max_time(curFile), 'mode': getters.get_mode(curFile), 'segments_loudness_start': getters.get_segments_loudness_start(curFile), 'tempo': getters.get_tempo(curFile), 'key_confidence': getters.get_key_confidence(curFile), 'analysis_sample_rate': getters.get_analysis_sample_rate(curFile), 'bars_confidence': getters.get_bars_confidence(curFile), 'artist_playmeid': getters.get_artist_playmeid(curFile), 'artist_terms_weight': getters.get_artist_terms_weight(curFile), 'segments_start': getters.get_segments_start(curFile), 'artist_location': getters.get_artist_location(curFile), 'loudness': getters.get_loudness(curFile), 'year': getters.get_year(curFile), 'artist_7digitalid': getters.get_artist_7digitalid(curFile), 'audio_md5': getters.get_audio_md5(curFile), 'segments_timbre': getters.get_segments_timbre(curFile), 'mode_confidence': getters.get_mode_confidence(curFile), 'end_of_fade_in': getters.get_end_of_fade_in(curFile), 'danceability': getters.get_danceability(curFile), 'artist_familiarity': getters.get_artist_familiarity(curFile), 'artist_mbtags_count': getters.get_artist_mbtags_count(curFile), 'tatums_start': getters.get_tatums_start(curFile), 'artist_id': getters.get_artist_id(curFile), 'segments_loudness_max': getters.get_segments_loudness_max(curFile), 'bars_start': getters.get_bars_start(curFile), 'beats_start': getters.get_beats_start(curFile), 'artist_terms': getters.get_artist_terms(curFile), 'sections_start': getters.get_sections_start(curFile), 'beats_confidence': getters.get_beats_confidence(curFile), 'sections_confidence': getters.get_sections_confidence(curFile)}
        tid = fil.split('/')[-1].split('.')[0]
        # print(c)
        for i in c:
            if i in get_table: 
               d2[i] = get_table[i]
               d2[i] = str(d2[i]).replace('\n','')  
               build_str = build_str + d2[i] + ','
            else:
                print('error: unspecified field')
                exit(0)
        build_str = build_str[:-1]
        # print(build_str[:-1])
        build_str = build_str + '\n'
        curFile.close()
    build_str = build_str.replace('b','').replace("'",'').replace('"','')  
    return (build_str)