def debug_from_song_file(connect,h5path,verbose=0): """ Slow debugging function that takes a h5 file, reads the info, check the match with musicbrainz db, prints out the result. Only prints when we dont get exact match! RETURN counts of how many files we filled for years, tags """ import hdf5_utils as HDF5 import hdf5_getters as GETTERS h5 = HDF5.open_h5_file_read(h5path) title = GETTERS.get_title(h5) release = GETTERS.get_release(h5) artist = GETTERS.get_artist_name(h5) ambid = GETTERS.get_artist_mbid(h5) h5.close() # mbid gotmbid=1 if ambid=='': gotmbid = 0 if verbose>0: print('no mb id for:',artist) # year year = find_year_safemode(connect,ambid,title,release,artist) gotyear = 1 if year > 0 else 0 if verbose>0: print('no years for:',artist,'|',release,'|',title) # tags tags,counts = get_artist_tags(connect,ambid) gottags = 1 if len(tags) > 0 else 0 if gottags == 0 and verbose>0: print('no tags for:',artist) # return indicator for mbid, year, tag return gotmbid,gotyear,gottags
def debug_from_song_file(connect, h5path, verbose=0): """ Slow debugging function that takes a h5 file, reads the info, check the match with musicbrainz db, prints out the result. Only prints when we dont get exact match! RETURN counts of how many files we filled for years, tags """ import hdf5_utils as HDF5 import hdf5_getters as GETTERS h5 = HDF5.open_h5_file_read(h5path) title = GETTERS.get_title(h5) release = GETTERS.get_release(h5) artist = GETTERS.get_artist_name(h5) ambid = GETTERS.get_artist_mbid(h5) h5.close() # mbid gotmbid = 1 if ambid == '': gotmbid = 0 if verbose > 0: print('no mb id for:', artist) # year year = find_year_safemode(connect, ambid, title, release, artist) gotyear = 1 if year > 0 else 0 if verbose > 0: print('no years for:', artist, '|', release, '|', title) # tags tags, counts = get_artist_tags(connect, ambid) gottags = 1 if len(tags) > 0 else 0 if gottags == 0 and verbose > 0: print('no tags for:', artist) # return indicator for mbid, year, tag return gotmbid, gotyear, gottags
def add_to_database(con, full_path): data = h5.open_h5_file_read(full_path) number_of_songs = h5.get_num_songs(data) for song_index in xrange(0, number_of_songs): artist_id = get_artist_id(con, h5.get_artist_mbid(data, song_index)) if artist_id == -1: artist_id = add_data_to_artists_table(con, data, song_index) add_data_to_artists_rel_table(con, data, song_index, artist_id) add_data_to_songs_table(con, data, artist_id) data.close()
def get_all_attributes(filename): """ This function does 3 simple things: - open the song file - get all required attributes - write it to a csv file - close the files """ with open('attributes.csv', 'a') as csvfile: try: # let's apply the previous function to all files csvwriter = csv.writer(csvfile, delimiter='\t') h5 = GETTERS.open_h5_file_read(filename) RESULTS = [] RESULTS.append(GETTERS.get_year(h5)) RESULTS.append(GETTERS.get_artist_id(h5)) RESULTS.append(GETTERS.get_artist_name(h5)) RESULTS.append(GETTERS.get_artist_mbid(h5)) RESULTS.append(convert_terms(GETTERS.get_artist_terms(h5))) RESULTS.append(GETTERS.get_artist_hotttnesss(h5)) RESULTS.append(GETTERS.get_artist_latitude(h5)) RESULTS.append(GETTERS.get_artist_longitude(h5)) RESULTS.append(GETTERS.get_artist_familiarity(h5)) RESULTS.append(GETTERS.get_danceability(h5)) RESULTS.append(GETTERS.get_duration(h5)) RESULTS.append(GETTERS.get_energy(h5)) RESULTS.append(GETTERS.get_loudness(h5)) RESULTS.append(GETTERS.get_song_hotttnesss(h5)) RESULTS.append(GETTERS.get_song_id(h5)) RESULTS.append(GETTERS.get_tempo(h5)) RESULTS.append(GETTERS.get_time_signature(h5)) RESULTS.append(GETTERS.get_title(h5)) RESULTS.append(GETTERS.get_track_id(h5)) RESULTS.append(GETTERS.get_release(h5)) csvwriter.writerow(RESULTS) h5.close() except AttributeError: pass
def add_data_to_artists_table(con, data, song_index): values = [] for property in artist_properties: artist_data = get_property(property, data, song_index) if type(artist_data) == np.ndarray: artist_data = serialize(artist_data) values.append(simple_type(artist_data)) command = "INSERT INTO Artists (" is_first = True for property in artist_properties: if not is_first: command += ", " + property else: command += " " + property is_first = False command += ") VALUES (" is_first = True for _ in values: if not is_first: command += ", " else: is_first = False command += "?" command += ");" cursor = con.cursor() cursor.execute(command, values) con.commit() artist_id = get_artist_id(con, h5.get_artist_mbid(data, song_index)) return artist_id
def fill_attributes(song, songH5File): #----------------------------non array attributes------------------------------- song.analysisSampleRate = str( hdf5_getters.get_analysis_sample_rate(songH5File)) song.artistDigitalID = str(hdf5_getters.get_artist_7digitalid(songH5File)) song.artistFamiliarity = str( hdf5_getters.get_artist_familiarity(songH5File)) song.artistHotness = str(hdf5_getters.get_artist_hottness(songH5File)) song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File)) song.artistLocation = str(hdf5_getters.get_artist_location(songH5File)) song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File)) song.artistmbID = str(hdf5_getters.get_artist_mbid(songH5File)) song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.artistPlayMeID = str(hdf5_getters.get_artist_playmeid(songH5File)) song.audioMD5 = str(hdf5_getters.get_audio_md5(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) song.key = str(hdf5_getters.get_key(songH5File)) song.keyConfidence = str(hdf5_getters.get_key_confidence(songH5File)) song.segementsConfidence = str( hdf5_getters.get_segments_confidence(songH5File)) song.segementsConfidence = str( hdf5_getters.get_sections_confidence(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) song.modeConfidence = str(hdf5_getters.get_mode_confidence(songH5File)) song.release = str(hdf5_getters.get_release(songH5File)) song.releaseDigitalID = str( hdf5_getters.get_release_7digitalid(songH5File)) song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File)) song.startOfFadeOut = str(hdf5_getters.get_start_of_fade_out(songH5File)) song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str(hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str( hdf5_getters.get_time_signature_confidence(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.trackID = str(hdf5_getters.get_track_id(songH5File)) song.trackDigitalID = str(hdf5_getters.get_track_7digitalid(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) #-------------------------------array attributes-------------------------------------- #array float song.beatsStart_mean, song.beatsStart_var = convert_array_to_meanvar( hdf5_getters.get_beats_start(songH5File)) #array float song.artistTermsFreq_mean, song.artistTermsFreq_var = convert_array_to_meanvar( hdf5_getters.get_artist_terms_freq(songH5File)) #array float song.artistTermsWeight_mean, song.artistTermsWeight_var = convert_array_to_meanvar( hdf5_getters.get_artist_terms_weight(songH5File)) #array int song.artistmbTagsCount_mean, song.artistmbTagsCount_var = convert_array_to_meanvar( hdf5_getters.get_artist_mbtags_count(songH5File)) #array float song.barsConfidence_mean, song.barsConfidence_var = convert_array_to_meanvar( hdf5_getters.get_bars_confidence(songH5File)) #array float song.barsStart_mean, song.barsStart_var = convert_array_to_meanvar( hdf5_getters.get_bars_start(songH5File)) #array float song.beatsConfidence_mean, song.beatsConfidence_var = convert_array_to_meanvar( hdf5_getters.get_beats_confidence(songH5File)) #array float song.sectionsConfidence_mean, song.sectionsConfidence_var = convert_array_to_meanvar( hdf5_getters.get_sections_confidence(songH5File)) #array float song.sectionsStart_mean, song.sectionsStart_var = convert_array_to_meanvar( hdf5_getters.get_sections_start(songH5File)) #array float song.segmentsConfidence_mean, song.segmentsConfidence_var = convert_array_to_meanvar( hdf5_getters.get_segments_confidence(songH5File)) #array float song.segmentsLoudness_mean, song.segmentsLoudness_var = convert_array_to_meanvar( hdf5_getters.get_segments_loudness_max(songH5File)) #array float song.segmentsLoudnessMaxTime_mean, song.segmentsLoudnessMaxTime_var = convert_array_to_meanvar( hdf5_getters.get_segments_loudness_max_time(songH5File)) #array float song.segmentsLoudnessMaxStart_mean, song.segmentsLoudnessMaxStart_var = convert_array_to_meanvar( hdf5_getters.get_segments_loudness_start(songH5File)) #array float song.segmentsStart_mean, song.segmentsStart_var = convert_array_to_meanvar( hdf5_getters.get_segments_start(songH5File)) #array float song.tatumsConfidence_mean, song.tatumsConfidence_var = convert_array_to_meanvar( hdf5_getters.get_tatums_confidence(songH5File)) #array float song.tatumsStart_mean, song.tatumsStart_var = convert_array_to_meanvar( hdf5_getters.get_tatums_start(songH5File)) #array2d float song.segmentsTimbre_mean, song.segmentsTimbre_var = covert_2darray_to_meanvar( hdf5_getters.get_segments_timbre(songH5File)) #array2d float song.segmentsPitches_mean, song.segmentsPitches_var = covert_2darray_to_meanvar( hdf5_getters.get_segments_pitches(songH5File)) #------------------------array string attributes------------------------ song.similarArtists = convert_array_to_string( hdf5_getters.get_similar_artists(songH5File)) #array string song.artistTerms = convert_array_to_string( hdf5_getters.get_artist_terms(songH5File)) #array string song.artistmbTags = convert_array_to_string( hdf5_getters.get_artist_mbtags(songH5File)) #array string return song
def data_to_flat_file(basedir,ext='.h5') : """This function extract the information from the tables and creates the flat file.""" count = 0; #song counter list_to_write= [] row_to_write = "" writer = csv.writer(open("metadata.csv", "wb")) for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root,'*'+ext)) for f in files: print f #the name of the file h5 = hdf5_getters.open_h5_file_read(f) title = hdf5_getters.get_title(h5) title= title.replace('"','') comma=title.find(',') #eliminating commas in the title if comma != -1: print title time.sleep(1) album = hdf5_getters.get_release(h5) album= album.replace('"','') #eliminating commas in the album comma=album.find(',') if comma != -1: print album time.sleep(1) artist_name = hdf5_getters.get_artist_name(h5) comma=artist_name.find(',') if comma != -1: print artist_name time.sleep(1) artist_name= artist_name.replace('"','') #eliminating double quotes duration = hdf5_getters.get_duration(h5) samp_rt = hdf5_getters.get_analysis_sample_rate(h5) artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5) artist_fam = hdf5_getters.get_artist_familiarity(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_fam) == True: artist_fam=-1 artist_hotness= hdf5_getters.get_artist_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_hotness) == True: artist_hotness=-1 artist_id = hdf5_getters.get_artist_id(h5) artist_lat = hdf5_getters.get_artist_latitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lat) == True: artist_lat=-1 artist_loc = hdf5_getters.get_artist_location(h5) #checks artist_loc to see if it is a hyperlink if it is set as empty string artist_loc = artist_loc.replace(",", "\,"); if artist_loc.startswith("<a"): artist_loc = "" if len(artist_loc) > 100: artist_loc = "" artist_lon = hdf5_getters.get_artist_longitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lon) == True: artist_lon=-1 artist_mbid = hdf5_getters.get_artist_mbid(h5) artist_pmid = hdf5_getters.get_artist_playmeid(h5) audio_md5 = hdf5_getters.get_audio_md5(h5) danceability = hdf5_getters.get_danceability(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(danceability) == True: danceability=-1 end_fade_in =hdf5_getters.get_end_of_fade_in(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(end_fade_in) == True: end_fade_in=-1 energy = hdf5_getters.get_energy(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(energy) == True: energy=-1 song_key = hdf5_getters.get_key(h5) key_c = hdf5_getters.get_key_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(key_c) == True: key_c=-1 loudness = hdf5_getters.get_loudness(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(loudness) == True: loudness=-1 mode = hdf5_getters.get_mode(h5) mode_conf = hdf5_getters.get_mode_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(mode_conf) == True: mode_conf=-1 release_7digitalid = hdf5_getters.get_release_7digitalid(h5) song_hot = hdf5_getters.get_song_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(song_hot) == True: song_hot=-1 song_id = hdf5_getters.get_song_id(h5) start_fade_out = hdf5_getters.get_start_of_fade_out(h5) tempo = hdf5_getters.get_tempo(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(tempo) == True: tempo=-1 time_sig = hdf5_getters.get_time_signature(h5) time_sig_c = hdf5_getters.get_time_signature_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(time_sig_c) == True: time_sig_c=-1 track_id = hdf5_getters.get_track_id(h5) track_7digitalid = hdf5_getters.get_track_7digitalid(h5) year = hdf5_getters.get_year(h5) bars_c = hdf5_getters.get_bars_confidence(h5) bars_c_avg= get_avg(bars_c) bars_c_max= get_max(bars_c) bars_c_min = get_min(bars_c) bars_c_stddev= get_stddev(bars_c) bars_c_count = get_count(bars_c) bars_c_sum = get_sum(bars_c) bars_start = hdf5_getters.get_bars_start(h5) bars_start_avg = get_avg(bars_start) bars_start_max= get_max(bars_start) bars_start_min = get_min(bars_start) bars_start_stddev= get_stddev(bars_start) bars_start_count = get_count(bars_start) bars_start_sum = get_sum(bars_start) beats_c = hdf5_getters.get_beats_confidence(h5) beats_c_avg= get_avg(beats_c) beats_c_max= get_max(beats_c) beats_c_min = get_min(beats_c) beats_c_stddev= get_stddev(beats_c) beats_c_count = get_count(beats_c) beats_c_sum = get_sum(beats_c) beats_start = hdf5_getters.get_beats_start(h5) beats_start_avg = get_avg(beats_start) beats_start_max= get_max(beats_start) beats_start_min = get_min(beats_start) beats_start_stddev= get_stddev(beats_start) beats_start_count = get_count(beats_start) beats_start_sum = get_sum(beats_start) sec_c = hdf5_getters.get_sections_confidence(h5) sec_c_avg= get_avg(sec_c) sec_c_max= get_max(sec_c) sec_c_min = get_min(sec_c) sec_c_stddev= get_stddev(sec_c) sec_c_count = get_count(sec_c) sec_c_sum = get_sum(sec_c) sec_start = hdf5_getters.get_sections_start(h5) sec_start_avg = get_avg(sec_start) sec_start_max= get_max(sec_start) sec_start_min = get_min(sec_start) sec_start_stddev= get_stddev(sec_start) sec_start_count = get_count(sec_start) sec_start_sum = get_sum(sec_start) seg_c = hdf5_getters.get_segments_confidence(h5) seg_c_avg= get_avg(seg_c) seg_c_max= get_max(seg_c) seg_c_min = get_min(seg_c) seg_c_stddev= get_stddev(seg_c) seg_c_count = get_count(seg_c) seg_c_sum = get_sum(seg_c) seg_loud_max = hdf5_getters.get_segments_loudness_max(h5) seg_loud_max_avg= get_avg(seg_loud_max) seg_loud_max_max= get_max(seg_loud_max) seg_loud_max_min = get_min(seg_loud_max) seg_loud_max_stddev= get_stddev(seg_loud_max) seg_loud_max_count = get_count(seg_loud_max) seg_loud_max_sum = get_sum(seg_loud_max) seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5) seg_loud_max_time_avg= get_avg(seg_loud_max_time) seg_loud_max_time_max= get_max(seg_loud_max_time) seg_loud_max_time_min = get_min(seg_loud_max_time) seg_loud_max_time_stddev= get_stddev(seg_loud_max_time) seg_loud_max_time_count = get_count(seg_loud_max_time) seg_loud_max_time_sum = get_sum(seg_loud_max_time) seg_loud_start = hdf5_getters.get_segments_loudness_start(h5) seg_loud_start_avg= get_avg(seg_loud_start) seg_loud_start_max= get_max(seg_loud_start) seg_loud_start_min = get_min(seg_loud_start) seg_loud_start_stddev= get_stddev(seg_loud_start) seg_loud_start_count = get_count(seg_loud_start) seg_loud_start_sum = get_sum(seg_loud_start) seg_pitch = hdf5_getters.get_segments_pitches(h5) pitch_size = len(seg_pitch) seg_start = hdf5_getters.get_segments_start(h5) seg_start_avg= get_avg(seg_start) seg_start_max= get_max(seg_start) seg_start_min = get_min(seg_start) seg_start_stddev= get_stddev(seg_start) seg_start_count = get_count(seg_start) seg_start_sum = get_sum(seg_start) seg_timbre = hdf5_getters.get_segments_timbre(h5) tatms_c = hdf5_getters.get_tatums_confidence(h5) tatms_c_avg= get_avg(tatms_c) tatms_c_max= get_max(tatms_c) tatms_c_min = get_min(tatms_c) tatms_c_stddev= get_stddev(tatms_c) tatms_c_count = get_count(tatms_c) tatms_c_sum = get_sum(tatms_c) tatms_start = hdf5_getters.get_tatums_start(h5) tatms_start_avg= get_avg(tatms_start) tatms_start_max= get_max(tatms_start) tatms_start_min = get_min(tatms_start) tatms_start_stddev= get_stddev(tatms_start) tatms_start_count = get_count(tatms_start) tatms_start_sum = get_sum(tatms_start) #Getting the genres genre_set = 0 #flag to see if the genre has been set or not art_trm = hdf5_getters.get_artist_terms(h5) trm_freq = hdf5_getters.get_artist_terms_freq(h5) trn_wght = hdf5_getters.get_artist_terms_weight(h5) a_mb_tags = hdf5_getters.get_artist_mbtags(h5) genre_indexes=get_genre_indexes(trm_freq) #index of the highest freq final_genre=[] genres_so_far=[] for i in range(len(genre_indexes)): genre_tmp=get_genre(art_trm,genre_indexes[i]) #genre that corresponds to the highest freq genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary if len(genres_so_far) != 0: for i in genres_so_far: final_genre.append(i) genre_set=1 #genre was found in dictionary if genre_set == 1: col_num=[] for genre in final_genre: column=int(genre) #getting the column number of the genre col_num.append(column) genre_array=genre_columns(col_num) #genre array else: genre_array=genre_columns(-1) #the genre was not found in the dictionary transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows #arrays containing the aggregate values of the 12 rows seg_pitch_avg=[] seg_pitch_max=[] seg_pitch_min=[] seg_pitch_stddev=[] seg_pitch_count=[] seg_pitch_sum=[] i=0 #Getting the aggregate values in the pitches array for row in transpose_pitch: seg_pitch_avg.append(get_avg(row)) seg_pitch_max.append(get_max(row)) seg_pitch_min.append(get_min(row)) seg_pitch_stddev.append(get_stddev(row)) seg_pitch_count.append(get_count(row)) seg_pitch_sum.append(get_sum(row)) i=i+1 #extracting information from the timbre array transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows #arrays containing the aggregate values of the 12 rows seg_timbre_avg=[] seg_timbre_max=[] seg_timbre_min=[] seg_timbre_stddev=[] seg_timbre_count=[] seg_timbre_sum=[] i=0 for row in transpose_timbre: seg_timbre_avg.append(get_avg(row)) seg_timbre_max.append(get_max(row)) seg_timbre_min.append(get_min(row)) seg_timbre_stddev.append(get_stddev(row)) seg_timbre_count.append(get_count(row)) seg_timbre_sum.append(get_sum(row)) i=i+1 #Writing to the flat file writer.writerow([title,album,artist_name,duration,samp_rt,artist_7digitalid,artist_fam,artist_hotness,artist_id,artist_lat,artist_loc,artist_lon,artist_mbid,genre_array[0],genre_array[1],genre_array[2], genre_array[3],genre_array[4],genre_array[5],genre_array[6],genre_array[7],genre_array[8],genre_array[9],genre_array[10],genre_array[11],genre_array[12],genre_array[13],genre_array[14],genre_array[15], genre_array[16],genre_array[17],genre_array[18],genre_array[19],genre_array[20],genre_array[21],genre_array[22],genre_array[23],genre_array[24],genre_array[25],genre_array[26], genre_array[27],genre_array[28],genre_array[29],genre_array[30],genre_array[31],genre_array[32],genre_array[33],genre_array[34],genre_array[35],genre_array[36],genre_array[37],genre_array[38], genre_array[39],genre_array[40],genre_array[41],genre_array[42],genre_array[43],genre_array[44],genre_array[45],genre_array[46],genre_array[47],genre_array[48],genre_array[49], genre_array[50],genre_array[51],genre_array[52],genre_array[53],genre_array[54],genre_array[55],genre_array[56],genre_array[57],genre_array[58],genre_array[59], genre_array[60],genre_array[61],genre_array[62],genre_array[63],genre_array[64],genre_array[65],genre_array[66],genre_array[67],genre_array[68],genre_array[69], genre_array[70],genre_array[71],genre_array[72],genre_array[73],genre_array[74],genre_array[75],genre_array[76],genre_array[77],genre_array[78],genre_array[79], genre_array[80],genre_array[81],genre_array[82],genre_array[83],genre_array[84],genre_array[85],genre_array[86],genre_array[87],genre_array[88],genre_array[89], genre_array[90],genre_array[91],genre_array[92],genre_array[93],genre_array[94],genre_array[95],genre_array[96],genre_array[97],genre_array[98],genre_array[99],genre_array[100],genre_array[101], genre_array[102],genre_array[103],genre_array[104],genre_array[105],genre_array[106],genre_array[107],genre_array[108],genre_array[109],genre_array[110],genre_array[111],genre_array[112], genre_array[113],genre_array[114],genre_array[115],genre_array[116],genre_array[117],genre_array[118],genre_array[119],genre_array[120],genre_array[121],genre_array[122],genre_array[123], genre_array[124],genre_array[125],genre_array[126],genre_array[127],genre_array[128],genre_array[129],genre_array[130],genre_array[131],genre_array[132], artist_pmid,audio_md5,danceability,end_fade_in,energy,song_key,key_c,loudness,mode,mode_conf,release_7digitalid,song_hot,song_id,start_fade_out,tempo,time_sig,time_sig_c,track_id,track_7digitalid,year,bars_c_avg,bars_c_max,bars_c_min,bars_c_stddev,bars_c_count,bars_c_sum,bars_start_avg,bars_start_max,bars_start_min,bars_start_stddev,bars_start_count,bars_start_sum,beats_c_avg,beats_c_max,beats_c_min,beats_c_stddev,beats_c_count,beats_c_sum,beats_start_avg,beats_start_max,beats_start_min, beats_start_stddev,beats_start_count,beats_start_sum, sec_c_avg,sec_c_max,sec_c_min,sec_c_stddev,sec_c_count,sec_c_sum,sec_start_avg,sec_start_max,sec_start_min,sec_start_stddev,sec_start_count,sec_start_sum,seg_c_avg,seg_c_max,seg_c_min,seg_c_stddev,seg_c_count,seg_c_sum,seg_loud_max_avg,seg_loud_max_max,seg_loud_max_min,seg_loud_max_stddev,seg_loud_max_count,seg_loud_max_sum,seg_loud_max_time_avg,seg_loud_max_time_max,seg_loud_max_time_min,seg_loud_max_time_stddev,seg_loud_max_time_count,seg_loud_max_time_sum,seg_loud_start_avg,seg_loud_start_max,seg_loud_start_min,seg_loud_start_stddev,seg_loud_start_count,seg_loud_start_sum,seg_pitch_avg[0],seg_pitch_max[0],seg_pitch_min[0],seg_pitch_stddev[0],seg_pitch_count[0],seg_pitch_sum[0],seg_pitch_avg[1],seg_pitch_max[1],seg_pitch_min[1],seg_pitch_stddev[1],seg_pitch_count[1],seg_pitch_sum[1],seg_pitch_avg[2],seg_pitch_max[2],seg_pitch_min[2],seg_pitch_stddev[2],seg_pitch_count[2],seg_pitch_sum[2],seg_pitch_avg[3],seg_pitch_max[3],seg_pitch_min[3],seg_pitch_stddev[3],seg_pitch_count[3],seg_pitch_sum[3],seg_pitch_avg[4],seg_pitch_max[4],seg_pitch_min[4],seg_pitch_stddev[4],seg_pitch_count[4],seg_pitch_sum[4],seg_pitch_avg[5],seg_pitch_max[5],seg_pitch_min[5],seg_pitch_stddev[5],seg_pitch_count[5],seg_pitch_sum[5],seg_pitch_avg[6],seg_pitch_max[6],seg_pitch_min[6],seg_pitch_stddev[6],seg_pitch_count[6],seg_pitch_sum[6],seg_pitch_avg[7],seg_pitch_max[7],seg_pitch_min[7],seg_pitch_stddev[7],seg_pitch_count[7],seg_pitch_sum[7],seg_pitch_avg[8],seg_pitch_max[8],seg_pitch_min[8],seg_pitch_stddev[8],seg_pitch_count[8],seg_pitch_sum[8],seg_pitch_avg[9],seg_pitch_max[9],seg_pitch_min[9],seg_pitch_stddev[9],seg_pitch_count[9],seg_pitch_sum[9],seg_pitch_avg[10],seg_pitch_max[10],seg_pitch_min[10],seg_pitch_stddev[10],seg_pitch_count[10],seg_pitch_sum[10],seg_pitch_avg[11],seg_pitch_max[11],seg_pitch_min[11], seg_pitch_stddev[11],seg_pitch_count[11],seg_pitch_sum[11],seg_start_avg,seg_start_max,seg_start_min,seg_start_stddev, seg_start_count,seg_start_sum,seg_timbre_avg[0],seg_timbre_max[0],seg_timbre_min[0],seg_timbre_stddev[0],seg_timbre_count[0], seg_timbre_sum[0],seg_timbre_avg[1],seg_timbre_max[1],seg_timbre_min[1],seg_timbre_stddev[1],seg_timbre_count[1], seg_timbre_sum[1],seg_timbre_avg[2],seg_timbre_max[2],seg_timbre_min[2],seg_timbre_stddev[2],seg_timbre_count[2], seg_timbre_sum[2],seg_timbre_avg[3],seg_timbre_max[3],seg_timbre_min[3],seg_timbre_stddev[3],seg_timbre_count[3], seg_timbre_sum[3],seg_timbre_avg[4],seg_timbre_max[4],seg_timbre_min[4],seg_timbre_stddev[4],seg_timbre_count[4], seg_timbre_sum[4],seg_timbre_avg[5],seg_timbre_max[5],seg_timbre_min[5],seg_timbre_stddev[5],seg_timbre_count[5], seg_timbre_sum[5],seg_timbre_avg[6],seg_timbre_max[6],seg_timbre_min[6],seg_timbre_stddev[6],seg_timbre_count[6], seg_timbre_sum[6],seg_timbre_avg[7],seg_timbre_max[7],seg_timbre_min[7],seg_timbre_stddev[7],seg_timbre_count[7], seg_timbre_sum[7],seg_timbre_avg[8],seg_timbre_max[8],seg_timbre_min[8],seg_timbre_stddev[8],seg_timbre_count[8], seg_timbre_sum[8],seg_timbre_avg[9],seg_timbre_max[9],seg_timbre_min[9],seg_timbre_stddev[9],seg_timbre_count[9], seg_timbre_sum[9],seg_timbre_avg[10],seg_timbre_max[10],seg_timbre_min[10],seg_timbre_stddev[10],seg_timbre_count[10], seg_timbre_sum[10],seg_timbre_avg[11],seg_timbre_max[11],seg_timbre_min[11],seg_timbre_stddev[11],seg_timbre_count[11], seg_timbre_sum[11],tatms_c_avg,tatms_c_max,tatms_c_min,tatms_c_stddev,tatms_c_count,tatms_c_sum,tatms_start_avg,tatms_start_max,tatms_start_min,tatms_start_stddev,tatms_start_count,tatms_start_sum]) h5.close() count=count+1; print count;
def complete_hd5_to_csv(basedir): ext = '.h5' # Get all files with extension .h5 # Header title. Essentially it is a schema for all the following songs header = [ 'Title', 'Artist familiarity', 'Artist hotness', 'Artist ID', 'Artist mbID', 'Artist playmeid', 'Artist 7DigitalID', 'Artist latitude', 'Artist longitude', 'Artist location', 'Artist Name', 'Release', 'Release 7DigitalID', 'Song ID', 'Song Hotness', 'Track 7Digital', 'Analysis sample rate', 'Audio md5', 'Danceability', 'Duration', 'End of Fade', 'Energy', 'Key', 'Key Confidence', 'Loudness', 'Mode', 'Mode Confidence', 'Start of fade out', 'Tempo', 'Time signature', 'Time signature confidence', 'Track ID', 'Year' ] with open('Tester2.csv', 'w', newline='') as csvfile: csv_writer = csv.writer(csvfile, delimiter=';') # writing the header line. This line contains the schema of the data csv_writer.writerow(header) # Read all files from the given directories for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) print(files) for f in files: h5 = hdf5_getters.open_h5_file_read(f) # Write as row all elements. NOTE: Only the serialized elements are parsed and not arrays csv_writer.writerow([ hdf5_getters.get_title(h5), hdf5_getters.get_artist_familiarity(h5), hdf5_getters.get_artist_hotttnesss(h5), hdf5_getters.get_artist_id(h5), hdf5_getters.get_artist_mbid(h5), hdf5_getters.get_artist_playmeid(h5), hdf5_getters.get_artist_7digitalid(h5), hdf5_getters.get_artist_latitude(h5), hdf5_getters.get_artist_longitude(h5), hdf5_getters.get_artist_location(h5), hdf5_getters.get_artist_name(h5), hdf5_getters.get_release(h5), hdf5_getters.get_release_7digitalid(h5), hdf5_getters.get_song_id(h5), hdf5_getters.get_song_hotttnesss(h5), hdf5_getters.get_track_7digitalid(h5), hdf5_getters.get_analysis_sample_rate(h5), hdf5_getters.get_audio_md5(h5), hdf5_getters.get_danceability(h5), hdf5_getters.get_duration(h5), hdf5_getters.get_end_of_fade_in(h5), hdf5_getters.get_energy(h5), hdf5_getters.get_key(h5), hdf5_getters.get_key_confidence(h5), hdf5_getters.get_loudness(h5), hdf5_getters.get_mode(h5), hdf5_getters.get_mode_confidence(h5), hdf5_getters.get_start_of_fade_out(h5), hdf5_getters.get_tempo(h5), hdf5_getters.get_time_signature(h5), hdf5_getters.get_time_signature_confidence(h5), hdf5_getters.get_track_id(h5), hdf5_getters.get_year(h5) ]) # For debugging purposes. Everything as expected # print() # print("Num of songs -- ", hdf5_getters.get_num_songs(h5)) # One song per file # print("Title -- ", hdf5_getters.get_title(h5)) # Print the title of a specific h5 file # print("Artist familiarity -- ", hdf5_getters.get_artist_familiarity(h5)) # print("Artist hotness -- ", hdf5_getters.get_artist_hotttnesss(h5)) # print("Artist ID -- ", hdf5_getters.get_artist_id(h5)) # print("Artist mbID -- ", hdf5_getters.get_artist_mbid(h5)) # print("Artist playmeid -- ", hdf5_getters.get_artist_playmeid(h5)) # print("Artist 7DigitalID -- ", hdf5_getters.get_artist_7digitalid(h5)) # print("Artist latitude -- ", hdf5_getters.get_artist_latitude(h5)) # print("Artist longitude -- ", hdf5_getters.get_artist_longitude(h5)) # print("Artist location -- ", hdf5_getters.get_artist_location(h5)) # print("Artist Name -- ", hdf5_getters.get_artist_name(h5)) # print("Release -- ", hdf5_getters.get_release(h5)) # print("Release 7DigitalID -- ", hdf5_getters.get_release_7digitalid(h5)) # print("Song ID -- ", hdf5_getters.get_song_id(h5)) # print("Song Hotness -- ", hdf5_getters.get_song_hotttnesss(h5)) # print("Track 7Digital -- ", hdf5_getters.get_track_7digitalid(h5)) # print("Analysis sample rate -- ", hdf5_getters.get_analysis_sample_rate(h5)) # print("Audio md5 -- ", hdf5_getters.get_audio_md5(h5)) # print("Danceability -- ", hdf5_getters.get_danceability(h5)) # print("Duration -- ", hdf5_getters.get_duration(h5)) # print("End of Fade -- ", hdf5_getters.get_end_of_fade_in(h5)) # print("Energy -- ", hdf5_getters.get_energy(h5)) # print("Key -- ", hdf5_getters.get_key(h5)) # print("Key Confidence -- ", hdf5_getters.get_key_confidence(h5)) # print("Loudness -- ", hdf5_getters.get_loudness(h5)) # print("Mode -- ", hdf5_getters.get_mode(h5)) # print("Mode Confidence -- ", hdf5_getters.get_mode_confidence(h5)) # print("Start of fade out -- ", hdf5_getters.get_start_of_fade_out(h5)) # print("Tempo -- ", hdf5_getters.get_tempo(h5)) # print("Time signature -- ", hdf5_getters.get_time_signature(h5)) # print("Time signature confidence -- ", hdf5_getters.get_time_signature_confidence(h5)) # print("Track ID -- ", hdf5_getters.get_track_id(h5)) # # print("Artist mbtags -- ", hdf5_getters.get_artist_mbtags(h5)) # # print("Artist mbtags count -- ", hdf5_getters.get_artist_mbtags_count(h5)) # print("Year -- ", hdf5_getters.get_year(h5)) h5.close()
def main(): outputFile1 = open('SongCSVFinal.csv', 'w') csvRowString = "" ################################################# #if you want to prompt the user for the order of attributes in the csv, #leave the prompt boolean set to True #else, set 'prompt' to False and set the order of attributes in the 'else' #clause prompt = False ################################################# if prompt == True: while prompt: prompt = False csvAttributeString = raw_input( "\n\nIn what order would you like the colums of the CSV file?\n" + "Please delineate with commas. The options are: " + "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude," + " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," + " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" + "For example, you may write \"Title, Tempo, Duration\"...\n\n" + "...or exit by typing 'exit'.\n\n") csvAttributeList = re.split('\W+', csvAttributeString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += 'AlbumID' elif attribute == 'AlbumName'.lower(): csvRowString += 'AlbumName' elif attribute == 'ArtistID'.lower(): csvRowString += 'ArtistID' elif attribute == 'ArtistLatitude'.lower(): csvRowString += 'ArtistLatitude' elif attribute == 'ArtistLocation'.lower(): csvRowString += 'ArtistLocation' elif attribute == 'ArtistLongitude'.lower(): csvRowString += 'ArtistLongitude' elif attribute == 'ArtistName'.lower(): csvRowString += 'ArtistName' elif attribute == 'Danceability'.lower(): csvRowString += 'Danceability' elif attribute == 'Duration'.lower(): csvRowString += 'Duration' elif attribute == 'KeySignature'.lower(): csvRowString += 'KeySignature' elif attribute == 'KeySignatureConfidence'.lower(): csvRowString += 'KeySignatureConfidence' elif attribute == 'SongID'.lower(): csvRowString += "SongID" elif attribute == 'Tempo'.lower(): csvRowString += 'Tempo' elif attribute == 'TimeSignature'.lower(): csvRowString += 'TimeSignature' elif attribute == 'TimeSignatureConfidence'.lower(): csvRowString += 'TimeSignatureConfidence' elif attribute == 'Title'.lower(): csvRowString += 'Title' elif attribute == 'Year'.lower(): csvRowString += 'Year' elif attribute == 'Exit'.lower(): sys.exit() else: prompt = True print "==============" print "I believe there has been an error with the input." print "==============" break csvRowString += "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" #else, if you want to hard code the order of the csv file and not prompt #the user, else: ################################################# #change the order of the csv file here #Default is to list all available attributes (in alphabetical order) csvRowString = ( "SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation," + "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature," + "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence," + "Title,Year,mbID,Energy,ArtistFamiliarity,Hotness,end_of_fade_in,key,keyConfidence,Loudness," + "mode,mode_confidence,start_of_fade_out") ################################################# csvAttributeList = re.split('\W+', csvRowString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() outputFile1.write("SongNumber,") outputFile1.write(csvRowString + "\n") csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate basedir = "." # "." As the default means the current directory ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #files = glob.glob(os.path.join(root,'*'+ext)) #FOR LOOP for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) print(files) for f in files: songH5File = hdf5_getters.open_h5_file_read(f) print('hello 1') print(songH5File) song = Song(str(hdf5_getters.get_song_id(songH5File))) testDanceability = hdf5_getters.get_danceability(songH5File) # print type(testDanceability) # print ("Here is the danceability: ") + str(testDanceability) song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File)) song.albumName = str(hdf5_getters.get_release(songH5File)) song.artistLatitude = str( hdf5_getters.get_artist_latitude(songH5File)) song.artistLocation = str( hdf5_getters.get_artist_location(songH5File)) song.artistLongitude = str( hdf5_getters.get_artist_longitude(songH5File)) song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) # song.setGenreList() song.keySignature = str(hdf5_getters.get_key(songH5File)) song.keySignatureConfidence = str( hdf5_getters.get_key_confidence(songH5File)) # song.lyrics = None # song.popularity = None song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str( hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str( hdf5_getters.get_time_signature_confidence(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) song.mbID = str(hdf5_getters.get_artist_mbid(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) #song.beatConfidence = str(hdf5_getters.get_beats_confidence(songH5File)) song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.key = str(hdf5_getters.get_key(songH5File)) song.artistFamiliarity = str( hdf5_getters.get_artist_familiarity(songH5File)) song.keyConfidence = str( hdf5_getters.get_key_confidence(songH5File)) song.hotness = str(hdf5_getters.get_artist_hotttnesss(songH5File)) #song.sampleRate= str(hdf5_getters.get_analysis_sample_rate(songH5File)) song.end_of_fade_in = str( hdf5_getters.get_end_of_fade_in(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) song.mode_confidence = str( hdf5_getters.get_mode_confidence(songH5File)) song.start_of_fade_out = str( hdf5_getters.get_start_of_fade_out(songH5File)) #print song count csvRowString += str(song.songCount) + "," for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += song.albumID elif attribute == 'AlbumName'.lower(): albumName = song.albumName albumName = albumName.replace(',', "") csvRowString += "\"" + albumName + "\"" elif attribute == 'ArtistID'.lower(): csvRowString += "\"" + song.artistID + "\"" elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLocation'.lower(): location = song.artistLocation location = location.replace(',', '') csvRowString += "\"" + location + "\"" elif attribute == 'ArtistLongitude'.lower(): longitude = song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistName'.lower(): csvRowString += "\"" + song.artistName + "\"" elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'KeySignature'.lower(): csvRowString += song.keySignature elif attribute == 'KeySignatureConfidence'.lower(): # print "key sig conf: " + song.timeSignatureConfidence csvRowString += song.keySignatureConfidence elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'Title'.lower(): csvRowString += "\"" + song.title + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year elif attribute == 'mbID'.lower(): csvRowString += song.mbID elif attribute == 'Energy'.lower(): csvRowString += song.energy elif attribute == 'ArtistFamiliarity'.lower(): csvRowString += song.artistFamiliarity elif attribute == 'Hotness'.lower(): csvRowString += song.hotness #elif attribute == 'SampleRate'.lower(): #csvRowString += song.sampleRate elif attribute == 'end_of_fade_in'.lower(): csvRowString += song.end_of_fade_in elif attribute == 'key'.lower(): csvRowString += song.key elif attribute == 'keyConfidence'.lower(): csvRowString += song.keyConfidence elif attribute == 'Loudness'.lower(): csvRowString += song.loudness elif attribute == 'mode'.lower(): csvRowString += song.mode elif attribute == 'mode_confidence'.lower(): csvRowString += song.mode_confidence elif attribute == 'start_of_fade_out'.lower(): csvRowString += song.start_of_fade_out else: csvRowString += "Erm. This didn't work. Error. :( :(\n" csvRowString += "," #Remove the final comma from each row in the csv lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" songH5File.close() outputFile1.close()
def main(argv): if len(argv) != 1: print "Specify data directory" return basedir = argv[0] outputFile1 = open('SongCSV.csv', 'w') outputFile2 = open('TagsCSV.csv', 'w') csvRowString = "" csvLabelString = "" ################################################# #if you want to prompt the user for the order of attributes in the csv, #leave the prompt boolean set to True #else, set 'prompt' to False and set the order of attributes in the 'else' #clause prompt = False ################################################# if prompt == True: while prompt: prompt = False csvAttributeString = raw_input("\n\nIn what order would you like the colums of the CSV file?\n" + "Please delineate with commas. The options are: " + "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"+ " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," + " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" + "For example, you may write \"Title, Tempo, Duration\"...\n\n" + "...or exit by typing 'exit'.\n\n") csvAttributeList = re.split('\W+', csvAttributeString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += 'AlbumID' elif attribute == 'AlbumName'.lower(): csvRowString += 'AlbumName' elif attribute == 'ArtistID'.lower(): csvRowString += 'ArtistID' elif attribute == 'ArtistLatitude'.lower(): csvRowString += 'ArtistLatitude' elif attribute == 'ArtistLocation'.lower(): csvRowString += 'ArtistLocation' elif attribute == 'ArtistLongitude'.lower(): csvRowString += 'ArtistLongitude' elif attribute == 'ArtistName'.lower(): csvRowString += 'ArtistName' elif attribute == 'Danceability'.lower(): csvRowString += 'Danceability' elif attribute == 'Duration'.lower(): csvRowString += 'Duration' elif attribute == 'KeySignature'.lower(): csvRowString += 'KeySignature' elif attribute == 'KeySignatureConfidence'.lower(): csvRowString += 'KeySignatureConfidence' elif attribute == 'SongID'.lower(): csvRowString += "SongID" elif attribute == 'Tempo'.lower(): csvRowString += 'Tempo' elif attribute == 'TimeSignature'.lower(): csvRowString += 'TimeSignature' elif attribute == 'TimeSignatureConfidence'.lower(): csvRowString += 'TimeSignatureConfidence' elif attribute == 'Title'.lower(): csvRowString += 'Title' elif attribute == 'Year'.lower(): csvRowString += 'Year' elif attribute == 'Exit'.lower(): sys.exit() else: prompt = True print "==============" print "I believe there has been an error with the input." print "==============" break csvRowString += "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] csvRowString += "\n" outputFile1.write(csvRowString); csvRowString = "" #else, if you want to hard code the order of the csv file and not prompt #the user, else: ################################################# #change the order of the csv file here #Default is to list all available attributes (in alphabetical order) #csvRowString = ("SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"+ # "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,"+ # "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"+ # "Title,Year") csvRowString = ("ArtistFamiliarity,ArtistHotttnesss,"+ "BarsConfidence,BarsStart,BeatsConfidence,BeatsStart,Duration,"+ "EndOfFadeIn,Key,KeyConfidence,Loudness,Mode,ModeConfidence,"+ "SectionsConfidence,SectionsStart,SegmentsConfidence,SegmentsLoudnessMax,"+ "SegmentsLoudnessMaxTime,SegmentsLoudnessStart,SegmentsStart,"+ "SongHotttnesss,StartOfFadeOut,TatumsConfidence,TatumsStart,Tempo,TimeSignature,TimeSignatureConfidence,"+ "SegmentsPitches,SegmentsTimbre,Title,Year,Decade,ArtistMbtags") ################################################# header = str() csvAttributeList = re.split('\W+', csvRowString) arrayAttributes = ["BarsConfidence","BarsStart","BeatsConfidence","BeatsStart", "SectionsConfidence","SectionsStart","SegmentsConfidence","SegmentsLoudnessMax", "SegmentsLoudnessMaxTime","SegmentsLoudnessStart","SegmentsStart", "TatumsConfidence","TatumsStart"] for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() if(v=="SegmentsPitches"): for i in range(90): header = header + "SegmentsPitches" + str(i) + "," elif(v=="SegmentsTimbre"): for i in range(90): header = header + "SegmentsTimbre" + str(i) + "," elif(v in arrayAttributes): header = header + v + str(0) + "," header = header + v + str(1) + "," else: header = header + v + "," outputFile1.write("SongNumber,"); #outputFile1.write(csvRowString + "\n"); outputFile1.write(header + "\n"); csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate #basedir = "MillionSongSubset/data/A/A/" # "." As the default means the current directory ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP all = sorted(os.walk(basedir)) for root, dirs, files in all: files = sorted(glob.glob(os.path.join(root,'*'+ext))) for f in files: print f songH5File = hdf5_getters.open_h5_file_read(f) song = Song(str(hdf5_getters.get_song_id(songH5File))) #testDanceability = hdf5_getters.get_danceability(songH5File) # print type(testDanceability) # print ("Here is the danceability: ") + str(testDanceability) song.analysisSampleRate = str(hdf5_getters.get_analysis_sample_rate(songH5File)) song.artistFamiliarity = str(hdf5_getters.get_artist_familiarity(songH5File)) song.artistHotttnesss = str(hdf5_getters.get_artist_hotttnesss(songH5File)) song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File)) song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File)) song.artistMbid = str(hdf5_getters.get_artist_mbid(songH5File)) song.barsConfidence = np.array(hdf5_getters.get_bars_confidence(songH5File)) song.barsStart = np.array(hdf5_getters.get_bars_start(songH5File)) song.beatsConfidence = np.array(hdf5_getters.get_beats_confidence(songH5File)) song.beatsStart = np.array(hdf5_getters.get_beats_start(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) song.key = str(hdf5_getters.get_key(songH5File)) song.keyConfidence = str(hdf5_getters.get_key_confidence(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) song.modeConfidence = str(hdf5_getters.get_mode_confidence(songH5File)) song.sectionsConfidence = np.array(hdf5_getters.get_sections_confidence(songH5File)) song.sectionsStart = np.array(hdf5_getters.get_sections_start(songH5File)) song.segmentsConfidence = np.array(hdf5_getters.get_segments_confidence(songH5File)) song.segmentsLoudnessMax = np.array(hdf5_getters.get_segments_loudness_max(songH5File)) song.segmentsLoudnessMaxTime = np.array(hdf5_getters.get_segments_loudness_max_time(songH5File)) song.segmentsLoudnessStart = np.array(hdf5_getters.get_segments_loudness_start(songH5File)) song.segmentsPitches = np.array(hdf5_getters.get_segments_pitches(songH5File)) song.segmentsStart = np.array(hdf5_getters.get_segments_start(songH5File)) song.segmentsTimbre = np.array(hdf5_getters.get_segments_timbre(songH5File)) song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File)) song.startOfFadeOut = str(hdf5_getters.get_start_of_fade_out(songH5File)) song.tatumsConfidence = np.array(hdf5_getters.get_tatums_confidence(songH5File)) song.tatumsStart = np.array(hdf5_getters.get_tatums_start(songH5File)) song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str(hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File)) song.songid = str(hdf5_getters.get_song_id(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) song.artistMbtags = str(hdf5_getters.get_artist_mbtags(songH5File)) #print song count csvRowString += str(song.songCount) + "," csvLabelString += str(song.songCount) + "," for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AnalysisSampleRate'.lower(): csvRowString += song.analysisSampleRate elif attribute == 'ArtistFamiliarity'.lower(): csvRowString += song.artistFamiliarity elif attribute == 'ArtistHotttnesss'.lower(): csvRowString += song.artistHotttnesss elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLongitude'.lower(): longitude = song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistMbid'.lower(): csvRowString += song.artistMbid elif attribute == 'BarsConfidence'.lower(): arr = song.barsConfidence if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'BarsStart'.lower(): arr = song.barsStart if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'BeatsConfidence'.lower(): arr = song.beatsConfidence if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'BeatsStart'.lower(): arr = song.beatsStart if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'EndOfFadeIn'.lower(): csvRowString += song.endOfFadeIn elif attribute == 'Energy'.lower(): csvRowString += song.energy elif attribute == 'Key'.lower(): csvRowString += song.key elif attribute == 'KeyConfidence'.lower(): csvRowString += song.keyConfidence elif attribute == 'Loudness'.lower(): csvRowString += song.loudness elif attribute == 'Mode'.lower(): csvRowString += song.mode elif attribute == 'ModeConfidence'.lower(): csvRowString += song.modeConfidence elif attribute == 'SectionsConfidence'.lower(): arr = song.sectionsConfidence if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SectionsStart'.lower(): arr = song.sectionsStart if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SegmentsConfidence'.lower(): arr = song.segmentsConfidence if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SegmentsLoudnessMax'.lower(): arr = song.segmentsLoudnessMax if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SegmentsLoudnessMaxTime'.lower(): arr = song.segmentsLoudnessMaxTime if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SegmentsLoudnessStart'.lower(): arr = song.segmentsLoudnessStart if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SegmentsStart'.lower(): arr = song.segmentsStart if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SongHotttnesss'.lower(): hotttnesss = song.songHotttnesss if hotttnesss == 'nan': hotttnesss = 'NaN' csvRowString += hotttnesss elif attribute == 'StartOfFadeOut'.lower(): csvRowString += song.startOfFadeOut elif attribute == 'TatumsConfidence'.lower(): arr = song.tatumsConfidence if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'TatumsStart'.lower(): arr = song.tatumsStart if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'SegmentsPitches'.lower(): colmean = np.mean(song.segmentsPitches,axis=0) for m in colmean: csvRowString += str(m) + "," cov = np.dot(song.segmentsPitches.T,song.segmentsPitches) utriind = np.triu_indices(cov.shape[0]) feats = cov[utriind] for feat in feats: csvRowString += str(feat) + "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] elif attribute == 'SegmentsTimbre'.lower(): colmean = np.mean(song.segmentsTimbre,axis=0) for m in colmean: csvRowString += str(m) + "," cov = np.dot(song.segmentsTimbre.T,song.segmentsTimbre) utriind = np.triu_indices(cov.shape[0]) feats = cov[utriind] for feat in feats: csvRowString += str(feat) + "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'Title'.lower(): csvRowString += "\"" + song.title + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year elif attribute == 'Decade'.lower(): yr = song.year if yr > 0: decade = song.year[:-1] + '0' else: decade = '0' csvRowString += decade elif attribute == 'ArtistMbtags'.lower(): tags = song.artistMbtags[1:-1] tags = "\"" + tags + "\"" tags = tags.replace("\n",'') csvRowString += tags tagsarray = shlex.split(tags) for t in tagsarray: csvLabelString += t + "," else: csvRowString += "Erm. This didn't work. Error. :( :(\n" csvRowString += "," ''' if attribute == 'AlbumID'.lower(): csvRowString += song.albumID elif attribute == 'AlbumName'.lower(): albumName = song.albumName albumName = albumName.replace(',',"") csvRowString += "\"" + albumName + "\"" elif attribute == 'ArtistID'.lower(): csvRowString += "\"" + song.artistID + "\"" elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLocation'.lower(): location = song.artistLocation location = location.replace(',','') csvRowString += "\"" + location + "\"" elif attribute == 'ArtistLongitude'.lower(): longitude = song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistName'.lower(): csvRowString += "\"" + song.artistName + "\"" elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'KeySignature'.lower(): csvRowString += song.keySignature elif attribute == 'KeySignatureConfidence'.lower(): # print "key sig conf: " + song.timeSignatureConfidence csvRowString += song.keySignatureConfidence elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'Title'.lower(): csvRowString += "\"" + song.title + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year else: csvRowString += "Erm. This didn't work. Error. :( :(\n" csvRowString += "," ''' #Remove the final comma from each row in the csv lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" lastIndex = len(csvLabelString) csvLabelString = csvLabelString[0:lastIndex-1] csvLabelString += "\n" outputFile2.write(csvLabelString) csvLabelString = "" songH5File.close() outputFile1.close() outputFile2.close()
files = glob.glob(os.path.join(root,'*'+'.h5')) for f in files : h5 = GETTERS.open_h5_file_read(f) num_songs = GETTERS.get_num_songs(h5) print f, num_songs for i in range(num_songs): analysis_sample_rate = GETTERS.get_analysis_sample_rate(h5, i) artist_7digitalid = GETTERS.get_artist_7digitalid(h5, i) artist_familiarity = GETTERS.get_artist_familiarity(h5, i) artist_hotttnesss = GETTERS.get_artist_hotttnesss(h5, i) artist_id = GETTERS.get_artist_id(h5, i) artist_latitude = GETTERS.get_artist_latitude(h5, i) artist_location = GETTERS.get_artist_location(h5, i) artist_longitude = GETTERS.get_artist_longitude(h5, i) artist_mbid = GETTERS.get_artist_mbid(h5, i) artist_mbtags = ','.join(str(e) for e in GETTERS.get_artist_mbtags(h5, i)) # array artist_mbtags_count = ','.join(str(e) for e in GETTERS.get_artist_mbtags_count(h5, i)) # array artist_name = GETTERS.get_artist_name(h5, i) artist_playmeid = GETTERS.get_artist_playmeid(h5, i) artist_terms = ','.join(str(e) for e in GETTERS.get_artist_terms(h5, i)) # array #artist_terms_freq = ','.join(str(e) for e in GETTERS.get_artist_terms_freq(h5, i)) # array #artist_terms_weight = ','.join(str(e) for e in GETTERS.get_artist_terms_weight(h5, i)) # array #audio_md5 = GETTERS.get_audio_md5(h5, i) #bars_confidence = ','.join(str(e) for e in GETTERS.get_bars_confidence(h5, i)) # array #bars_start = ','.join(str(e) for e in GETTERS.get_bars_start(h5, i)) # array #beats_confidence = ','.join(str(e) for e in GETTERS.get_beats_confidence(h5, i)) # array #beats_start = ','.join(str(e) for e in GETTERS.get_beats_start(h5, i)) # array danceability = GETTERS.get_danceability(h5, i) duration = GETTERS.get_duration(h5, i) end_of_fade_in = GETTERS.get_end_of_fade_in(h5, i)
def main(): outputFile1 = open('SongCSV.csv', 'w') csvRowString = "" ################################################# #if you want to prompt the user for the order of attributes in the csv, #leave the prompt boolean set to True #else, set 'prompt' to False and set the order of attributes in the 'else' #clause prompt = False ################################################# if prompt == True: while prompt: prompt = False csvAttributeString = raw_input("\n\nIn what order would you like the colums of the CSV file?\n" + "Please delineate with commas. The options are: " + "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"+ " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," + " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" + "For example, you may write \"Title, Tempo, Duration\"...\n\n" + "...or exit by typing 'exit'.\n\n") csvAttributeList = re.split('\W+', csvAttributeString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += 'AlbumID' elif attribute == 'AlbumName'.lower(): csvRowString += 'AlbumName' elif attribute == 'ArtistID'.lower(): csvRowString += 'ArtistID' elif attribute == 'ArtistLatitude'.lower(): csvRowString += 'ArtistLatitude' elif attribute == 'ArtistLocation'.lower(): csvRowString += 'ArtistLocation' elif attribute == 'ArtistLongitude'.lower(): csvRowString += 'ArtistLongitude' elif attribute == 'ArtistName'.lower(): csvRowString += 'ArtistName' elif attribute == 'Danceability'.lower(): csvRowString += 'Danceability' elif attribute == 'Duration'.lower(): csvRowString += 'Duration' elif attribute == 'KeySignature'.lower(): csvRowString += 'KeySignature' elif attribute == 'KeySignatureConfidence'.lower(): csvRowString += 'KeySignatureConfidence' elif attribute == 'SongID'.lower(): csvRowString += "SongID" elif attribute == 'Tempo'.lower(): csvRowString += 'Tempo' elif attribute == 'TimeSignature'.lower(): csvRowString += 'TimeSignature' elif attribute == 'TimeSignatureConfidence'.lower(): csvRowString += 'TimeSignatureConfidence' elif attribute == 'Title'.lower(): csvRowString += 'Title' elif attribute == 'Year'.lower(): csvRowString += 'Year' elif attribute == 'track_id'.lower(): csvRowString += 'track_id' elif attribute == 'artist_familiarity'.lower(): csvRowString += 'artist_familiarity' elif attribute == 'artist_hotttnesss'.lower(): csvRowString += 'artist_hotttnesss' elif attribute == 'artist_mbid'.lower(): csvRowString += 'artist_mbid' elif attribute == 'artist_playmeid'.lower(): csvRowString += 'artist_playmeid' elif attribute == 'artist_7digitalid'.lower(): csvRowString += 'artist_7digitalid' elif attribute == 'release'.lower(): csvRowString += 'release' elif attribute == 'release_7digitalid'.lower(): csvRowString += 'release_7digitalid' elif attribute == 'song_hotttnesss'.lower(): csvRowString += 'song_hotttnesss' elif attribute == 'track_7digitalid'.lower(): csvRowString += 'track_7digitalid' elif attribute == 'analysis_sample_rate'.lower(): csvRowString += 'analysis_sample_rate' elif attribute == 'audio_md5'.lower(): csvRowString += 'audio_md5' elif attribute == 'end_of_fade_in'.lower(): csvRowString += 'end_of_fade_in' elif attribute == 'energy'.lower(): csvRowString += 'energy' elif attribute == 'key'.lower(): csvRowString += 'key' elif attribute == 'key_confidence'.lower(): csvRowString += 'key_confidence' elif attribute == 'loudness'.lower(): csvRowString += 'loudness' elif attribute == 'mode'.lower(): csvRowString += 'mode' elif attribute == 'mode_confidence'.lower(): csvRowString += 'mode_confidence' elif attribute == 'start_of_fade_out'.lower(): csvRowString += 'start_of_fade_out' elif attribute == 'Exit'.lower(): sys.exit() else: prompt = True print "==============" print "I believe there has been an error with the input." print "==============" break csvRowString += "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] csvRowString += "\n" outputFile1.write(csvRowString); csvRowString = "" #else, if you want to hard code the order of the csv file and not prompt #the user, else: ################################################# #change the order of the csv file here #Default is to list all available attributes (in alphabetical order) csvRowString = ("SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"+ "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,"+ "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"+ "Title,Year,track_id,artist_hotttnesss,artist_mbid,artist_playmeid,artist_7digitalid,"+ "release,release_7digitalid,song_hotttnesss,track_7digitalid,analysis_sample_rate,audio_md5,"+ "end_of_fade_in,energy,key,key_confidence,loudness,mode,mode_confidence,start_of_fade_out") ################################################# csvAttributeList = re.split('\W+', csvRowString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() outputFile1.write("SongNumber,"); outputFile1.write(csvRowString + "\n"); csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate basedir = "/vagrant/genrepython/MillionSongSubset" # "." As the default means the current directory ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root,'*'+ext)) for f in files: print f songH5File = hdf5_getters.open_h5_file_read(f) song = Song(str(hdf5_getters.get_song_id(songH5File))) testDanceability = hdf5_getters.get_danceability(songH5File) # print type(testDanceability) # print ("Here is the danceability: ") + str(testDanceability) song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File)) song.albumName = str(hdf5_getters.get_release(songH5File)) song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File)) song.artistLocation = str(hdf5_getters.get_artist_location(songH5File)) song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File)) song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) # song.setGenreList() song.keySignature = str(hdf5_getters.get_key(songH5File)) song.keySignatureConfidence = str(hdf5_getters.get_key_confidence(songH5File)) # song.lyrics = None # song.popularity = None song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str(hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) song.track_id = str(hdf5_getters.get_track_id(songH5File)) song.artist_familiarity = str(hdf5_getters.get_artist_familiarity(songH5File)) song.artist_hotttnesss = str(hdf5_getters.get_artist_hotttnesss(songH5File)) song.artist_mbid = str(hdf5_getters.get_artist_mbid(songH5File)) song.artist_playmeid = str(hdf5_getters.get_artist_playmeid(songH5File)) song.artist_7digitalid = str(hdf5_getters.get_artist_7digitalid(songH5File)) song.release = str(hdf5_getters.get_release(songH5File)) song.release_7digitalid = str(hdf5_getters.get_release_7digitalid(songH5File)) song.song_hotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File)) song.track_7digitalid = str(hdf5_getters.get_track_7digitalid(songH5File)) song.analysis_sample_rate = str(hdf5_getters.get_analysis_sample_rate(songH5File)) song.audio_md5 = str(hdf5_getters.get_audio_md5(songH5File)) song.end_of_fade_in = str(hdf5_getters.get_end_of_fade_in(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) song.key = str(hdf5_getters.get_key(songH5File)) song.key_confidence = str(hdf5_getters.get_key_confidence(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) song.mode_confidence = str(hdf5_getters.get_mode_confidence(songH5File)) song.start_of_fade_out = str(hdf5_getters.get_start_of_fade_out(songH5File)) #print song count csvRowString += str(song.songCount) + "," for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += song.albumID elif attribute == 'AlbumName'.lower(): albumName = song.albumName albumName = albumName.replace(',',"") csvRowString += "\"" + albumName + "\"" elif attribute == 'ArtistID'.lower(): csvRowString += "\"" + song.artistID + "\"" elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLocation'.lower(): location = song.artistLocation location = location.replace(',','') csvRowString += "\"" + location + "\"" elif attribute == 'ArtistLongitude'.lower(): longitude = song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistName'.lower(): csvRowString += "\"" + song.artistName + "\"" elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'KeySignature'.lower(): csvRowString += song.keySignature elif attribute == 'KeySignatureConfidence'.lower(): # print "key sig conf: " + song.timeSignatureConfidence csvRowString += song.keySignatureConfidence elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'Title'.lower(): csvRowString += "\"" + song.title + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year elif attribute == 'track_id'.lower(): csvRowString += song.track_id elif attribute == 'artist_familiarity'.lower(): csvRowString += song.artist_familiarity elif attribute == 'artist_hotttnesss'.lower(): csvRowString += song.artist_hotttnesss elif attribute == 'artist_mbid'.lower(): csvRowString += song.artist_mbid elif attribute == 'artist_playmeid'.lower(): csvRowString += song.artist_playmeid elif attribute == 'artist_7digitalid'.lower(): csvRowString += song.artist_7digitalid elif attribute == 'release'.lower(): csvRowString += song.release elif attribute == 'release_7digitalid'.lower(): csvRowString += song.release_7digitalid elif attribute == 'song_hotttnesss'.lower(): csvRowString += song.song_hotttnesss elif attribute == 'track_7digitalid'.lower(): csvRowString += song.track_7digitalid elif attribute == 'analysis_sample_rate'.lower(): csvRowString += song.analysis_sample_rate elif attribute == 'audio_md5'.lower(): csvRowString += song.audio_md5 elif attribute == 'end_of_fade_in'.lower(): csvRowString += song.end_of_fade_in elif attribute == 'energy'.lower(): csvRowString += song.energy elif attribute == 'key'.lower(): csvRowString += song.key elif attribute == 'key_confidence'.lower(): csvRowString += song.key_confidence elif attribute == 'loudness'.lower(): csvRowString += song.loudness elif attribute == 'mode'.lower(): csvRowString += song.mode elif attribute == 'mode_confidence'.lower(): csvRowString += song.mode_confidence elif attribute == 'start_of_fade_out'.lower(): csvRowString += song.start_of_fade_out else: csvRowString += "Erm. This didn't work. Error. :( :(\n" csvRowString += "," #Remove the final comma from each row in the csv lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" songH5File.close() outputFile1.close()
def hd5_single_random_file_parser(): # Open an h5 file in read mode h5 = hdf5_getters.open_h5_file_read( '/home/skalogerakis/Documents/MillionSong/MillionSongSubset/A/M/G/TRAMGDX12903CEF79F.h5' ) function_tracker = filter( lambda x: x.startswith('get'), hdf5_getters.__dict__.keys()) # Detects all the getter functions for f in function_tracker: # Print everything in function tracker print(f) # First effort to check what each field contains. print() # 55 available fields (exluding number of songs fields) print("Num of songs -- ", hdf5_getters.get_num_songs(h5)) # One song per file print("Title -- ", hdf5_getters.get_title(h5)) # Print the title of a specific h5 file print("Artist familiarity -- ", hdf5_getters.get_artist_familiarity(h5)) print("Artist hotness -- ", hdf5_getters.get_artist_hotttnesss(h5)) print("Artist ID -- ", hdf5_getters.get_artist_id(h5)) print("Artist mbID -- ", hdf5_getters.get_artist_mbid(h5)) print("Artist playmeid -- ", hdf5_getters.get_artist_playmeid(h5)) print("Artist 7DigitalID -- ", hdf5_getters.get_artist_7digitalid(h5)) print("Artist latitude -- ", hdf5_getters.get_artist_latitude(h5)) print("Artist longitude -- ", hdf5_getters.get_artist_longitude(h5)) print("Artist location -- ", hdf5_getters.get_artist_location(h5)) print("Artist Name -- ", hdf5_getters.get_artist_name(h5)) print("Release -- ", hdf5_getters.get_release(h5)) print("Release 7DigitalID -- ", hdf5_getters.get_release_7digitalid(h5)) print("Song ID -- ", hdf5_getters.get_song_id(h5)) print("Song Hotness -- ", hdf5_getters.get_song_hotttnesss(h5)) print("Track 7Digital -- ", hdf5_getters.get_track_7digitalid(h5)) print("Similar artists -- ", hdf5_getters.get_similar_artists(h5)) print("Artist terms -- ", hdf5_getters.get_artist_terms(h5)) print("Artist terms freq -- ", hdf5_getters.get_artist_terms_freq(h5)) print("Artist terms weight -- ", hdf5_getters.get_artist_terms_weight(h5)) print("Analysis sample rate -- ", hdf5_getters.get_analysis_sample_rate(h5)) print("Audio md5 -- ", hdf5_getters.get_audio_md5(h5)) print("Danceability -- ", hdf5_getters.get_danceability(h5)) print("Duration -- ", hdf5_getters.get_duration(h5)) print("End of Fade -- ", hdf5_getters.get_end_of_fade_in(h5)) print("Energy -- ", hdf5_getters.get_energy(h5)) print("Key -- ", hdf5_getters.get_key(h5)) print("Key Confidence -- ", hdf5_getters.get_key_confidence(h5)) print("Loudness -- ", hdf5_getters.get_loudness(h5)) print("Mode -- ", hdf5_getters.get_mode(h5)) print("Mode Confidence -- ", hdf5_getters.get_mode_confidence(h5)) print("Start of fade out -- ", hdf5_getters.get_start_of_fade_out(h5)) print("Tempo -- ", hdf5_getters.get_tempo(h5)) print("Time signature -- ", hdf5_getters.get_time_signature(h5)) print("Time signature confidence -- ", hdf5_getters.get_time_signature_confidence(h5)) print("Track ID -- ", hdf5_getters.get_track_id(h5)) print("Segments Start -- ", hdf5_getters.get_segments_start(h5)) print("Segments Confidence -- ", hdf5_getters.get_segments_confidence(h5)) print("Segments Pitches -- ", hdf5_getters.get_segments_pitches(h5)) print("Segments Timbre -- ", hdf5_getters.get_segments_timbre(h5)) print("Segments Loudness max -- ", hdf5_getters.get_segments_loudness_max(h5)) print("Segments Loudness max time-- ", hdf5_getters.get_segments_loudness_max_time(h5)) print("Segments Loudness start -- ", hdf5_getters.get_segments_loudness_start(h5)) print("Sections start -- ", hdf5_getters.get_sections_start(h5)) print("Sections Confidence -- ", hdf5_getters.get_sections_confidence(h5)) print("Beats start -- ", hdf5_getters.get_beats_start(h5)) print("Beats confidence -- ", hdf5_getters.get_beats_confidence(h5)) print("Bars start -- ", hdf5_getters.get_bars_start(h5)) print("Bars confidence -- ", hdf5_getters.get_bars_confidence(h5)) print("Tatums start -- ", hdf5_getters.get_tatums_start(h5)) print("Tatums confidence -- ", hdf5_getters.get_tatums_confidence(h5)) print("Artist mbtags -- ", hdf5_getters.get_artist_mbtags(h5)) print("Artist mbtags count -- ", hdf5_getters.get_artist_mbtags_count(h5)) print("Year -- ", hdf5_getters.get_year(h5)) fields = ['Title', 'Artist ID'] with open('Tester2.csv', 'w', newline='') as csvfile: csv_writer = csv.writer(csvfile, delimiter=';') # writing the fields csv_writer.writerow(fields) # writing the data rows csv_writer.writerow( [hdf5_getters.get_title(h5), hdf5_getters.get_artist_id(h5)]) h5.close() # close h5 when completed in the end
files = glob.glob(os.path.join(root, '*' + '.h5')) for f in files: h5 = GETTERS.open_h5_file_read(f) num_songs = GETTERS.get_num_songs(h5) print f, num_songs for i in range(num_songs): analysis_sample_rate = GETTERS.get_analysis_sample_rate(h5, i) artist_7digitalid = GETTERS.get_artist_7digitalid(h5, i) artist_familiarity = GETTERS.get_artist_familiarity(h5, i) artist_hotttnesss = GETTERS.get_artist_hotttnesss(h5, i) artist_id = GETTERS.get_artist_id(h5, i) artist_latitude = GETTERS.get_artist_latitude(h5, i) artist_location = GETTERS.get_artist_location(h5, i) artist_longitude = GETTERS.get_artist_longitude(h5, i) artist_mbid = GETTERS.get_artist_mbid(h5, i) artist_mbtags = ','.join( str(e) for e in GETTERS.get_artist_mbtags(h5, i)) # array artist_mbtags_count = ','.join( str(e) for e in GETTERS.get_artist_mbtags_count(h5, i)) # array artist_name = GETTERS.get_artist_name(h5, i) artist_playmeid = GETTERS.get_artist_playmeid(h5, i) artist_terms = ','.join( str(e) for e in GETTERS.get_artist_terms(h5, i)) # array #artist_terms_freq = ','.join(str(e) for e in GETTERS.get_artist_terms_freq(h5, i)) # array #artist_terms_weight = ','.join(str(e) for e in GETTERS.get_artist_terms_weight(h5, i)) # array #audio_md5 = GETTERS.get_audio_md5(h5, i) #bars_confidence = ','.join(str(e) for e in GETTERS.get_bars_confidence(h5, i)) # array #bars_start = ','.join(str(e) for e in GETTERS.get_bars_start(h5, i)) # array #beats_confidence = ','.join(str(e) for e in GETTERS.get_beats_confidence(h5, i)) # array
def main(): outputFile1 = open('SongCSV.csv', 'w') csvRowString = "" ################################################# #if you want to prompt the user for the order of attributes in the csv, #leave the prompt boolean set to True #else, set 'prompt' to False and set the order of attributes in the 'else' #clause prompt = False ################################################# if prompt == True: while prompt: prompt = False csvAttributeString = raw_input( "\n\nIn what order would you like the colums of the CSV file?\n" + "Please delineate with commas. The options are: " + "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude," + " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," + " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" + "For example, you may write \"Title, Tempo, Duration\"...\n\n" + "...or exit by typing 'exit'.\n\n") csvAttributeList = re.split('\W+', csvAttributeString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += 'AlbumID' elif attribute == 'AlbumName'.lower(): csvRowString += 'AlbumName' elif attribute == 'ArtistID'.lower(): csvRowString += 'ArtistID' elif attribute == 'ArtistLatitude'.lower(): csvRowString += 'ArtistLatitude' elif attribute == 'ArtistLocation'.lower(): csvRowString += 'ArtistLocation' elif attribute == 'ArtistLongitude'.lower(): csvRowString += 'ArtistLongitude' elif attribute == 'ArtistName'.lower(): csvRowString += 'ArtistName' elif attribute == 'Danceability'.lower(): csvRowString += 'Danceability' elif attribute == 'Duration'.lower(): csvRowString += 'Duration' elif attribute == 'KeySignature'.lower(): csvRowString += 'KeySignature' elif attribute == 'KeySignatureConfidence'.lower(): csvRowString += 'KeySignatureConfidence' elif attribute == 'SongID'.lower(): csvRowString += "SongID" elif attribute == 'Tempo'.lower(): csvRowString += 'Tempo' elif attribute == 'TimeSignature'.lower(): csvRowString += 'TimeSignature' elif attribute == 'TimeSignatureConfidence'.lower(): csvRowString += 'TimeSignatureConfidence' elif attribute == 'Title'.lower(): csvRowString += 'Title' elif attribute == 'Year'.lower(): csvRowString += 'Year' elif attribute == 'Familiarity'.lower(): ####Added by us! csvRowString += song.familiarity elif attribute == 'artist_mbid'.lower(): csvRowString += song.artist_mbid elif attribute == 'artist_playmeid'.lower(): csvRowString += song.artist_playmeid elif attribute == 'artist_7digid'.lower(): csvRowString += song.artist_7digid elif attribute == 'hottness'.lower(): csvRowString += song.hottness elif attribute == 'song_hottness'.lower(): csvRowString += song.song_hottness elif attribute == 'digitalid7'.lower(): csvRowString += song.digitalid7 elif attribute == 'similar_artists'.lower(): csvRowString += song.similar_artists elif attribute == 'artist_terms'.lower(): csvRowString += song.artist_terms elif attribute == 'art_terms_freq'.lower(): csvRowString += song.art_terms_freq elif attribute == 'art_terms_weight'.lower(): csvRowString += song.art_terms_weight elif attribute == 'a_sample_rate'.lower(): csvRowString += song.a_sample_rate elif attribute == 'audio_md5'.lower(): csvRowString += song.audio_md5 elif attribute == 'end_of_fade_in'.lower(): csvRowString += song.end_of_fade_in elif attribute == 'energy'.lower(): csvRowString += song.energy elif attribute == 'loudness'.lower(): csvRowString += song.loudness elif attribute == 'mode'.lower(): csvRowString += song.mode elif attribute == 'mode_conf'.lower(): csvRowString += song.mode_conf elif attribute == 'start_of_fade_out'.lower(): csvRowString += song.start_of_fade_out elif attribute == 'trackid'.lower(): csvRowString += song.trackid elif attribute == 'segm_start'.lower(): csvRowString += song.segm_start elif attribute == 'segm_conf'.lower(): csvRowString += song.segm_conf elif attribute == 'segm_pitch'.lower(): csvRowString += song.segm_pitch elif attribute == 'segm_timbre'.lower(): csvRowString += song.segm_timbre elif attribute == 'segm_max_loud'.lower(): csvRowString += song.segm_max_loud elif attribute == 'segm_max_loud_time'.lower(): csvRowString += song.segm_max_loud_time elif attribute == 'segm_loud_start'.lower(): csvRowString += song.segm_loud_start elif attribute == 'sect_start'.lower(): csvRowString += song.sect_start elif attribute == 'sect_conf'.lower(): csvRowString += song.sect_conf elif attribute == 'beats_start'.lower(): csvRowString += song.beats_start elif attribute == 'beats_conf'.lower(): csvRowString += song.beats_conf elif attribute == 'bars_start'.lower(): csvRowString += song.bars_start elif attribute == 'bars_conf'.lower(): csvRowString += song.bars_conf elif attribute == 'tatums_start'.lower(): csvRowString += song.tatums_start elif attribute == 'tatums_conf'.lower(): csvRowString += song.tatums_conf elif attribute == 'artist_mbtags'.lower(): csvRowString += song.artist_mbtags elif attribute == 'artist_mbtags_count'.lower(): csvRowString += song.artist_mbtags_count elif attribute == 'Exit'.lower(): sys.exit() else: prompt = True print("==============") print("I believe there has been an error with the input.") print("==============") break csvRowString += "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" #else, if you want to hard code the order of the csv file and not prompt #the user, else: ################################################# #change the order of the csv file here #Default is to list all available attributes (in alphabetical order) csvRowString = "SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,Title,Year,Familiarity,Artist_Mbid,Artist_PlaymeId,Artist_7didId,Hottness,Song_Hottness,7digitalid,A_Sample_Rate,Audio_Md5,End_Of_Fade_In,Energy,Loudness,Mode,Mode_Conf,Start_Of_Fade_Out,TrackId" ################################################# csvAttributeList = re.split(',', csvRowString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate basedir = "/home/bigdata/smalltest/" # "." As the default means the current directory ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: print(f) songH5File = hdf5_getters.open_h5_file_read(f) song = Song(str(hdf5_getters.get_song_id(songH5File))) # testDanceability = hdf5_getters.get_danceability(songH5File) # print type(testDanceability) # print ("Here is the danceability: ") + str(testDanceability) song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File)) song.albumName = str(hdf5_getters.get_release(songH5File)) song.artistLatitude = str( hdf5_getters.get_artist_latitude(songH5File)) song.artistLocation = str( hdf5_getters.get_artist_location(songH5File)) song.artistLongitude = str( hdf5_getters.get_artist_longitude(songH5File)) song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) # song.setGenreList() song.keySignature = str(hdf5_getters.get_key(songH5File)) song.keySignatureConfidence = str( hdf5_getters.get_key_confidence(songH5File)) # song.lyrics = None # song.popularity = None song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str( hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str( hdf5_getters.get_time_signature_confidence(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) #########Added by us! song.familiarity = str( hdf5_getters.get_artist_familiarity(songH5File)) song.artist_mbid = str(hdf5_getters.get_artist_mbid(songH5File)) song.artist_playmeid = str( hdf5_getters.get_artist_playmeid(songH5File)) song.artist_7digid = str( hdf5_getters.get_artist_7digitalid(songH5File)) song.hottness = str(hdf5_getters.get_artist_hotttnesss(songH5File)) song.song_hottness = str( hdf5_getters.get_song_hotttnesss(songH5File)) song.digitalid7 = str( hdf5_getters.get_track_7digitalid(songH5File)) #song.similar_artists = str(hdf5_getters.get_similar_artists(songH5File)) #song.artist_terms = str(hdf5_getters.get_artist_terms(songH5File)) #song.art_terms_freq = str(hdf5_getters.get_artist_terms_freq(songH5File)) #song.art_terms_weight = str(hdf5_getters.get_artist_terms_weight(songH5File)) song.a_sample_rate = str( hdf5_getters.get_analysis_sample_rate(songH5File)) song.audio_md5 = str(hdf5_getters.get_audio_md5(songH5File)) song.end_of_fade_in = str( hdf5_getters.get_end_of_fade_in(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) song.mode_conf = str(hdf5_getters.get_mode_confidence(songH5File)) song.start_of_fade_out = str( hdf5_getters.get_start_of_fade_out(songH5File)) song.trackid = str(hdf5_getters.get_track_id(songH5File)) #song.segm_start = str(hdf5_getters.get_segments_start(songH5File)) #song.segm_conf = str(hdf5_getters.get_segments_confidence(songH5File)) #song.segm_pitch = str(hdf5_getters.get_segments_pitches(songH5File)) #song.segm_timbre = str(hdf5_getters.get_segments_timbre(songH5File)) #song.segm_max_loud = str(hdf5_getters.get_segments_loudness_max(songH5File)) #song.segm_max_loud_time = str(hdf5_getters.get_segments_loudness_max_time(songH5File)) #song.segm_loud_start = str(hdf5_getters.get_segments_loudness_start(songH5File)) #song.sect_start = str(hdf5_getters.get_sections_start(songH5File)) #song.sect_conf = str(hdf5_getters.get_sections_confidence(songH5File)) #song.beats_start = str(hdf5_getters.get_beats_start(songH5File)) #song.beats_conf = str(hdf5_getters.get_beats_confidence(songH5File)) #song.bars_start = str(hdf5_getters.get_bars_start(songH5File)) #song.bars_conf = str(hdf5_getters.get_bars_confidence(songH5File)) #song.tatums_start = str(hdf5_getters.get_tatums_start(songH5File)) #song.tatums_conf = str(hdf5_getters.get_tatums_confidence(songH5File)) #song.artist_mbtags = str(hdf5_getters.get_artist_mbtags(songH5File)) #song.artist_mbtags_count = str(hdf5_getters.get_artist_mbtags_count(songH5File)) #print song count #csvRowString += str(song.songCount) + "," for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += song.albumID elif attribute == 'AlbumName'.lower(): albumName = song.albumName albumName = albumName.replace("b\"", "") albumName = albumName.replace("\"", "") albumName = albumName.replace(',', "") csvRowString += "\"" + albumName + "\"" elif attribute == 'ArtistID'.lower(): csvRowString += "\"" + song.artistID + "\"" elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLocation'.lower(): location = song.artistLocation location = location.replace(',', '') location = location.replace("b\"", "") location = location.replace("\"", "") csvRowString += "\"" + location + "\"" elif attribute == 'ArtistLongitude'.lower(): longitude = song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistName'.lower(): artistName = song.artistName artistName = artistName.replace("b\"", "") artistName = artistName.replace("\"", "") csvRowString += "\"" + artistName + "\"" elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'KeySignature'.lower(): csvRowString += song.keySignature elif attribute == 'KeySignatureConfidence'.lower(): # print "key sig conf: " + song.timeSignatureConfidence csvRowString += song.keySignatureConfidence elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'Title'.lower(): t = song.title t = t.replace("b\"", "") t = t.replace("\"", "") csvRowString += "\"" + t + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year elif attribute == 'Familiarity'.lower(): ####Added by us! csvRowString += song.familiarity elif attribute == 'artist_mbid'.lower(): csvRowString += "\"" + song.artist_mbid + "\"" elif attribute == 'artist_playmeid'.lower(): csvRowString += song.artist_playmeid elif attribute == 'artist_7digid'.lower(): csvRowString += song.artist_7digid elif attribute == 'hottness'.lower(): csvRowString += song.hottness elif attribute == 'song_hottness'.lower(): csvRowString += song.song_hottness elif attribute == 'digitalid7'.lower(): csvRowString += song.digitalid7 elif attribute == 'similar_artists'.lower(): csvRowString += song.similar_artists elif attribute == 'artist_terms'.lower(): csvRowString += song.artist_terms elif attribute == 'art_terms_freq'.lower(): csvRowString += song.art_terms_freq elif attribute == 'art_terms_weight'.lower(): csvRowString += song.art_terms_weight elif attribute == 'a_sample_rate'.lower(): csvRowString += song.a_sample_rate elif attribute == 'audio_md5'.lower(): csvRowString += "\"" + song.audio_md5 + "\"" elif attribute == 'end_of_fade_in'.lower(): csvRowString += song.end_of_fade_in elif attribute == 'energy'.lower(): csvRowString += song.energy elif attribute == 'loudness'.lower(): csvRowString += song.loudness elif attribute == 'mode'.lower(): csvRowString += song.mode elif attribute == 'mode_conf'.lower(): csvRowString += song.mode_conf elif attribute == 'start_of_fade_out'.lower(): csvRowString += song.start_of_fade_out elif attribute == 'trackid'.lower(): csvRowString += "\"" + song.trackid + "\"" elif attribute == 'segm_start'.lower(): csvRowString += song.segm_start elif attribute == 'segm_conf'.lower(): csvRowString += song.segm_conf elif attribute == 'segm_pitch'.lower(): csvRowString += song.segm_pitch elif attribute == 'segm_timbre'.lower(): csvRowString += song.segm_timbre elif attribute == 'segm_max_loud'.lower(): csvRowString += song.segm_max_loud elif attribute == 'segm_max_loud_time'.lower(): csvRowString += song.segm_max_loud_time elif attribute == 'segm_loud_start'.lower(): csvRowString += song.segm_loud_start elif attribute == 'sect_start'.lower(): csvRowString += song.sect_start elif attribute == 'sect_conf'.lower(): csvRowString += song.sect_conf elif attribute == 'beats_start'.lower(): csvRowString += song.beats_start elif attribute == 'beats_conf'.lower(): csvRowString += song.beats_conf elif attribute == 'bars_start'.lower(): csvRowString += song.bars_start elif attribute == 'bars_conf'.lower(): csvRowString += song.bars_conf elif attribute == 'tatums_start'.lower(): csvRowString += song.tatums_start elif attribute == 'tatums_conf'.lower(): csvRowString += song.tatums_conf elif attribute == 'artist_mbtags'.lower(): csvRowString += song.artist_mbtags elif attribute == 'artist_mbtags_count'.lower(): csvRowString += song.artist_mbtags_count else: csvRowString += "\"ERR\"" csvRowString += "," #Remove the final comma from each row in the csv lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" songH5File.close() outputFile1.close()
def data_to_flat_file(basedir,ext='.h5') : """ This function extracts the information from the tables and creates the flat file. """ count = 0; #song counter list_to_write= [] group_index=0 row_to_write = "" writer = csv.writer(open("complete.csv", "wb")) for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root,'*'+ext)) for f in files: row=[] print f h5 = hdf5_getters.open_h5_file_read(f) title = hdf5_getters.get_title(h5) title= title.replace('"','') row.append(title) comma=title.find(',') if comma != -1: print title time.sleep(1) album = hdf5_getters.get_release(h5) album= album.replace('"','') row.append(album) comma=album.find(',') if comma != -1: print album time.sleep(1) artist_name = hdf5_getters.get_artist_name(h5) comma=artist_name.find(',') if comma != -1: print artist_name time.sleep(1) artist_name= artist_name.replace('"','') row.append(artist_name) duration = hdf5_getters.get_duration(h5) row.append(duration) samp_rt = hdf5_getters.get_analysis_sample_rate(h5) row.append(samp_rt) artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5) row.append(artist_7digitalid) artist_fam = hdf5_getters.get_artist_familiarity(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_fam) == True: artist_fam=-1 row.append(artist_fam) artist_hotness= hdf5_getters.get_artist_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_hotness) == True: artist_hotness=-1 row.append(artist_hotness) artist_id = hdf5_getters.get_artist_id(h5) row.append(artist_id) artist_lat = hdf5_getters.get_artist_latitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lat) == True: artist_lat=-1 row.append(artist_lat) artist_loc = hdf5_getters.get_artist_location(h5) row.append(artist_loc) artist_lon = hdf5_getters.get_artist_longitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lon) == True: artist_lon=-1 row.append(artist_lon) artist_mbid = hdf5_getters.get_artist_mbid(h5) row.append(artist_mbid) #Getting the genre art_trm = hdf5_getters.get_artist_terms(h5) trm_freq = hdf5_getters.get_artist_terms_freq(h5) trn_wght = hdf5_getters.get_artist_terms_weight(h5) a_mb_tags = hdf5_getters.get_artist_mbtags(h5) genre_indexes=get_genre_indexes(trm_freq) #index of the highest freq genre_set=0 #flag to see if the genre has been set or not final_genre=[] genres_so_far=[] for i in range(len(genre_indexes)): genre_tmp=get_genre(art_trm,genre_indexes[i]) #genre that corresponds to the highest freq genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary if len(genres_so_far) != 0: for i in genres_so_far: final_genre.append(i) genre_set=1 if genre_set == 1: col_num=[] for i in final_genre: column=int(i) #getting the column number of the genre col_num.append(column) genre_array=genre_columns(col_num) #genre array for i in range(len(genre_array)): #appending the genre_array to the row row.append(genre_array[i]) else: genre_array=genre_columns(-1) #when there is no genre matched, return an array of [0...0] for i in range(len(genre_array)): #appending the genre_array to the row row.append(genre_array[i]) artist_pmid = hdf5_getters.get_artist_playmeid(h5) row.append(artist_pmid) audio_md5 = hdf5_getters.get_audio_md5(h5) row.append(audio_md5) danceability = hdf5_getters.get_danceability(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(danceability) == True: danceability=-1 row.append(danceability) end_fade_in =hdf5_getters.get_end_of_fade_in(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(end_fade_in) == True: end_fade_in=-1 row.append(end_fade_in) energy = hdf5_getters.get_energy(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(energy) == True: energy=-1 row.append(energy) song_key = hdf5_getters.get_key(h5) row.append(song_key) key_c = hdf5_getters.get_key_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(key_c) == True: key_c=-1 row.append(key_c) loudness = hdf5_getters.get_loudness(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(loudness) == True: loudness=-1 row.append(loudness) mode = hdf5_getters.get_mode(h5) row.append(mode) mode_conf = hdf5_getters.get_mode_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(mode_conf) == True: mode_conf=-1 row.append(mode_conf) release_7digitalid = hdf5_getters.get_release_7digitalid(h5) row.append(release_7digitalid) song_hot = hdf5_getters.get_song_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(song_hot) == True: song_hot=-1 row.append(song_hot) song_id = hdf5_getters.get_song_id(h5) row.append(song_id) start_fade_out = hdf5_getters.get_start_of_fade_out(h5) row.append(start_fade_out) tempo = hdf5_getters.get_tempo(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(tempo) == True: tempo=-1 row.append(tempo) time_sig = hdf5_getters.get_time_signature(h5) row.append(time_sig) time_sig_c = hdf5_getters.get_time_signature_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(time_sig_c) == True: time_sig_c=-1 row.append(time_sig_c) track_id = hdf5_getters.get_track_id(h5) row.append(track_id) track_7digitalid = hdf5_getters.get_track_7digitalid(h5) row.append(track_7digitalid) year = hdf5_getters.get_year(h5) row.append(year) bars_c = hdf5_getters.get_bars_confidence(h5) bars_start = hdf5_getters.get_bars_start(h5) row_bars_padding=padding(245) #this is the array that will be attached at the end of th row #--------------bars---------------" gral_info=[] gral_info=row[:] empty=[] for i,item in enumerate(bars_c): row.append(group_index) row.append(i) row.append(bars_c[i]) bars_c_avg= get_avg(bars_c) row.append(bars_c_avg) bars_c_max= get_max(bars_c) row.append(bars_c_max) bars_c_min = get_min(bars_c) row.append(bars_c_min) bars_c_stddev= get_stddev(bars_c) row.append(bars_c_stddev) bars_c_count = get_count(bars_c) row.append(bars_c_count) bars_c_sum = get_sum(bars_c) row.append(bars_c_sum) row.append(bars_start[i]) bars_start_avg = get_avg(bars_start) row.append(bars_start_avg) bars_start_max= get_max(bars_start) row.append(bars_start_max) bars_start_min = get_min(bars_start) row.append(bars_start_min) bars_start_stddev= get_stddev(bars_start) row.append(bars_start_stddev) bars_start_count = get_count(bars_start) row.append(bars_start_count) bars_start_sum = get_sum(bars_start) row.append(bars_start_sum) for i in row_bars_padding: row.append(i) writer.writerow(row) row=[] row=gral_info[:] #--------beats---------------" beats_c = hdf5_getters.get_beats_confidence(h5) group_index=1 row=[] row=gral_info[:] row_front=padding(14) #blanks left in front of the row(empty spaces for bars) row_beats_padding=padding(231) for i,item in enumerate(beats_c): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of the beats row.append(index) row.append(beats_c[i]) beats_c_avg= get_avg(beats_c) row.append(beats_c_avg) beats_c_max= get_max(beats_c) row.append(beats_c_max) beats_c_min = get_min(beats_c) row.append(beats_c_min) beats_c_stddev= get_stddev(beats_c) row.append(beats_c_stddev) beats_c_count = get_count(beats_c) row.append(beats_c_count) beats_c_sum = get_sum(beats_c) row.append(beats_c_sum) beats_start = hdf5_getters.get_beats_start(h5) row.append(beats_start[i]) beats_start_avg = get_avg(beats_start) row.append(beats_start_avg) beats_start_max= get_max(beats_start) row.append(beats_start_max) beats_start_min = get_min(beats_start) row.append(beats_start_min) beats_start_stddev= get_stddev(beats_start) row.append(beats_start_stddev) beats_start_count = get_count(beats_start) row.append(beats_start_count) beats_start_sum = get_sum(beats_start) row.append(beats_start_sum) for i in row_beats_padding: row.append(i) writer.writerow(row) row=[] row=gral_info[:] # "--------sections---------------" row_sec_padding=padding(217) #blank spaces left at the end of the row sec_c = hdf5_getters.get_sections_confidence(h5) group_index=2 row=[] row=gral_info[:] row_front=padding(28) #blank spaces left in front(empty spaces for bars,beats) for i,item in enumerate(sec_c): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of the sections row.append(index) row.append(sec_c[i]) sec_c_avg= get_avg(sec_c) row.append(sec_c_avg) sec_c_max= get_max(sec_c) row.append(sec_c_max) sec_c_min = get_min(sec_c) row.append(sec_c_min) sec_c_stddev= get_stddev(sec_c) row.append(sec_c_stddev) sec_c_count = get_count(sec_c) row.append(sec_c_count) sec_c_sum = get_sum(sec_c) row.append(sec_c_sum) sec_start = hdf5_getters.get_sections_start(h5) row.append(sec_start[i]) sec_start_avg = get_avg(sec_start) row.append(sec_start_avg) sec_start_max= get_max(sec_start) row.append(sec_start_max) sec_start_min = get_min(sec_start) row.append(sec_start_min) sec_start_stddev= get_stddev(sec_start) row.append(sec_start_stddev) sec_start_count = get_count(sec_start) row.append(sec_start_count) sec_start_sum = get_sum(sec_start) row.append(sec_start_sum) for i in row_sec_padding: #appending the blank spaces at the end of the row row.append(i) writer.writerow(row) row=[] row=gral_info[:] #--------segments---------------" row_seg_padding=padding(182) #blank spaces at the end of the row row_front=padding(42) #blank spaces left in front of segments seg_c = hdf5_getters.get_segments_confidence(h5) group_index=3 row=[] row=gral_info[:] for i,item in enumerate(seg_c): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of the segments row.append(index) row.append(seg_c[i]) seg_c_avg= get_avg(seg_c) row.append(seg_c_avg) seg_c_max= get_max(seg_c) row.append(seg_c_max) seg_c_min = get_min(seg_c) row.append(seg_c_min) seg_c_stddev= get_stddev(seg_c) row.append(seg_c_stddev) seg_c_count = get_count(seg_c) row.append(seg_c_count) seg_c_sum = get_sum(seg_c) row.append(seg_c_sum) seg_loud_max = hdf5_getters.get_segments_loudness_max(h5) row.append(seg_loud_max[i]) seg_loud_max_avg= get_avg(seg_loud_max) row.append(seg_loud_max_avg) seg_loud_max_max= get_max(seg_loud_max) row.append(seg_loud_max_max) seg_loud_max_min = get_min(seg_loud_max) row.append(seg_loud_max_min) seg_loud_max_stddev= get_stddev(seg_loud_max) row.append(seg_loud_max_stddev) seg_loud_max_count = get_count(seg_loud_max) row.append(seg_loud_max_count) seg_loud_max_sum = get_sum(seg_loud_max) row.append(seg_loud_max_sum) seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5) row.append(seg_loud_max_time[i]) seg_loud_max_time_avg= get_avg(seg_loud_max_time) row.append(seg_loud_max_time_avg) seg_loud_max_time_max= get_max(seg_loud_max_time) row.append(seg_loud_max_time_max) seg_loud_max_time_min = get_min(seg_loud_max_time) row.append(seg_loud_max_time_min) seg_loud_max_time_stddev= get_stddev(seg_loud_max_time) row.append(seg_loud_max_time_stddev) seg_loud_max_time_count = get_count(seg_loud_max_time) row.append(seg_loud_max_time_count) seg_loud_max_time_sum = get_sum(seg_loud_max_time) row.append(seg_loud_max_time_sum) seg_loud_start = hdf5_getters.get_segments_loudness_start(h5) row.append(seg_loud_start[i]) seg_loud_start_avg= get_avg(seg_loud_start) row.append(seg_loud_start_avg) seg_loud_start_max= get_max(seg_loud_start) row.append(seg_loud_start_max) seg_loud_start_min = get_min(seg_loud_start) row.append(seg_loud_start_min) seg_loud_start_stddev= get_stddev(seg_loud_start) row.append(seg_loud_start_stddev) seg_loud_start_count = get_count(seg_loud_start) row.append(seg_loud_start_count) seg_loud_start_sum = get_sum(seg_loud_start) row.append(seg_loud_start_sum) seg_start = hdf5_getters.get_segments_start(h5) row.append(seg_start[i]) seg_start_avg= get_avg(seg_start) row.append(seg_start_avg) seg_start_max= get_max(seg_start) row.append(seg_start_max) seg_start_min = get_min(seg_start) row.append(seg_start_min) seg_start_stddev= get_stddev(seg_start) row.append(seg_start_stddev) seg_start_count = get_count(seg_start) row.append(seg_start_count) seg_start_sum = get_sum(seg_start) row.append(seg_start_sum) for i in row_seg_padding: #appending blank spaces at the end of the row row.append(i) writer.writerow(row) row=[] row=gral_info[:] #----------segments pitch and timbre---------------" row_seg2_padding=padding(14) #blank spaces left at the end of the row row_front=padding(77) #blank spaces left at the front of the segments and timbre seg_pitch = hdf5_getters.get_segments_pitches(h5) transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows group_index=4 row=[] row=gral_info[:] for i,item in enumerate(transpose_pitch[0]): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of segments and timbre row.append(index) row.append(transpose_pitch[0][i]) seg_pitch_avg= get_avg(transpose_pitch[0]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[0]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[0]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[0]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[0]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[0]) row.append(seg_pitch_sum) row.append(transpose_pitch[1][i]) seg_pitch_avg= get_avg(transpose_pitch[1]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[1]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[1]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[1]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[1]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[1]) row.append(seg_pitch_sum) row.append(transpose_pitch[2][i]) seg_pitch_avg= get_avg(transpose_pitch[2]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[2]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[2]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[2]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[2]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[2]) row.append(seg_pitch_sum) row.append(transpose_pitch[3][i]) seg_pitch_avg= get_avg(transpose_pitch[3]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[3]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[3]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[3]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[3]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[3]) row.append(seg_pitch_sum) row.append(transpose_pitch[4][i]) seg_pitch_avg= get_avg(transpose_pitch[4]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[4]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[4]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[4]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[4]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[4]) row.append(seg_pitch_sum) row.append(transpose_pitch[5][i]) seg_pitch_avg= get_avg(transpose_pitch[5]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[5]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[5]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[5]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[5]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[5]) row.append(seg_pitch_sum) row.append(transpose_pitch[6][i]) seg_pitch_avg= get_avg(transpose_pitch[6]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[6]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[6]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[6]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[6]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[6]) row.append(seg_pitch_sum) row.append(transpose_pitch[7][i]) seg_pitch_avg= get_avg(transpose_pitch[7]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[7]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[7]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[7]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[7]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[7]) row.append(seg_pitch_sum) row.append(transpose_pitch[8][i]) seg_pitch_avg= get_avg(transpose_pitch[8]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[8]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[8]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[8]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[8]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[8]) row.append(seg_pitch_sum) row.append(transpose_pitch[9][i]) seg_pitch_avg= get_avg(transpose_pitch[9]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[9]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[9]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[9]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[9]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[9]) row.append(seg_pitch_sum) row.append(transpose_pitch[10][i]) seg_pitch_avg= get_avg(transpose_pitch[10]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[10]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[10]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[10]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[10]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[10]) row.append(seg_pitch_sum) row.append(transpose_pitch[11][i]) seg_pitch_avg= get_avg(transpose_pitch[11]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[11]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[11]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[11]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[11]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[11]) row.append(seg_pitch_sum) #timbre arrays seg_timbre = hdf5_getters.get_segments_timbre(h5) transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows row.append(transpose_timbre[0][i]) seg_timbre_avg= get_avg(transpose_timbre[0]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[0]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[0]) row.append(seg_timbre_min) seg_timbre_stddev=get_stddev(transpose_timbre[0]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[0]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[0]) row.append(seg_timbre_sum) row.append(transpose_timbre[1][i]) seg_timbre_avg= get_avg(transpose_timbre[1]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[1]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[1]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[1]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[1]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[1]) row.append(seg_timbre_sum) row.append(transpose_timbre[2][i]) seg_timbre_avg= get_avg(transpose_timbre[2]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[2]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[2]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[2]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[2]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[2]) row.append(seg_timbre_sum) row.append(transpose_timbre[3][i]) seg_timbre_avg= get_avg(transpose_timbre[3]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[3]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[3]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[3]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[3]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[3]) row.append(seg_timbre_sum) row.append(transpose_timbre[4][i]) seg_timbre_avg= get_avg(transpose_timbre[4]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[4]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[4]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[4]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[4]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[4]) row.append(seg_timbre_sum) row.append(transpose_timbre[5][i]) seg_timbre_avg= get_avg(transpose_timbre[5]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[5]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[5]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[5]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[5]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[5]) row.append(seg_timbre_sum) row.append(transpose_timbre[6][i]) seg_timbre_avg= get_avg(transpose_timbre[6]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[6]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[6]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[6]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[6]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[6]) row.append(seg_timbre_sum) row.append(transpose_timbre[7][i]) seg_timbre_avg= get_avg(transpose_timbre[7]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[7]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[7]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[7]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[7]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[7]) row.append(seg_timbre_sum) row.append(transpose_timbre[8][i]) seg_timbre_avg= get_avg(transpose_timbre[8]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[8]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[8]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[8]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[8]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[8]) row.append(seg_timbre_sum) row.append(transpose_timbre[9][i]) seg_timbre_avg= get_avg(transpose_timbre[9]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[9]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[9]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[9]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[9]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[9]) row.append(seg_timbre_sum) row.append(transpose_timbre[10][i]) seg_timbre_avg= get_avg(transpose_timbre[10]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[10]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[10]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[10]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[10]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[10]) row.append(seg_timbre_sum) row.append(transpose_timbre[11][i]) seg_timbre_avg= get_avg(transpose_timbre[11]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[11]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[11]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[11]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[11]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[11]) row.append(seg_timbre_sum) for item in row_seg2_padding: row.append(item) writer.writerow(row) row=[] row=gral_info[:] # "--------tatums---------------" tatms_c = hdf5_getters.get_tatums_confidence(h5) group_index=5 row_front=padding(245) #blank spaces left in front of tatums row=[] row=gral_info[:] for i,item in enumerate(tatms_c): row.append(group_index) row.append(i) for item in row_front: #appending blank spaces at the front of the row row.append(item) row.append(tatms_c[i]) tatms_c_avg= get_avg(tatms_c) row.append(tatms_c_avg) tatms_c_max= get_max(tatms_c) row.append(tatms_c_max) tatms_c_min = get_min(tatms_c) row.append(tatms_c_min) tatms_c_stddev= get_stddev(tatms_c) row.append(tatms_c_stddev) tatms_c_count = get_count(tatms_c) row.append(tatms_c_count) tatms_c_sum = get_sum(tatms_c) row.append(tatms_c_sum) tatms_start = hdf5_getters.get_tatums_start(h5) row.append(tatms_start[i]) tatms_start_avg= get_avg(tatms_start) row.append(tatms_start_avg) tatms_start_max= get_max(tatms_start) row.append(tatms_start_max) tatms_start_min = get_min(tatms_start) row.append(tatms_start_min) tatms_start_stddev= get_stddev(tatms_start) row.append(tatms_start_stddev) tatms_start_count = get_count(tatms_start) row.append(tatms_start_count) tatms_start_sum = get_sum(tatms_start) row.append(tatms_start_sum) writer.writerow(row) row=[] row=gral_info[:] transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows #arrays containing the aggregate values of the 12 rows seg_pitch_avg=[] seg_pitch_max=[] seg_pitch_min=[] seg_pitch_stddev=[] seg_pitch_count=[] seg_pitch_sum=[] i=0 #Getting the aggregate values in the pitches array for row in transpose_pitch: seg_pitch_avg.append(get_avg(row)) seg_pitch_max.append(get_max(row)) seg_pitch_min.append(get_min(row)) seg_pitch_stddev.append(get_stddev(row)) seg_pitch_count.append(get_count(row)) seg_pitch_sum.append(get_sum(row)) i=i+1 #extracting information from the timbre array transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows #arrays containing the aggregate values of the 12 rows seg_timbre_avg=[] seg_timbre_max=[] seg_timbre_min=[] seg_timbre_stddev=[] seg_timbre_count=[] seg_timbre_sum=[] i=0 for row in transpose_timbre: seg_timbre_avg.append(get_avg(row)) seg_timbre_max.append(get_max(row)) seg_timbre_min.append(get_min(row)) seg_timbre_stddev.append(get_stddev(row)) seg_timbre_count.append(get_count(row)) seg_timbre_sum.append(get_sum(row)) i=i+1 h5.close() count=count+1; print count;
def data_to_flat_file(basedir, ext='.h5'): """ This function extracts the information from the tables and creates the flat file. """ count = 0 #song counter list_to_write = [] group_index = 0 row_to_write = "" writer = csv.writer(open("complete.csv", "wb")) for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: row = [] print f h5 = hdf5_getters.open_h5_file_read(f) title = hdf5_getters.get_title(h5) title = title.replace('"', '') row.append(title) comma = title.find(',') if comma != -1: print title time.sleep(1) album = hdf5_getters.get_release(h5) album = album.replace('"', '') row.append(album) comma = album.find(',') if comma != -1: print album time.sleep(1) artist_name = hdf5_getters.get_artist_name(h5) comma = artist_name.find(',') if comma != -1: print artist_name time.sleep(1) artist_name = artist_name.replace('"', '') row.append(artist_name) duration = hdf5_getters.get_duration(h5) row.append(duration) samp_rt = hdf5_getters.get_analysis_sample_rate(h5) row.append(samp_rt) artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5) row.append(artist_7digitalid) artist_fam = hdf5_getters.get_artist_familiarity(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_fam) == True: artist_fam = -1 row.append(artist_fam) artist_hotness = hdf5_getters.get_artist_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_hotness) == True: artist_hotness = -1 row.append(artist_hotness) artist_id = hdf5_getters.get_artist_id(h5) row.append(artist_id) artist_lat = hdf5_getters.get_artist_latitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lat) == True: artist_lat = -1 row.append(artist_lat) artist_loc = hdf5_getters.get_artist_location(h5) row.append(artist_loc) artist_lon = hdf5_getters.get_artist_longitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lon) == True: artist_lon = -1 row.append(artist_lon) artist_mbid = hdf5_getters.get_artist_mbid(h5) row.append(artist_mbid) #Getting the genre art_trm = hdf5_getters.get_artist_terms(h5) trm_freq = hdf5_getters.get_artist_terms_freq(h5) trn_wght = hdf5_getters.get_artist_terms_weight(h5) a_mb_tags = hdf5_getters.get_artist_mbtags(h5) genre_indexes = get_genre_indexes( trm_freq) #index of the highest freq genre_set = 0 #flag to see if the genre has been set or not final_genre = [] genres_so_far = [] for i in range(len(genre_indexes)): genre_tmp = get_genre( art_trm, genre_indexes[i] ) #genre that corresponds to the highest freq genres_so_far = genre_dict.get_genre_in_dict( genre_tmp) #getting the genre from the dictionary if len(genres_so_far) != 0: for i in genres_so_far: final_genre.append(i) genre_set = 1 if genre_set == 1: col_num = [] for i in final_genre: column = int(i) #getting the column number of the genre col_num.append(column) genre_array = genre_columns(col_num) #genre array for i in range(len( genre_array)): #appending the genre_array to the row row.append(genre_array[i]) else: genre_array = genre_columns( -1 ) #when there is no genre matched, return an array of [0...0] for i in range(len( genre_array)): #appending the genre_array to the row row.append(genre_array[i]) artist_pmid = hdf5_getters.get_artist_playmeid(h5) row.append(artist_pmid) audio_md5 = hdf5_getters.get_audio_md5(h5) row.append(audio_md5) danceability = hdf5_getters.get_danceability(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(danceability) == True: danceability = -1 row.append(danceability) end_fade_in = hdf5_getters.get_end_of_fade_in(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(end_fade_in) == True: end_fade_in = -1 row.append(end_fade_in) energy = hdf5_getters.get_energy(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(energy) == True: energy = -1 row.append(energy) song_key = hdf5_getters.get_key(h5) row.append(song_key) key_c = hdf5_getters.get_key_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(key_c) == True: key_c = -1 row.append(key_c) loudness = hdf5_getters.get_loudness(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(loudness) == True: loudness = -1 row.append(loudness) mode = hdf5_getters.get_mode(h5) row.append(mode) mode_conf = hdf5_getters.get_mode_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(mode_conf) == True: mode_conf = -1 row.append(mode_conf) release_7digitalid = hdf5_getters.get_release_7digitalid(h5) row.append(release_7digitalid) song_hot = hdf5_getters.get_song_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(song_hot) == True: song_hot = -1 row.append(song_hot) song_id = hdf5_getters.get_song_id(h5) row.append(song_id) start_fade_out = hdf5_getters.get_start_of_fade_out(h5) row.append(start_fade_out) tempo = hdf5_getters.get_tempo(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(tempo) == True: tempo = -1 row.append(tempo) time_sig = hdf5_getters.get_time_signature(h5) row.append(time_sig) time_sig_c = hdf5_getters.get_time_signature_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(time_sig_c) == True: time_sig_c = -1 row.append(time_sig_c) track_id = hdf5_getters.get_track_id(h5) row.append(track_id) track_7digitalid = hdf5_getters.get_track_7digitalid(h5) row.append(track_7digitalid) year = hdf5_getters.get_year(h5) row.append(year) bars_c = hdf5_getters.get_bars_confidence(h5) bars_start = hdf5_getters.get_bars_start(h5) row_bars_padding = padding( 245 ) #this is the array that will be attached at the end of th row #--------------bars---------------" gral_info = [] gral_info = row[:] empty = [] for i, item in enumerate(bars_c): row.append(group_index) row.append(i) row.append(bars_c[i]) bars_c_avg = get_avg(bars_c) row.append(bars_c_avg) bars_c_max = get_max(bars_c) row.append(bars_c_max) bars_c_min = get_min(bars_c) row.append(bars_c_min) bars_c_stddev = get_stddev(bars_c) row.append(bars_c_stddev) bars_c_count = get_count(bars_c) row.append(bars_c_count) bars_c_sum = get_sum(bars_c) row.append(bars_c_sum) row.append(bars_start[i]) bars_start_avg = get_avg(bars_start) row.append(bars_start_avg) bars_start_max = get_max(bars_start) row.append(bars_start_max) bars_start_min = get_min(bars_start) row.append(bars_start_min) bars_start_stddev = get_stddev(bars_start) row.append(bars_start_stddev) bars_start_count = get_count(bars_start) row.append(bars_start_count) bars_start_sum = get_sum(bars_start) row.append(bars_start_sum) for i in row_bars_padding: row.append(i) writer.writerow(row) row = [] row = gral_info[:] #--------beats---------------" beats_c = hdf5_getters.get_beats_confidence(h5) group_index = 1 row = [] row = gral_info[:] row_front = padding( 14) #blanks left in front of the row(empty spaces for bars) row_beats_padding = padding(231) for i, item in enumerate(beats_c): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of the beats row.append(index) row.append(beats_c[i]) beats_c_avg = get_avg(beats_c) row.append(beats_c_avg) beats_c_max = get_max(beats_c) row.append(beats_c_max) beats_c_min = get_min(beats_c) row.append(beats_c_min) beats_c_stddev = get_stddev(beats_c) row.append(beats_c_stddev) beats_c_count = get_count(beats_c) row.append(beats_c_count) beats_c_sum = get_sum(beats_c) row.append(beats_c_sum) beats_start = hdf5_getters.get_beats_start(h5) row.append(beats_start[i]) beats_start_avg = get_avg(beats_start) row.append(beats_start_avg) beats_start_max = get_max(beats_start) row.append(beats_start_max) beats_start_min = get_min(beats_start) row.append(beats_start_min) beats_start_stddev = get_stddev(beats_start) row.append(beats_start_stddev) beats_start_count = get_count(beats_start) row.append(beats_start_count) beats_start_sum = get_sum(beats_start) row.append(beats_start_sum) for i in row_beats_padding: row.append(i) writer.writerow(row) row = [] row = gral_info[:] # "--------sections---------------" row_sec_padding = padding( 217) #blank spaces left at the end of the row sec_c = hdf5_getters.get_sections_confidence(h5) group_index = 2 row = [] row = gral_info[:] row_front = padding( 28) #blank spaces left in front(empty spaces for bars,beats) for i, item in enumerate(sec_c): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of the sections row.append(index) row.append(sec_c[i]) sec_c_avg = get_avg(sec_c) row.append(sec_c_avg) sec_c_max = get_max(sec_c) row.append(sec_c_max) sec_c_min = get_min(sec_c) row.append(sec_c_min) sec_c_stddev = get_stddev(sec_c) row.append(sec_c_stddev) sec_c_count = get_count(sec_c) row.append(sec_c_count) sec_c_sum = get_sum(sec_c) row.append(sec_c_sum) sec_start = hdf5_getters.get_sections_start(h5) row.append(sec_start[i]) sec_start_avg = get_avg(sec_start) row.append(sec_start_avg) sec_start_max = get_max(sec_start) row.append(sec_start_max) sec_start_min = get_min(sec_start) row.append(sec_start_min) sec_start_stddev = get_stddev(sec_start) row.append(sec_start_stddev) sec_start_count = get_count(sec_start) row.append(sec_start_count) sec_start_sum = get_sum(sec_start) row.append(sec_start_sum) for i in row_sec_padding: #appending the blank spaces at the end of the row row.append(i) writer.writerow(row) row = [] row = gral_info[:] #--------segments---------------" row_seg_padding = padding(182) #blank spaces at the end of the row row_front = padding(42) #blank spaces left in front of segments seg_c = hdf5_getters.get_segments_confidence(h5) group_index = 3 row = [] row = gral_info[:] for i, item in enumerate(seg_c): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of the segments row.append(index) row.append(seg_c[i]) seg_c_avg = get_avg(seg_c) row.append(seg_c_avg) seg_c_max = get_max(seg_c) row.append(seg_c_max) seg_c_min = get_min(seg_c) row.append(seg_c_min) seg_c_stddev = get_stddev(seg_c) row.append(seg_c_stddev) seg_c_count = get_count(seg_c) row.append(seg_c_count) seg_c_sum = get_sum(seg_c) row.append(seg_c_sum) seg_loud_max = hdf5_getters.get_segments_loudness_max(h5) row.append(seg_loud_max[i]) seg_loud_max_avg = get_avg(seg_loud_max) row.append(seg_loud_max_avg) seg_loud_max_max = get_max(seg_loud_max) row.append(seg_loud_max_max) seg_loud_max_min = get_min(seg_loud_max) row.append(seg_loud_max_min) seg_loud_max_stddev = get_stddev(seg_loud_max) row.append(seg_loud_max_stddev) seg_loud_max_count = get_count(seg_loud_max) row.append(seg_loud_max_count) seg_loud_max_sum = get_sum(seg_loud_max) row.append(seg_loud_max_sum) seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time( h5) row.append(seg_loud_max_time[i]) seg_loud_max_time_avg = get_avg(seg_loud_max_time) row.append(seg_loud_max_time_avg) seg_loud_max_time_max = get_max(seg_loud_max_time) row.append(seg_loud_max_time_max) seg_loud_max_time_min = get_min(seg_loud_max_time) row.append(seg_loud_max_time_min) seg_loud_max_time_stddev = get_stddev(seg_loud_max_time) row.append(seg_loud_max_time_stddev) seg_loud_max_time_count = get_count(seg_loud_max_time) row.append(seg_loud_max_time_count) seg_loud_max_time_sum = get_sum(seg_loud_max_time) row.append(seg_loud_max_time_sum) seg_loud_start = hdf5_getters.get_segments_loudness_start(h5) row.append(seg_loud_start[i]) seg_loud_start_avg = get_avg(seg_loud_start) row.append(seg_loud_start_avg) seg_loud_start_max = get_max(seg_loud_start) row.append(seg_loud_start_max) seg_loud_start_min = get_min(seg_loud_start) row.append(seg_loud_start_min) seg_loud_start_stddev = get_stddev(seg_loud_start) row.append(seg_loud_start_stddev) seg_loud_start_count = get_count(seg_loud_start) row.append(seg_loud_start_count) seg_loud_start_sum = get_sum(seg_loud_start) row.append(seg_loud_start_sum) seg_start = hdf5_getters.get_segments_start(h5) row.append(seg_start[i]) seg_start_avg = get_avg(seg_start) row.append(seg_start_avg) seg_start_max = get_max(seg_start) row.append(seg_start_max) seg_start_min = get_min(seg_start) row.append(seg_start_min) seg_start_stddev = get_stddev(seg_start) row.append(seg_start_stddev) seg_start_count = get_count(seg_start) row.append(seg_start_count) seg_start_sum = get_sum(seg_start) row.append(seg_start_sum) for i in row_seg_padding: #appending blank spaces at the end of the row row.append(i) writer.writerow(row) row = [] row = gral_info[:] #----------segments pitch and timbre---------------" row_seg2_padding = padding( 14) #blank spaces left at the end of the row row_front = padding( 77) #blank spaces left at the front of the segments and timbre seg_pitch = hdf5_getters.get_segments_pitches(h5) transpose_pitch = seg_pitch.transpose( ) #this is to tranpose the matrix,so we can have 12 rows group_index = 4 row = [] row = gral_info[:] for i, item in enumerate(transpose_pitch[0]): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of segments and timbre row.append(index) row.append(transpose_pitch[0][i]) seg_pitch_avg = get_avg(transpose_pitch[0]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[0]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[0]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[0]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[0]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[0]) row.append(seg_pitch_sum) row.append(transpose_pitch[1][i]) seg_pitch_avg = get_avg(transpose_pitch[1]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[1]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[1]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[1]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[1]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[1]) row.append(seg_pitch_sum) row.append(transpose_pitch[2][i]) seg_pitch_avg = get_avg(transpose_pitch[2]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[2]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[2]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[2]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[2]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[2]) row.append(seg_pitch_sum) row.append(transpose_pitch[3][i]) seg_pitch_avg = get_avg(transpose_pitch[3]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[3]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[3]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[3]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[3]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[3]) row.append(seg_pitch_sum) row.append(transpose_pitch[4][i]) seg_pitch_avg = get_avg(transpose_pitch[4]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[4]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[4]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[4]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[4]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[4]) row.append(seg_pitch_sum) row.append(transpose_pitch[5][i]) seg_pitch_avg = get_avg(transpose_pitch[5]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[5]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[5]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[5]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[5]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[5]) row.append(seg_pitch_sum) row.append(transpose_pitch[6][i]) seg_pitch_avg = get_avg(transpose_pitch[6]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[6]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[6]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[6]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[6]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[6]) row.append(seg_pitch_sum) row.append(transpose_pitch[7][i]) seg_pitch_avg = get_avg(transpose_pitch[7]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[7]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[7]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[7]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[7]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[7]) row.append(seg_pitch_sum) row.append(transpose_pitch[8][i]) seg_pitch_avg = get_avg(transpose_pitch[8]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[8]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[8]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[8]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[8]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[8]) row.append(seg_pitch_sum) row.append(transpose_pitch[9][i]) seg_pitch_avg = get_avg(transpose_pitch[9]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[9]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[9]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[9]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[9]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[9]) row.append(seg_pitch_sum) row.append(transpose_pitch[10][i]) seg_pitch_avg = get_avg(transpose_pitch[10]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[10]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[10]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[10]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[10]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[10]) row.append(seg_pitch_sum) row.append(transpose_pitch[11][i]) seg_pitch_avg = get_avg(transpose_pitch[11]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[11]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[11]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[11]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[11]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[11]) row.append(seg_pitch_sum) #timbre arrays seg_timbre = hdf5_getters.get_segments_timbre(h5) transpose_timbre = seg_pitch.transpose( ) #tranposing matrix, to have 12 rows row.append(transpose_timbre[0][i]) seg_timbre_avg = get_avg(transpose_timbre[0]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[0]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[0]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[0]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[0]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[0]) row.append(seg_timbre_sum) row.append(transpose_timbre[1][i]) seg_timbre_avg = get_avg(transpose_timbre[1]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[1]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[1]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[1]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[1]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[1]) row.append(seg_timbre_sum) row.append(transpose_timbre[2][i]) seg_timbre_avg = get_avg(transpose_timbre[2]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[2]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[2]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[2]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[2]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[2]) row.append(seg_timbre_sum) row.append(transpose_timbre[3][i]) seg_timbre_avg = get_avg(transpose_timbre[3]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[3]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[3]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[3]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[3]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[3]) row.append(seg_timbre_sum) row.append(transpose_timbre[4][i]) seg_timbre_avg = get_avg(transpose_timbre[4]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[4]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[4]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[4]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[4]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[4]) row.append(seg_timbre_sum) row.append(transpose_timbre[5][i]) seg_timbre_avg = get_avg(transpose_timbre[5]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[5]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[5]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[5]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[5]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[5]) row.append(seg_timbre_sum) row.append(transpose_timbre[6][i]) seg_timbre_avg = get_avg(transpose_timbre[6]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[6]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[6]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[6]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[6]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[6]) row.append(seg_timbre_sum) row.append(transpose_timbre[7][i]) seg_timbre_avg = get_avg(transpose_timbre[7]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[7]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[7]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[7]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[7]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[7]) row.append(seg_timbre_sum) row.append(transpose_timbre[8][i]) seg_timbre_avg = get_avg(transpose_timbre[8]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[8]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[8]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[8]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[8]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[8]) row.append(seg_timbre_sum) row.append(transpose_timbre[9][i]) seg_timbre_avg = get_avg(transpose_timbre[9]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[9]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[9]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[9]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[9]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[9]) row.append(seg_timbre_sum) row.append(transpose_timbre[10][i]) seg_timbre_avg = get_avg(transpose_timbre[10]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[10]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[10]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[10]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[10]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[10]) row.append(seg_timbre_sum) row.append(transpose_timbre[11][i]) seg_timbre_avg = get_avg(transpose_timbre[11]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[11]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[11]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[11]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[11]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[11]) row.append(seg_timbre_sum) for item in row_seg2_padding: row.append(item) writer.writerow(row) row = [] row = gral_info[:] # "--------tatums---------------" tatms_c = hdf5_getters.get_tatums_confidence(h5) group_index = 5 row_front = padding(245) #blank spaces left in front of tatums row = [] row = gral_info[:] for i, item in enumerate(tatms_c): row.append(group_index) row.append(i) for item in row_front: #appending blank spaces at the front of the row row.append(item) row.append(tatms_c[i]) tatms_c_avg = get_avg(tatms_c) row.append(tatms_c_avg) tatms_c_max = get_max(tatms_c) row.append(tatms_c_max) tatms_c_min = get_min(tatms_c) row.append(tatms_c_min) tatms_c_stddev = get_stddev(tatms_c) row.append(tatms_c_stddev) tatms_c_count = get_count(tatms_c) row.append(tatms_c_count) tatms_c_sum = get_sum(tatms_c) row.append(tatms_c_sum) tatms_start = hdf5_getters.get_tatums_start(h5) row.append(tatms_start[i]) tatms_start_avg = get_avg(tatms_start) row.append(tatms_start_avg) tatms_start_max = get_max(tatms_start) row.append(tatms_start_max) tatms_start_min = get_min(tatms_start) row.append(tatms_start_min) tatms_start_stddev = get_stddev(tatms_start) row.append(tatms_start_stddev) tatms_start_count = get_count(tatms_start) row.append(tatms_start_count) tatms_start_sum = get_sum(tatms_start) row.append(tatms_start_sum) writer.writerow(row) row = [] row = gral_info[:] transpose_pitch = seg_pitch.transpose( ) #this is to tranpose the matrix,so we can have 12 rows #arrays containing the aggregate values of the 12 rows seg_pitch_avg = [] seg_pitch_max = [] seg_pitch_min = [] seg_pitch_stddev = [] seg_pitch_count = [] seg_pitch_sum = [] i = 0 #Getting the aggregate values in the pitches array for row in transpose_pitch: seg_pitch_avg.append(get_avg(row)) seg_pitch_max.append(get_max(row)) seg_pitch_min.append(get_min(row)) seg_pitch_stddev.append(get_stddev(row)) seg_pitch_count.append(get_count(row)) seg_pitch_sum.append(get_sum(row)) i = i + 1 #extracting information from the timbre array transpose_timbre = seg_pitch.transpose( ) #tranposing matrix, to have 12 rows #arrays containing the aggregate values of the 12 rows seg_timbre_avg = [] seg_timbre_max = [] seg_timbre_min = [] seg_timbre_stddev = [] seg_timbre_count = [] seg_timbre_sum = [] i = 0 for row in transpose_timbre: seg_timbre_avg.append(get_avg(row)) seg_timbre_max.append(get_max(row)) seg_timbre_min.append(get_min(row)) seg_timbre_stddev.append(get_stddev(row)) seg_timbre_count.append(get_count(row)) seg_timbre_sum.append(get_sum(row)) i = i + 1 h5.close() count = count + 1 print count
def main(): outputFile = open('songs.csv', 'w') writer = csv.writer(outputFile) csvRowString = "song_number,artist_familiarity,artist_hotttnesss,artist_id,artist_mbid,artist_playmeid,artist_7digitalid,artist_latitude,artist_longitude,artist_location,artist_name,release,release_7digitalid,song_id,song_hotttnesss,title,track_7digitalid,analysis_sample_rate,audio_md5,danceability,duration,end_of_fade_in,energy,key,key_confidence,loudness,mode,mode_confidence,start_of_fade_out,tempo,time_signature,time_signature_confidence,track_id,year" outputFile.write(csvRowString + "\n") csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate basedir = "." # "." As the default means the current directory ext = ".H5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP songCount = 0 for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: print(f) songH5File = hdf5_getters.open_h5_file_read(f) values = [ songCount, hdf5_getters.get_artist_familiarity(songH5File), hdf5_getters.get_artist_hotttnesss(songH5File), hdf5_getters.get_artist_id(songH5File), hdf5_getters.get_artist_mbid(songH5File), hdf5_getters.get_artist_playmeid(songH5File), hdf5_getters.get_artist_7digitalid(songH5File), hdf5_getters.get_artist_latitude(songH5File), hdf5_getters.get_artist_longitude(songH5File), hdf5_getters.get_artist_location(songH5File), hdf5_getters.get_artist_name(songH5File), hdf5_getters.get_release(songH5File), hdf5_getters.get_release_7digitalid(songH5File), hdf5_getters.get_song_id(songH5File), hdf5_getters.get_song_hotttnesss(songH5File), hdf5_getters.get_title(songH5File), hdf5_getters.get_track_7digitalid(songH5File), hdf5_getters.get_analysis_sample_rate(songH5File), hdf5_getters.get_audio_md5(songH5File), hdf5_getters.get_danceability(songH5File), hdf5_getters.get_duration(songH5File), hdf5_getters.get_end_of_fade_in(songH5File), hdf5_getters.get_energy(songH5File), hdf5_getters.get_key(songH5File), hdf5_getters.get_key_confidence(songH5File), hdf5_getters.get_loudness(songH5File), hdf5_getters.get_mode(songH5File), hdf5_getters.get_mode_confidence(songH5File), hdf5_getters.get_start_of_fade_out(songH5File), hdf5_getters.get_tempo(songH5File), hdf5_getters.get_time_signature(songH5File), hdf5_getters.get_time_signature_confidence(songH5File), hdf5_getters.get_track_id(songH5File), hdf5_getters.get_year(songH5File) ] songH5File.close() songCount = songCount + 1 writer.writerow(values) outputFile.close()
song.albumID = remove_trap_characters( str(hdf5_getters.get_release_7digitalid(songH5File))) song.artistLatitude = remove_trap_characters( str(hdf5_getters.get_artist_latitude(songH5File))) # Replace the comma in the location (if there is one), since this will displace the entire row song.artistLocation = remove_trap_characters( str(hdf5_getters.get_artist_location(songH5File))).replace( ',', ':') song.artistLongitude = remove_trap_characters( str(hdf5_getters.get_artist_longitude(songH5File))) song.artistFamiliarity = remove_trap_characters( str(hdf5_getters.get_artist_familiarity(songH5File))) song.artistHotttnesss = remove_trap_characters( str(hdf5_getters.get_artist_hotttnesss(songH5File))) song.artistmbid = remove_trap_characters( str(hdf5_getters.get_artist_mbid(songH5File))) song.artistPlaymeid = remove_trap_characters( str(hdf5_getters.get_artist_playmeid(songH5File))) song.artist7digitalid = remove_trap_characters( str(hdf5_getters.get_artist_7digitalid(songH5File))) temp = hdf5_getters.get_artist_terms(songH5File) song.artistTerms = remove_trap_characters(str(list(temp))) song.artistTermsCount = get_list_length(temp) song.artistTermsFreq = remove_trap_characters( str(list(hdf5_getters.get_artist_terms_freq(songH5File)))) song.artistTermsWeight = remove_trap_characters( str(list(hdf5_getters.get_artist_terms_weight(songH5File)))) temp = hdf5_getters.get_artist_mbtags(songH5File) song.artistMBTags = remove_trap_characters(str(list(temp)))
def data_to_flat_file(basedir,ext='.h5') : """This function extract the information from the tables and creates the flat file.""" count = 0; #song counter list_to_write= [] row_to_write = "" writer = csv.writer(open("metadata_wholeA.csv", "wb")) for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root,'*'+ext)) for f in files: print f #the name of the file h5 = hdf5_getters.open_h5_file_read(f) title = hdf5_getters.get_title(h5) title= title.replace('"','') comma=title.find(',') #eliminating commas in the title if comma != -1: print title time.sleep(1) album = hdf5_getters.get_release(h5) album= album.replace('"','') #eliminating commas in the album comma=album.find(',') if comma != -1: print album time.sleep(1) artist_name = hdf5_getters.get_artist_name(h5) comma=artist_name.find(',') if comma != -1: print artist_name time.sleep(1) artist_name= artist_name.replace('"','') #eliminating double quotes duration = hdf5_getters.get_duration(h5) samp_rt = hdf5_getters.get_analysis_sample_rate(h5) artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5) artist_fam = hdf5_getters.get_artist_familiarity(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_fam) == True: artist_fam=-1 artist_hotness= hdf5_getters.get_artist_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_hotness) == True: artist_hotness=-1 artist_id = hdf5_getters.get_artist_id(h5) artist_lat = hdf5_getters.get_artist_latitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lat) == True: artist_lat=-1 artist_loc = hdf5_getters.get_artist_location(h5) #checks artist_loc to see if it is a hyperlink if it is set as empty string artist_loc = artist_loc.replace(",", "\,"); if artist_loc.startswith("<a"): artist_loc = "" if len(artist_loc) > 100: artist_loc = "" artist_lon = hdf5_getters.get_artist_longitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lon) == True: artist_lon=-1 artist_mbid = hdf5_getters.get_artist_mbid(h5) artist_pmid = hdf5_getters.get_artist_playmeid(h5) audio_md5 = hdf5_getters.get_audio_md5(h5) danceability = hdf5_getters.get_danceability(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(danceability) == True: danceability=-1 end_fade_in =hdf5_getters.get_end_of_fade_in(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(end_fade_in) == True: end_fade_in=-1 energy = hdf5_getters.get_energy(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(energy) == True: energy=-1 song_key = hdf5_getters.get_key(h5) key_c = hdf5_getters.get_key_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(key_c) == True: key_c=-1 loudness = hdf5_getters.get_loudness(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(loudness) == True: loudness=-1 mode = hdf5_getters.get_mode(h5) mode_conf = hdf5_getters.get_mode_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(mode_conf) == True: mode_conf=-1 release_7digitalid = hdf5_getters.get_release_7digitalid(h5) song_hot = hdf5_getters.get_song_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(song_hot) == True: song_hot=-1 song_id = hdf5_getters.get_song_id(h5) start_fade_out = hdf5_getters.get_start_of_fade_out(h5) tempo = hdf5_getters.get_tempo(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(tempo) == True: tempo=-1 time_sig = hdf5_getters.get_time_signature(h5) time_sig_c = hdf5_getters.get_time_signature_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(time_sig_c) == True: time_sig_c=-1 track_id = hdf5_getters.get_track_id(h5) track_7digitalid = hdf5_getters.get_track_7digitalid(h5) year = hdf5_getters.get_year(h5) bars_c = hdf5_getters.get_bars_confidence(h5) bars_c_avg= get_avg(bars_c) bars_c_max= get_max(bars_c) bars_c_min = get_min(bars_c) bars_c_stddev= get_stddev(bars_c) bars_c_count = get_count(bars_c) bars_c_sum = get_sum(bars_c) bars_start = hdf5_getters.get_bars_start(h5) bars_start_avg = get_avg(bars_start) bars_start_max= get_max(bars_start) bars_start_min = get_min(bars_start) bars_start_stddev= get_stddev(bars_start) bars_start_count = get_count(bars_start) bars_start_sum = get_sum(bars_start) beats_c = hdf5_getters.get_beats_confidence(h5) beats_c_avg= get_avg(beats_c) beats_c_max= get_max(beats_c) beats_c_min = get_min(beats_c) beats_c_stddev= get_stddev(beats_c) beats_c_count = get_count(beats_c) beats_c_sum = get_sum(beats_c) beats_start = hdf5_getters.get_beats_start(h5) beats_start_avg = get_avg(beats_start) beats_start_max= get_max(beats_start) beats_start_min = get_min(beats_start) beats_start_stddev= get_stddev(beats_start) beats_start_count = get_count(beats_start) beats_start_sum = get_sum(beats_start) sec_c = hdf5_getters.get_sections_confidence(h5) sec_c_avg= get_avg(sec_c) sec_c_max= get_max(sec_c) sec_c_min = get_min(sec_c) sec_c_stddev= get_stddev(sec_c) sec_c_count = get_count(sec_c) sec_c_sum = get_sum(sec_c) sec_start = hdf5_getters.get_sections_start(h5) sec_start_avg = get_avg(sec_start) sec_start_max= get_max(sec_start) sec_start_min = get_min(sec_start) sec_start_stddev= get_stddev(sec_start) sec_start_count = get_count(sec_start) sec_start_sum = get_sum(sec_start) seg_c = hdf5_getters.get_segments_confidence(h5) seg_c_avg= get_avg(seg_c) seg_c_max= get_max(seg_c) seg_c_min = get_min(seg_c) seg_c_stddev= get_stddev(seg_c) seg_c_count = get_count(seg_c) seg_c_sum = get_sum(seg_c) seg_loud_max = hdf5_getters.get_segments_loudness_max(h5) seg_loud_max_avg= get_avg(seg_loud_max) seg_loud_max_max= get_max(seg_loud_max) seg_loud_max_min = get_min(seg_loud_max) seg_loud_max_stddev= get_stddev(seg_loud_max) seg_loud_max_count = get_count(seg_loud_max) seg_loud_max_sum = get_sum(seg_loud_max) seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5) seg_loud_max_time_avg= get_avg(seg_loud_max_time) seg_loud_max_time_max= get_max(seg_loud_max_time) seg_loud_max_time_min = get_min(seg_loud_max_time) seg_loud_max_time_stddev= get_stddev(seg_loud_max_time) seg_loud_max_time_count = get_count(seg_loud_max_time) seg_loud_max_time_sum = get_sum(seg_loud_max_time) seg_loud_start = hdf5_getters.get_segments_loudness_start(h5) seg_loud_start_avg= get_avg(seg_loud_start) seg_loud_start_max= get_max(seg_loud_start) seg_loud_start_min = get_min(seg_loud_start) seg_loud_start_stddev= get_stddev(seg_loud_start) seg_loud_start_count = get_count(seg_loud_start) seg_loud_start_sum = get_sum(seg_loud_start) seg_pitch = hdf5_getters.get_segments_pitches(h5) pitch_size = len(seg_pitch) seg_start = hdf5_getters.get_segments_start(h5) seg_start_avg= get_avg(seg_start) seg_start_max= get_max(seg_start) seg_start_min = get_min(seg_start) seg_start_stddev= get_stddev(seg_start) seg_start_count = get_count(seg_start) seg_start_sum = get_sum(seg_start) seg_timbre = hdf5_getters.get_segments_timbre(h5) tatms_c = hdf5_getters.get_tatums_confidence(h5) tatms_c_avg= get_avg(tatms_c) tatms_c_max= get_max(tatms_c) tatms_c_min = get_min(tatms_c) tatms_c_stddev= get_stddev(tatms_c) tatms_c_count = get_count(tatms_c) tatms_c_sum = get_sum(tatms_c) tatms_start = hdf5_getters.get_tatums_start(h5) tatms_start_avg= get_avg(tatms_start) tatms_start_max= get_max(tatms_start) tatms_start_min = get_min(tatms_start) tatms_start_stddev= get_stddev(tatms_start) tatms_start_count = get_count(tatms_start) tatms_start_sum = get_sum(tatms_start) #Getting the genres genre_set = 0 #flag to see if the genre has been set or not art_trm = hdf5_getters.get_artist_terms(h5) trm_freq = hdf5_getters.get_artist_terms_freq(h5) trn_wght = hdf5_getters.get_artist_terms_weight(h5) a_mb_tags = hdf5_getters.get_artist_mbtags(h5) genre_indexes=get_genre_indexes(trm_freq) #index of the highest freq final_genre=[] genres_so_far=[] for i in range(len(genre_indexes)): genre_tmp=get_genre(art_trm,genre_indexes[i]) #genre that corresponds to the highest freq genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary if len(genres_so_far) != 0: for i in genres_so_far: final_genre.append(i) genre_set=1 #genre was found in dictionary if genre_set == 1: col_num=[] for genre in final_genre: column=int(genre) #getting the column number of the genre col_num.append(column) genre_array=genre_columns(col_num) #genre array else: genre_array=genre_columns(-1) #the genre was not found in the dictionary transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows #arrays containing the aggregate values of the 12 rows seg_pitch_avg=[] seg_pitch_max=[] seg_pitch_min=[] seg_pitch_stddev=[] seg_pitch_count=[] seg_pitch_sum=[] i=0 #Getting the aggregate values in the pitches array for row in transpose_pitch: seg_pitch_avg.append(get_avg(row)) seg_pitch_max.append(get_max(row)) seg_pitch_min.append(get_min(row)) seg_pitch_stddev.append(get_stddev(row)) seg_pitch_count.append(get_count(row)) seg_pitch_sum.append(get_sum(row)) i=i+1 #extracting information from the timbre array transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows #arrays containing the aggregate values of the 12 rows seg_timbre_avg=[] seg_timbre_max=[] seg_timbre_min=[] seg_timbre_stddev=[] seg_timbre_count=[] seg_timbre_sum=[] i=0 for row in transpose_timbre: seg_timbre_avg.append(get_avg(row)) seg_timbre_max.append(get_max(row)) seg_timbre_min.append(get_min(row)) seg_timbre_stddev.append(get_stddev(row)) seg_timbre_count.append(get_count(row)) seg_timbre_sum.append(get_sum(row)) i=i+1 #Writing to the flat file writer.writerow([title,album,artist_name,year,duration,seg_start_count, tempo]) h5.close() count=count+1; print count;
writer_segments_tone_file = csv.writer(outputSegmentsToneFile, lineterminator='\n') writer_segments_loud_file = csv.writer(outputSegmentsLoudFile, lineterminator='\n') writer_sections_file = csv.writer(outputSectionsFile, lineterminator='\n') writer_beats_file = csv.writer(outputBeatsFile, lineterminator='\n') writer_bars_file = csv.writer(outputBarsFile, lineterminator='\n') writer_tatums_file = csv.writer(outputTatumsFile, lineterminator='\n') writer_MBTags_File = csv.writer(outputMBTagsFile, lineterminator='\n') for dirname, dirs, files in os.walk(files): for filename in files: filename_without_extension, extension = os.path.splitext(filename) if extension == '.h5': hdf = hdf5_getters.open_h5_file_read(dirname + '/' + filename) artist_id = hdf5_getters.get_artist_id(hdf) artists_mb_id = hdf5_getters.get_artist_mbid(hdf) artist_playmeid = hdf5_getters.get_artist_playmeid(hdf) artist_7digitalid = hdf5_getters.get_artist_7digitalid(hdf) artist_name = hdf5_getters.get_artist_name(hdf) artist_familarity = hdf5_getters.get_artist_familiarity(hdf) artist_hotttnesss = hdf5_getters.get_artist_hotttnesss(hdf) artist_location = hdf5_getters.get_artist_location(hdf) release = hdf5_getters.get_release(hdf) release_7digitalid = hdf5_getters.get_release_7digitalid(hdf) song_id = hdf5_getters.get_song_id(hdf) title = hdf5_getters.get_title(hdf) song_hotttnesss = hdf5_getters.get_song_hotttnesss(hdf) track_7digitalid = hdf5_getters.get_track_7digitalid(hdf) analysis_sample_rate = hdf5_getters.get_analysis_sample_rate( hdf) audio_md5 = hdf5_getters.get_audio_md5(hdf)
def data_to_flat_file(basedir, ext='.h5'): """This function extract the information from the tables and creates the flat file.""" count = 0 #song counter list_to_write = [] row_to_write = "" writer = csv.writer(open("metadata_wholeA.csv", "wb")) for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: print f #the name of the file h5 = hdf5_getters.open_h5_file_read(f) title = hdf5_getters.get_title(h5) title = title.replace('"', '') comma = title.find(',') #eliminating commas in the title if comma != -1: print title time.sleep(1) album = hdf5_getters.get_release(h5) album = album.replace('"', '') #eliminating commas in the album comma = album.find(',') if comma != -1: print album time.sleep(1) artist_name = hdf5_getters.get_artist_name(h5) comma = artist_name.find(',') if comma != -1: print artist_name time.sleep(1) artist_name = artist_name.replace('"', '') #eliminating double quotes duration = hdf5_getters.get_duration(h5) samp_rt = hdf5_getters.get_analysis_sample_rate(h5) artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5) artist_fam = hdf5_getters.get_artist_familiarity(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_fam) == True: artist_fam = -1 artist_hotness = hdf5_getters.get_artist_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_hotness) == True: artist_hotness = -1 artist_id = hdf5_getters.get_artist_id(h5) artist_lat = hdf5_getters.get_artist_latitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lat) == True: artist_lat = -1 artist_loc = hdf5_getters.get_artist_location(h5) #checks artist_loc to see if it is a hyperlink if it is set as empty string artist_loc = artist_loc.replace(",", "\,") if artist_loc.startswith("<a"): artist_loc = "" if len(artist_loc) > 100: artist_loc = "" artist_lon = hdf5_getters.get_artist_longitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lon) == True: artist_lon = -1 artist_mbid = hdf5_getters.get_artist_mbid(h5) artist_pmid = hdf5_getters.get_artist_playmeid(h5) audio_md5 = hdf5_getters.get_audio_md5(h5) danceability = hdf5_getters.get_danceability(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(danceability) == True: danceability = -1 end_fade_in = hdf5_getters.get_end_of_fade_in(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(end_fade_in) == True: end_fade_in = -1 energy = hdf5_getters.get_energy(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(energy) == True: energy = -1 song_key = hdf5_getters.get_key(h5) key_c = hdf5_getters.get_key_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(key_c) == True: key_c = -1 loudness = hdf5_getters.get_loudness(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(loudness) == True: loudness = -1 mode = hdf5_getters.get_mode(h5) mode_conf = hdf5_getters.get_mode_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(mode_conf) == True: mode_conf = -1 release_7digitalid = hdf5_getters.get_release_7digitalid(h5) song_hot = hdf5_getters.get_song_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(song_hot) == True: song_hot = -1 song_id = hdf5_getters.get_song_id(h5) start_fade_out = hdf5_getters.get_start_of_fade_out(h5) tempo = hdf5_getters.get_tempo(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(tempo) == True: tempo = -1 time_sig = hdf5_getters.get_time_signature(h5) time_sig_c = hdf5_getters.get_time_signature_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(time_sig_c) == True: time_sig_c = -1 track_id = hdf5_getters.get_track_id(h5) track_7digitalid = hdf5_getters.get_track_7digitalid(h5) year = hdf5_getters.get_year(h5) bars_c = hdf5_getters.get_bars_confidence(h5) bars_c_avg = get_avg(bars_c) bars_c_max = get_max(bars_c) bars_c_min = get_min(bars_c) bars_c_stddev = get_stddev(bars_c) bars_c_count = get_count(bars_c) bars_c_sum = get_sum(bars_c) bars_start = hdf5_getters.get_bars_start(h5) bars_start_avg = get_avg(bars_start) bars_start_max = get_max(bars_start) bars_start_min = get_min(bars_start) bars_start_stddev = get_stddev(bars_start) bars_start_count = get_count(bars_start) bars_start_sum = get_sum(bars_start) beats_c = hdf5_getters.get_beats_confidence(h5) beats_c_avg = get_avg(beats_c) beats_c_max = get_max(beats_c) beats_c_min = get_min(beats_c) beats_c_stddev = get_stddev(beats_c) beats_c_count = get_count(beats_c) beats_c_sum = get_sum(beats_c) beats_start = hdf5_getters.get_beats_start(h5) beats_start_avg = get_avg(beats_start) beats_start_max = get_max(beats_start) beats_start_min = get_min(beats_start) beats_start_stddev = get_stddev(beats_start) beats_start_count = get_count(beats_start) beats_start_sum = get_sum(beats_start) sec_c = hdf5_getters.get_sections_confidence(h5) sec_c_avg = get_avg(sec_c) sec_c_max = get_max(sec_c) sec_c_min = get_min(sec_c) sec_c_stddev = get_stddev(sec_c) sec_c_count = get_count(sec_c) sec_c_sum = get_sum(sec_c) sec_start = hdf5_getters.get_sections_start(h5) sec_start_avg = get_avg(sec_start) sec_start_max = get_max(sec_start) sec_start_min = get_min(sec_start) sec_start_stddev = get_stddev(sec_start) sec_start_count = get_count(sec_start) sec_start_sum = get_sum(sec_start) seg_c = hdf5_getters.get_segments_confidence(h5) seg_c_avg = get_avg(seg_c) seg_c_max = get_max(seg_c) seg_c_min = get_min(seg_c) seg_c_stddev = get_stddev(seg_c) seg_c_count = get_count(seg_c) seg_c_sum = get_sum(seg_c) seg_loud_max = hdf5_getters.get_segments_loudness_max(h5) seg_loud_max_avg = get_avg(seg_loud_max) seg_loud_max_max = get_max(seg_loud_max) seg_loud_max_min = get_min(seg_loud_max) seg_loud_max_stddev = get_stddev(seg_loud_max) seg_loud_max_count = get_count(seg_loud_max) seg_loud_max_sum = get_sum(seg_loud_max) seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5) seg_loud_max_time_avg = get_avg(seg_loud_max_time) seg_loud_max_time_max = get_max(seg_loud_max_time) seg_loud_max_time_min = get_min(seg_loud_max_time) seg_loud_max_time_stddev = get_stddev(seg_loud_max_time) seg_loud_max_time_count = get_count(seg_loud_max_time) seg_loud_max_time_sum = get_sum(seg_loud_max_time) seg_loud_start = hdf5_getters.get_segments_loudness_start(h5) seg_loud_start_avg = get_avg(seg_loud_start) seg_loud_start_max = get_max(seg_loud_start) seg_loud_start_min = get_min(seg_loud_start) seg_loud_start_stddev = get_stddev(seg_loud_start) seg_loud_start_count = get_count(seg_loud_start) seg_loud_start_sum = get_sum(seg_loud_start) seg_pitch = hdf5_getters.get_segments_pitches(h5) pitch_size = len(seg_pitch) seg_start = hdf5_getters.get_segments_start(h5) seg_start_avg = get_avg(seg_start) seg_start_max = get_max(seg_start) seg_start_min = get_min(seg_start) seg_start_stddev = get_stddev(seg_start) seg_start_count = get_count(seg_start) seg_start_sum = get_sum(seg_start) seg_timbre = hdf5_getters.get_segments_timbre(h5) tatms_c = hdf5_getters.get_tatums_confidence(h5) tatms_c_avg = get_avg(tatms_c) tatms_c_max = get_max(tatms_c) tatms_c_min = get_min(tatms_c) tatms_c_stddev = get_stddev(tatms_c) tatms_c_count = get_count(tatms_c) tatms_c_sum = get_sum(tatms_c) tatms_start = hdf5_getters.get_tatums_start(h5) tatms_start_avg = get_avg(tatms_start) tatms_start_max = get_max(tatms_start) tatms_start_min = get_min(tatms_start) tatms_start_stddev = get_stddev(tatms_start) tatms_start_count = get_count(tatms_start) tatms_start_sum = get_sum(tatms_start) #Getting the genres genre_set = 0 #flag to see if the genre has been set or not art_trm = hdf5_getters.get_artist_terms(h5) trm_freq = hdf5_getters.get_artist_terms_freq(h5) trn_wght = hdf5_getters.get_artist_terms_weight(h5) a_mb_tags = hdf5_getters.get_artist_mbtags(h5) genre_indexes = get_genre_indexes( trm_freq) #index of the highest freq final_genre = [] genres_so_far = [] for i in range(len(genre_indexes)): genre_tmp = get_genre( art_trm, genre_indexes[i] ) #genre that corresponds to the highest freq genres_so_far = genre_dict.get_genre_in_dict( genre_tmp) #getting the genre from the dictionary if len(genres_so_far) != 0: for i in genres_so_far: final_genre.append(i) genre_set = 1 #genre was found in dictionary if genre_set == 1: col_num = [] for genre in final_genre: column = int( genre) #getting the column number of the genre col_num.append(column) genre_array = genre_columns(col_num) #genre array else: genre_array = genre_columns( -1) #the genre was not found in the dictionary transpose_pitch = seg_pitch.transpose( ) #this is to tranpose the matrix,so we can have 12 rows #arrays containing the aggregate values of the 12 rows seg_pitch_avg = [] seg_pitch_max = [] seg_pitch_min = [] seg_pitch_stddev = [] seg_pitch_count = [] seg_pitch_sum = [] i = 0 #Getting the aggregate values in the pitches array for row in transpose_pitch: seg_pitch_avg.append(get_avg(row)) seg_pitch_max.append(get_max(row)) seg_pitch_min.append(get_min(row)) seg_pitch_stddev.append(get_stddev(row)) seg_pitch_count.append(get_count(row)) seg_pitch_sum.append(get_sum(row)) i = i + 1 #extracting information from the timbre array transpose_timbre = seg_pitch.transpose( ) #tranposing matrix, to have 12 rows #arrays containing the aggregate values of the 12 rows seg_timbre_avg = [] seg_timbre_max = [] seg_timbre_min = [] seg_timbre_stddev = [] seg_timbre_count = [] seg_timbre_sum = [] i = 0 for row in transpose_timbre: seg_timbre_avg.append(get_avg(row)) seg_timbre_max.append(get_max(row)) seg_timbre_min.append(get_min(row)) seg_timbre_stddev.append(get_stddev(row)) seg_timbre_count.append(get_count(row)) seg_timbre_sum.append(get_sum(row)) i = i + 1 #Writing to the flat file writer.writerow([ title, album, artist_name, year, duration, seg_start_count, tempo ]) h5.close() count = count + 1 print count
def getInfo(files): data = [] build_str = '' with open(sys.argv[1], 'r') as f: contents = f.read() c = contents.split() f.close() print("creating csv with following fields:" + contents) for i in c: build_str = build_str + i + ',' build_str = build_str[:-1] build_str = build_str + '\n' for fil in files: curFile = getters.open_h5_file_read(fil) d2 = {} get_table = {'track_id': getters.get_track_id(curFile), 'segments_pitches': getters.get_segments_pitches(curFile), 'time_signature_confidence': getters.get_time_signature_confidence(curFile), 'song_hotttnesss': getters.get_song_hotttnesss(curFile), 'artist_longitude': getters.get_artist_longitude(curFile), 'tatums_confidence': getters.get_tatums_confidence(curFile), 'num_songs': getters.get_num_songs(curFile), 'duration': getters.get_duration(curFile), 'start_of_fade_out': getters.get_start_of_fade_out(curFile), 'artist_name': getters.get_artist_name(curFile), 'similar_artists': getters.get_similar_artists(curFile), 'artist_mbtags': getters.get_artist_mbtags(curFile), 'artist_terms_freq': getters.get_artist_terms_freq(curFile), 'release': getters.get_release(curFile), 'song_id': getters.get_song_id(curFile), 'track_7digitalid': getters.get_track_7digitalid(curFile), 'title': getters.get_title(curFile), 'artist_latitude': getters.get_artist_latitude(curFile), 'energy': getters.get_energy(curFile), 'key': getters.get_key(curFile), 'release_7digitalid': getters.get_release_7digitalid(curFile), 'artist_mbid': getters.get_artist_mbid(curFile), 'segments_confidence': getters.get_segments_confidence(curFile), 'artist_hotttnesss': getters.get_artist_hotttnesss(curFile), 'time_signature': getters.get_time_signature(curFile), 'segments_loudness_max_time': getters.get_segments_loudness_max_time(curFile), 'mode': getters.get_mode(curFile), 'segments_loudness_start': getters.get_segments_loudness_start(curFile), 'tempo': getters.get_tempo(curFile), 'key_confidence': getters.get_key_confidence(curFile), 'analysis_sample_rate': getters.get_analysis_sample_rate(curFile), 'bars_confidence': getters.get_bars_confidence(curFile), 'artist_playmeid': getters.get_artist_playmeid(curFile), 'artist_terms_weight': getters.get_artist_terms_weight(curFile), 'segments_start': getters.get_segments_start(curFile), 'artist_location': getters.get_artist_location(curFile), 'loudness': getters.get_loudness(curFile), 'year': getters.get_year(curFile), 'artist_7digitalid': getters.get_artist_7digitalid(curFile), 'audio_md5': getters.get_audio_md5(curFile), 'segments_timbre': getters.get_segments_timbre(curFile), 'mode_confidence': getters.get_mode_confidence(curFile), 'end_of_fade_in': getters.get_end_of_fade_in(curFile), 'danceability': getters.get_danceability(curFile), 'artist_familiarity': getters.get_artist_familiarity(curFile), 'artist_mbtags_count': getters.get_artist_mbtags_count(curFile), 'tatums_start': getters.get_tatums_start(curFile), 'artist_id': getters.get_artist_id(curFile), 'segments_loudness_max': getters.get_segments_loudness_max(curFile), 'bars_start': getters.get_bars_start(curFile), 'beats_start': getters.get_beats_start(curFile), 'artist_terms': getters.get_artist_terms(curFile), 'sections_start': getters.get_sections_start(curFile), 'beats_confidence': getters.get_beats_confidence(curFile), 'sections_confidence': getters.get_sections_confidence(curFile)} tid = fil.split('/')[-1].split('.')[0] # print(c) for i in c: if i in get_table: d2[i] = get_table[i] d2[i] = str(d2[i]).replace('\n','') build_str = build_str + d2[i] + ',' else: print('error: unspecified field') exit(0) build_str = build_str[:-1] # print(build_str[:-1]) build_str = build_str + '\n' curFile.close() build_str = build_str.replace('b','').replace("'",'').replace('"','') return (build_str)