def get_attribute(files): array = [] count = 0 for f in files: temp = [] count += 1 print(f) h5 = hdf5_getters.open_h5_file_read(f) temp.append(hdf5_getters.get_num_songs(h5)) temp.append(hdf5_getters.get_artist_familiarity(h5)) temp.append(hdf5_getters.get_artist_hotttnesss(h5)) temp.append(hdf5_getters.get_danceability(h5)) temp.append(hdf5_getters.get_energy(h5)) temp.append(hdf5_getters.get_key(h5)) temp.append(hdf5_getters.get_key_confidence(h5)) temp.append(hdf5_getters.get_loudness(h5)) temp.append(hdf5_getters.get_mode(h5)) temp.append(hdf5_getters.get_mode_confidence(h5)) temp.append(hdf5_getters.get_tempo(h5)) temp.append(hdf5_getters.get_time_signature(h5)) temp.append(hdf5_getters.get_time_signature_confidence(h5)) temp.append(hdf5_getters.get_title(h5)) temp.append(hdf5_getters.get_artist_name(h5)) temp = np.nan_to_num(temp) array.append(temp) # if count%100 ==0: # print(array[count-100:count-1]) # kmean.fit(array[count-100:count-1]) h5.close() return array
def get_all_examples(basedir, genre_dict, ext='.h5'): """ From a base directory, goes through all subdirectories, and grabs all songs and their features and puts them into a pandas dataframe INPUT basedir - base directory of the dataset genre_dict - a dictionary mapping track id to genre based tagraum dataset ext - extension, .h5 by default RETURN dataframe containing all song examples """ features_vs_genre = pd.DataFrame() # iterate over all files in all subdirectories for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) # # count files # count += len(files) # apply function to all files for f in files: h5 = GETTERS.open_h5_file_read(f) song_id = GETTERS.get_track_id(h5).decode('utf-8') if (song_id in genre_dict): genre = genre_dict[song_id] year = GETTERS.get_year(h5) duration = GETTERS.get_duration(h5) end_of_fade_in = GETTERS.get_end_of_fade_in(h5) loudness = GETTERS.get_loudness(h5) song_hotttnesss = GETTERS.get_song_hotttnesss(h5) tempo = GETTERS.get_tempo(h5) key = GETTERS.get_key(h5) key_confidence = GETTERS.get_key_confidence(h5) mode = GETTERS.get_mode(h5) mode_confidence = GETTERS.get_mode_confidence(h5) time_signature = GETTERS.get_time_signature(h5) time_signature_confidence = GETTERS.get_time_signature_confidence( h5) artist_name = GETTERS.get_artist_name(h5) title = GETTERS.get_title(h5) # length of sections_start array gives us number of start num_sections = len(GETTERS.get_sections_start(h5)) num_segments = len(GETTERS.get_segments_confidence(h5)) example = pd.DataFrame( data=[ (artist_name, title, song_id, genre, year, key, key_confidence, mode, mode_confidence, time_signature, time_signature_confidence, duration, end_of_fade_in, loudness, song_hotttnesss, tempo, num_sections) ], columns=[ 'artist_name', 'title', 'song_id', 'genre', 'year', 'key', 'key_confidence', 'mode', 'mode_confidence', 'time_signature', 'time_signature_confidence', 'duration', 'end_of_fade_in', 'loudness', 'song_hotttnesss', 'tempo', 'num_segments' ]) features_vs_genre = features_vs_genre.append(example) h5.close() return features_vs_genre
def get_key_feature(track, h5=None): #return #0: get key of the track #1:get mode (minor = 0, major = 1close = (h5== None) close = (h5 == None) if h5 == None: # build path path = "../../msd_dense_subset/dense/"+track[2]+"/"+track[3]+"/"+track[4]+"/"+track+".h5" h5 = GETTERS.open_h5_file_read(path) mode = GETTERS.get_mode(h5) key = GETTERS.get_key(h5) confidence_mode = GETTERS.get_mode_confidence(h5) confidence_key = GETTERS.get_key_confidence(h5) if close: h5.close() return (key,mode)
def get_key_feature(track, h5=None): #return #0: get key of the track #1:get mode (minor = 0, major = 1close = (h5== None) close = (h5 == None) if h5 == None: # build path path = "../../msd_dense_subset/dense/" + track[2] + "/" + track[ 3] + "/" + track[4] + "/" + track + ".h5" h5 = GETTERS.open_h5_file_read(path) mode = GETTERS.get_mode(h5) key = GETTERS.get_key(h5) confidence_mode = GETTERS.get_mode_confidence(h5) confidence_key = GETTERS.get_key_confidence(h5) if close: h5.close() return (key, mode)
def validate_song(h5_file,song): '''Returns true/false if song is valid or not. This is essentially cleanup and only lets through songs which have 'good' data (have a non-negligible duration, and have segments being most important)''' try: assert gt.get_year(h5_file, song)!='0' assert gt.get_duration(h5_file, song)>60.0 assert gt.get_mode_confidence(h5_file, song)>0.2 assert gt.get_key_confidence(h5_file, song)>0.2 assert gt.get_time_signature_confidence(h5_file, song)>0.2 terms=np.array(gt.get_artist_terms(h5_file, song)) assert terms.size>0 segments=np.array(gt.get_segments_start) assert segments.size>0 sections=np.array(gt.get_sections_start) assert sections.size>0 except: return False return True
def get_attribute(f): temp = [] count += 1 print(f) h5 = hdf5_getters.open_h5_file_read(f) temp.append(hdf5_getters.get_num_songs(h5)) temp.append(hdf5_getters.get_artist_familiarity(h5)) temp.append(hdf5_getters.get_artist_hotttnesss(h5)) temp.append(hdf5_getters.get_danceability(h5)) temp.append(hdf5_getters.get_energy(h5)) temp.append(hdf5_getters.get_key(h5)) temp.append(hdf5_getters.get_key_confidence(h5)) temp.append(hdf5_getters.get_loudness(h5)) temp.append(hdf5_getters.get_mode(h5)) temp.append(hdf5_getters.get_mode_confidence(h5)) temp.append(hdf5_getters.get_tempo(h5)) temp.append(hdf5_getters.get_time_signature(h5)) temp.append(hdf5_getters.get_time_signature_confidence(h5)) temp = np.nan_to_num(temp) array.append(temp) h5.close()
def get_song_data(results): songs_data = [] for f in results: h5 = getter.open_h5_file_read(f) songs_data.append([ os.path.basename(f), getter.get_artist_name(h5), getter.get_title(h5), getter.get_time_signature(h5), getter.get_key(h5), getter.get_segments_loudness_max(h5), getter.get_mode(h5), getter.get_beats_confidence(h5), getter.get_duration(h5), getter.get_tempo(h5), getter.get_loudness(h5), getter.get_segments_timbre(h5), getter.get_segments_pitches(h5), getter.get_key_confidence(h5), ]) h5.close() return songs_data
def main(): dataset_dir = sys.argv[1] global feat Create_BoW(dataset_dir) Size_BoW = Index_BoW(Bag_Words) count = Frequency(Size_BoW, dataset_dir) Size_BoW = Prune(count) Lablify() print "Forming Dataset..." listing1 = os.listdir(dataset_dir) for a in listing1: listing2 = os.listdir(dataset_dir+a+'/') for b in listing2: listing3 = os.listdir(dataset_dir+a+'/'+b+'/') for c in listing3: listing4 = os.listdir(dataset_dir+a+'/'+b+'/'+c+'/') for d in listing4: h5 = hdf5_getters.open_h5_file_read(dataset_dir+a+'/'+b+'/'+c+'/'+d) feat = [] temp = hdf5_getters.get_artist_hotttnesss(h5) if (math.isnan(temp) or temp==0.0): h5.close() continue feat.append(temp) temp = hdf5_getters.get_artist_familiarity(h5) if (math.isnan(temp) or temp==0.0): h5.close() continue feat.append(temp) temp = hdf5_getters.get_bars_confidence(h5) if temp.size == 0: h5.close() continue MeanVar(temp) temp = hdf5_getters.get_beats_confidence(h5) if temp.size == 0: h5.close() continue mm = np.mean(temp) vv = np.var(temp) if mm==0.0 and vv==0.0: h5.close() continue feat.append(mm) feat.append(vv) feat.append(hdf5_getters.get_duration(h5)) temp = hdf5_getters.get_end_of_fade_in(h5) if (math.isnan(temp)): h5.close() continue feat.append(temp) feat.append(hdf5_getters.get_key(h5)) temp = hdf5_getters.get_key_confidence(h5) if (math.isnan(temp)): h5.close() continue feat.append(temp) temp = hdf5_getters.get_loudness(h5) if (math.isnan(temp)): h5.close() continue feat.append(temp) feat.append(hdf5_getters.get_mode(h5)) temp = hdf5_getters.get_mode_confidence(h5) if (math.isnan(temp)): h5.close() continue feat.append(temp) temp = hdf5_getters.get_sections_confidence(h5) if temp.size == 0: h5.close() continue MeanVar(temp) temp = hdf5_getters.get_segments_confidence(h5) if temp.size == 0: h5.close() continue MeanVar(temp) temp = hdf5_getters.get_segments_loudness_max(h5) if temp.size == 0: h5.close() continue MeanVar(temp) temp = hdf5_getters.get_segments_loudness_max_time(h5) if temp.size == 0: h5.close() continue MeanVar(temp) temp = hdf5_getters.get_segments_pitches(h5) if temp.size == 0: h5.close() continue MeanVar(temp) temp = hdf5_getters.get_segments_timbre(h5) if temp.size == 0: h5.close() continue MeanVar(temp) temp = hdf5_getters.get_start_of_fade_out(h5) if (math.isnan(temp)): h5.close() continue feat.append(temp) temp = hdf5_getters.get_tatums_confidence(h5) if temp.size == 0: h5.close() continue MeanVar(temp) temp = hdf5_getters.get_tempo(h5) if (math.isnan(temp)): h5.close() continue feat.append(temp) feat.append(hdf5_getters.get_time_signature(h5)) temp = hdf5_getters.get_time_signature_confidence(h5) if (math.isnan(temp)): h5.close() continue feat.append(temp) temp = hdf5_getters.get_year(h5) if temp == 0: h5.close() continue feat.append(temp) temp = hdf5_getters.get_artist_terms(h5) if temp.size == 0: h5.close() continue temp_ = hdf5_getters.get_artist_terms_weight(h5) if temp_.size == 0: continue for j in Final_BoW: if j in temp: x = np.where(temp==j) x = x[0][0] feat.append(temp_[x]) else: x = 0.0 feat.append(x) temp = hdf5_getters.get_song_hotttnesss(h5) if (math.isnan(temp) or temp==0.0): h5.close() continue hott = 0 if temp >=0.75: hott = 1 elif temp >=0.40 and temp <0.75: hott = 2 else: hott = 3 feat.append(hott) h5.close() count = 1 f=open('MSD_DATASET.txt', 'a') outstring='' cnt = 0 feat_size = len(feat) for i in feat: cnt+=1 outstring+=str(i) if (cnt!=feat_size): outstring+=',' outstring+='\n' f.write(outstring) f.close()
def convert_to_csv(): i = 0 data = [] target = [] count = 0 with open('data_timbre.csv', 'w+') as f: with open('target_timbre.csv', 'w+') as f2: writer = csv.writer(f) target_writer = csv.writer(f2) for root, dirs, files in os.walk(msd_subset_data_path): files = glob.glob(os.path.join(root,'*.h5')) for f in sorted(files): try: # Opening is very prone to causing exceptions, we'll just skip file if exception is thrown h5 = getter.open_h5_file_read(f) year = getter.get_year(h5) if year: count +=1 analysis_file = open('current_analysis_status.txt','a') update = "Currently at file name: " + str(f) + " and at number " + str(count) + "\n" analysis_file.write(update) print update analysis_file.close() target.append([year]) row = [] timbre = getter.get_segments_timbre(h5) segstarts = getter.get_segments_start(h5) btstarts = getter.get_beats_start(h5) duration = getter.get_duration(h5) end_of_fade_in = getter.get_end_of_fade_in(h5) key = getter.get_key(h5) key_confidence = getter.get_key_confidence(h5) loudness = getter.get_loudness(h5) start_of_fade_out = getter.get_start_of_fade_out(h5) tempo = getter.get_tempo(h5) time_signature = getter.get_time_signature(h5) time_signature_confidence = getter.get_time_signature_confidence(h5) h5.close() # VERY IMPORTANT segstarts = np.array(segstarts).flatten() btstarts = np.array(btstarts).flatten() bttimbre = align_feats(timbre.T, segstarts, btstarts, duration, end_of_fade_in, key, key_confidence, loudness, start_of_fade_out, tempo, time_signature, time_signature_confidence) if bttimbre is None: continue # Skip this track, some features broken npicks, winsize, finaldim = 12, 12, 144 # Calculated by 12 * 12. 12 is fixed as number of dimensions. processed_feats = extract_and_compress(bttimbre, npicks, winsize, finaldim) n_p_feats = processed_feats.shape[0] if processed_feats is None: continue # Skip this track, some features broken row = processed_feats.flatten() if len(row) != 12*144: # 12 dimensions * 144 features per dimension continue # Not enough features year_row = np.array([year]) if row.any() and year_row.any(): writer.writerow(row) target_writer.writerow(year_row) i+=1 else: h5.close() except Exception: pass print 'Finished!' analysis_file = open('current_analysis_status.txt','a') analysis_file.write('Done!') analysis_file.close() return
def data_to_flat_file(basedir,ext='.h5') : """This function extract the information from the tables and creates the flat file.""" count = 0; #song counter list_to_write= [] row_to_write = "" writer = csv.writer(open("metadata_wholeA.csv", "wb")) for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root,'*'+ext)) for f in files: print f #the name of the file h5 = hdf5_getters.open_h5_file_read(f) title = hdf5_getters.get_title(h5) title= title.replace('"','') comma=title.find(',') #eliminating commas in the title if comma != -1: print title time.sleep(1) album = hdf5_getters.get_release(h5) album= album.replace('"','') #eliminating commas in the album comma=album.find(',') if comma != -1: print album time.sleep(1) artist_name = hdf5_getters.get_artist_name(h5) comma=artist_name.find(',') if comma != -1: print artist_name time.sleep(1) artist_name= artist_name.replace('"','') #eliminating double quotes duration = hdf5_getters.get_duration(h5) samp_rt = hdf5_getters.get_analysis_sample_rate(h5) artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5) artist_fam = hdf5_getters.get_artist_familiarity(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_fam) == True: artist_fam=-1 artist_hotness= hdf5_getters.get_artist_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_hotness) == True: artist_hotness=-1 artist_id = hdf5_getters.get_artist_id(h5) artist_lat = hdf5_getters.get_artist_latitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lat) == True: artist_lat=-1 artist_loc = hdf5_getters.get_artist_location(h5) #checks artist_loc to see if it is a hyperlink if it is set as empty string artist_loc = artist_loc.replace(",", "\,"); if artist_loc.startswith("<a"): artist_loc = "" if len(artist_loc) > 100: artist_loc = "" artist_lon = hdf5_getters.get_artist_longitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lon) == True: artist_lon=-1 artist_mbid = hdf5_getters.get_artist_mbid(h5) artist_pmid = hdf5_getters.get_artist_playmeid(h5) audio_md5 = hdf5_getters.get_audio_md5(h5) danceability = hdf5_getters.get_danceability(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(danceability) == True: danceability=-1 end_fade_in =hdf5_getters.get_end_of_fade_in(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(end_fade_in) == True: end_fade_in=-1 energy = hdf5_getters.get_energy(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(energy) == True: energy=-1 song_key = hdf5_getters.get_key(h5) key_c = hdf5_getters.get_key_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(key_c) == True: key_c=-1 loudness = hdf5_getters.get_loudness(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(loudness) == True: loudness=-1 mode = hdf5_getters.get_mode(h5) mode_conf = hdf5_getters.get_mode_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(mode_conf) == True: mode_conf=-1 release_7digitalid = hdf5_getters.get_release_7digitalid(h5) song_hot = hdf5_getters.get_song_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(song_hot) == True: song_hot=-1 song_id = hdf5_getters.get_song_id(h5) start_fade_out = hdf5_getters.get_start_of_fade_out(h5) tempo = hdf5_getters.get_tempo(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(tempo) == True: tempo=-1 time_sig = hdf5_getters.get_time_signature(h5) time_sig_c = hdf5_getters.get_time_signature_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(time_sig_c) == True: time_sig_c=-1 track_id = hdf5_getters.get_track_id(h5) track_7digitalid = hdf5_getters.get_track_7digitalid(h5) year = hdf5_getters.get_year(h5) bars_c = hdf5_getters.get_bars_confidence(h5) bars_c_avg= get_avg(bars_c) bars_c_max= get_max(bars_c) bars_c_min = get_min(bars_c) bars_c_stddev= get_stddev(bars_c) bars_c_count = get_count(bars_c) bars_c_sum = get_sum(bars_c) bars_start = hdf5_getters.get_bars_start(h5) bars_start_avg = get_avg(bars_start) bars_start_max= get_max(bars_start) bars_start_min = get_min(bars_start) bars_start_stddev= get_stddev(bars_start) bars_start_count = get_count(bars_start) bars_start_sum = get_sum(bars_start) beats_c = hdf5_getters.get_beats_confidence(h5) beats_c_avg= get_avg(beats_c) beats_c_max= get_max(beats_c) beats_c_min = get_min(beats_c) beats_c_stddev= get_stddev(beats_c) beats_c_count = get_count(beats_c) beats_c_sum = get_sum(beats_c) beats_start = hdf5_getters.get_beats_start(h5) beats_start_avg = get_avg(beats_start) beats_start_max= get_max(beats_start) beats_start_min = get_min(beats_start) beats_start_stddev= get_stddev(beats_start) beats_start_count = get_count(beats_start) beats_start_sum = get_sum(beats_start) sec_c = hdf5_getters.get_sections_confidence(h5) sec_c_avg= get_avg(sec_c) sec_c_max= get_max(sec_c) sec_c_min = get_min(sec_c) sec_c_stddev= get_stddev(sec_c) sec_c_count = get_count(sec_c) sec_c_sum = get_sum(sec_c) sec_start = hdf5_getters.get_sections_start(h5) sec_start_avg = get_avg(sec_start) sec_start_max= get_max(sec_start) sec_start_min = get_min(sec_start) sec_start_stddev= get_stddev(sec_start) sec_start_count = get_count(sec_start) sec_start_sum = get_sum(sec_start) seg_c = hdf5_getters.get_segments_confidence(h5) seg_c_avg= get_avg(seg_c) seg_c_max= get_max(seg_c) seg_c_min = get_min(seg_c) seg_c_stddev= get_stddev(seg_c) seg_c_count = get_count(seg_c) seg_c_sum = get_sum(seg_c) seg_loud_max = hdf5_getters.get_segments_loudness_max(h5) seg_loud_max_avg= get_avg(seg_loud_max) seg_loud_max_max= get_max(seg_loud_max) seg_loud_max_min = get_min(seg_loud_max) seg_loud_max_stddev= get_stddev(seg_loud_max) seg_loud_max_count = get_count(seg_loud_max) seg_loud_max_sum = get_sum(seg_loud_max) seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5) seg_loud_max_time_avg= get_avg(seg_loud_max_time) seg_loud_max_time_max= get_max(seg_loud_max_time) seg_loud_max_time_min = get_min(seg_loud_max_time) seg_loud_max_time_stddev= get_stddev(seg_loud_max_time) seg_loud_max_time_count = get_count(seg_loud_max_time) seg_loud_max_time_sum = get_sum(seg_loud_max_time) seg_loud_start = hdf5_getters.get_segments_loudness_start(h5) seg_loud_start_avg= get_avg(seg_loud_start) seg_loud_start_max= get_max(seg_loud_start) seg_loud_start_min = get_min(seg_loud_start) seg_loud_start_stddev= get_stddev(seg_loud_start) seg_loud_start_count = get_count(seg_loud_start) seg_loud_start_sum = get_sum(seg_loud_start) seg_pitch = hdf5_getters.get_segments_pitches(h5) pitch_size = len(seg_pitch) seg_start = hdf5_getters.get_segments_start(h5) seg_start_avg= get_avg(seg_start) seg_start_max= get_max(seg_start) seg_start_min = get_min(seg_start) seg_start_stddev= get_stddev(seg_start) seg_start_count = get_count(seg_start) seg_start_sum = get_sum(seg_start) seg_timbre = hdf5_getters.get_segments_timbre(h5) tatms_c = hdf5_getters.get_tatums_confidence(h5) tatms_c_avg= get_avg(tatms_c) tatms_c_max= get_max(tatms_c) tatms_c_min = get_min(tatms_c) tatms_c_stddev= get_stddev(tatms_c) tatms_c_count = get_count(tatms_c) tatms_c_sum = get_sum(tatms_c) tatms_start = hdf5_getters.get_tatums_start(h5) tatms_start_avg= get_avg(tatms_start) tatms_start_max= get_max(tatms_start) tatms_start_min = get_min(tatms_start) tatms_start_stddev= get_stddev(tatms_start) tatms_start_count = get_count(tatms_start) tatms_start_sum = get_sum(tatms_start) #Getting the genres genre_set = 0 #flag to see if the genre has been set or not art_trm = hdf5_getters.get_artist_terms(h5) trm_freq = hdf5_getters.get_artist_terms_freq(h5) trn_wght = hdf5_getters.get_artist_terms_weight(h5) a_mb_tags = hdf5_getters.get_artist_mbtags(h5) genre_indexes=get_genre_indexes(trm_freq) #index of the highest freq final_genre=[] genres_so_far=[] for i in range(len(genre_indexes)): genre_tmp=get_genre(art_trm,genre_indexes[i]) #genre that corresponds to the highest freq genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary if len(genres_so_far) != 0: for i in genres_so_far: final_genre.append(i) genre_set=1 #genre was found in dictionary if genre_set == 1: col_num=[] for genre in final_genre: column=int(genre) #getting the column number of the genre col_num.append(column) genre_array=genre_columns(col_num) #genre array else: genre_array=genre_columns(-1) #the genre was not found in the dictionary transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows #arrays containing the aggregate values of the 12 rows seg_pitch_avg=[] seg_pitch_max=[] seg_pitch_min=[] seg_pitch_stddev=[] seg_pitch_count=[] seg_pitch_sum=[] i=0 #Getting the aggregate values in the pitches array for row in transpose_pitch: seg_pitch_avg.append(get_avg(row)) seg_pitch_max.append(get_max(row)) seg_pitch_min.append(get_min(row)) seg_pitch_stddev.append(get_stddev(row)) seg_pitch_count.append(get_count(row)) seg_pitch_sum.append(get_sum(row)) i=i+1 #extracting information from the timbre array transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows #arrays containing the aggregate values of the 12 rows seg_timbre_avg=[] seg_timbre_max=[] seg_timbre_min=[] seg_timbre_stddev=[] seg_timbre_count=[] seg_timbre_sum=[] i=0 for row in transpose_timbre: seg_timbre_avg.append(get_avg(row)) seg_timbre_max.append(get_max(row)) seg_timbre_min.append(get_min(row)) seg_timbre_stddev.append(get_stddev(row)) seg_timbre_count.append(get_count(row)) seg_timbre_sum.append(get_sum(row)) i=i+1 #Writing to the flat file writer.writerow([title,album,artist_name,year,duration,seg_start_count, tempo]) h5.close() count=count+1; print count;
def main(argv): if len(argv) != 1: print "Specify data directory" return basedir = argv[0] outputFile1 = open('SongCSV.csv', 'w') outputFile2 = open('TagsCSV.csv', 'w') csvRowString = "" csvLabelString = "" ################################################# #if you want to prompt the user for the order of attributes in the csv, #leave the prompt boolean set to True #else, set 'prompt' to False and set the order of attributes in the 'else' #clause prompt = False ################################################# if prompt == True: while prompt: prompt = False csvAttributeString = raw_input("\n\nIn what order would you like the colums of the CSV file?\n" + "Please delineate with commas. The options are: " + "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"+ " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," + " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" + "For example, you may write \"Title, Tempo, Duration\"...\n\n" + "...or exit by typing 'exit'.\n\n") csvAttributeList = re.split('\W+', csvAttributeString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += 'AlbumID' elif attribute == 'AlbumName'.lower(): csvRowString += 'AlbumName' elif attribute == 'ArtistID'.lower(): csvRowString += 'ArtistID' elif attribute == 'ArtistLatitude'.lower(): csvRowString += 'ArtistLatitude' elif attribute == 'ArtistLocation'.lower(): csvRowString += 'ArtistLocation' elif attribute == 'ArtistLongitude'.lower(): csvRowString += 'ArtistLongitude' elif attribute == 'ArtistName'.lower(): csvRowString += 'ArtistName' elif attribute == 'Danceability'.lower(): csvRowString += 'Danceability' elif attribute == 'Duration'.lower(): csvRowString += 'Duration' elif attribute == 'KeySignature'.lower(): csvRowString += 'KeySignature' elif attribute == 'KeySignatureConfidence'.lower(): csvRowString += 'KeySignatureConfidence' elif attribute == 'SongID'.lower(): csvRowString += "SongID" elif attribute == 'Tempo'.lower(): csvRowString += 'Tempo' elif attribute == 'TimeSignature'.lower(): csvRowString += 'TimeSignature' elif attribute == 'TimeSignatureConfidence'.lower(): csvRowString += 'TimeSignatureConfidence' elif attribute == 'Title'.lower(): csvRowString += 'Title' elif attribute == 'Year'.lower(): csvRowString += 'Year' elif attribute == 'Exit'.lower(): sys.exit() else: prompt = True print "==============" print "I believe there has been an error with the input." print "==============" break csvRowString += "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] csvRowString += "\n" outputFile1.write(csvRowString); csvRowString = "" #else, if you want to hard code the order of the csv file and not prompt #the user, else: ################################################# #change the order of the csv file here #Default is to list all available attributes (in alphabetical order) #csvRowString = ("SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"+ # "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,"+ # "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"+ # "Title,Year") csvRowString = ("ArtistFamiliarity,ArtistHotttnesss,"+ "BarsConfidence,BarsStart,BeatsConfidence,BeatsStart,Duration,"+ "EndOfFadeIn,Key,KeyConfidence,Loudness,Mode,ModeConfidence,"+ "SectionsConfidence,SectionsStart,SegmentsConfidence,SegmentsLoudnessMax,"+ "SegmentsLoudnessMaxTime,SegmentsLoudnessStart,SegmentsStart,"+ "SongHotttnesss,StartOfFadeOut,TatumsConfidence,TatumsStart,Tempo,TimeSignature,TimeSignatureConfidence,"+ "SegmentsPitches,SegmentsTimbre,Title,Year,Decade,ArtistMbtags") ################################################# header = str() csvAttributeList = re.split('\W+', csvRowString) arrayAttributes = ["BarsConfidence","BarsStart","BeatsConfidence","BeatsStart", "SectionsConfidence","SectionsStart","SegmentsConfidence","SegmentsLoudnessMax", "SegmentsLoudnessMaxTime","SegmentsLoudnessStart","SegmentsStart", "TatumsConfidence","TatumsStart"] for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() if(v=="SegmentsPitches"): for i in range(90): header = header + "SegmentsPitches" + str(i) + "," elif(v=="SegmentsTimbre"): for i in range(90): header = header + "SegmentsTimbre" + str(i) + "," elif(v in arrayAttributes): header = header + v + str(0) + "," header = header + v + str(1) + "," else: header = header + v + "," outputFile1.write("SongNumber,"); #outputFile1.write(csvRowString + "\n"); outputFile1.write(header + "\n"); csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate #basedir = "MillionSongSubset/data/A/A/" # "." As the default means the current directory ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP all = sorted(os.walk(basedir)) for root, dirs, files in all: files = sorted(glob.glob(os.path.join(root,'*'+ext))) for f in files: print f songH5File = hdf5_getters.open_h5_file_read(f) song = Song(str(hdf5_getters.get_song_id(songH5File))) #testDanceability = hdf5_getters.get_danceability(songH5File) # print type(testDanceability) # print ("Here is the danceability: ") + str(testDanceability) song.analysisSampleRate = str(hdf5_getters.get_analysis_sample_rate(songH5File)) song.artistFamiliarity = str(hdf5_getters.get_artist_familiarity(songH5File)) song.artistHotttnesss = str(hdf5_getters.get_artist_hotttnesss(songH5File)) song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File)) song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File)) song.artistMbid = str(hdf5_getters.get_artist_mbid(songH5File)) song.barsConfidence = np.array(hdf5_getters.get_bars_confidence(songH5File)) song.barsStart = np.array(hdf5_getters.get_bars_start(songH5File)) song.beatsConfidence = np.array(hdf5_getters.get_beats_confidence(songH5File)) song.beatsStart = np.array(hdf5_getters.get_beats_start(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) song.key = str(hdf5_getters.get_key(songH5File)) song.keyConfidence = str(hdf5_getters.get_key_confidence(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) song.modeConfidence = str(hdf5_getters.get_mode_confidence(songH5File)) song.sectionsConfidence = np.array(hdf5_getters.get_sections_confidence(songH5File)) song.sectionsStart = np.array(hdf5_getters.get_sections_start(songH5File)) song.segmentsConfidence = np.array(hdf5_getters.get_segments_confidence(songH5File)) song.segmentsLoudnessMax = np.array(hdf5_getters.get_segments_loudness_max(songH5File)) song.segmentsLoudnessMaxTime = np.array(hdf5_getters.get_segments_loudness_max_time(songH5File)) song.segmentsLoudnessStart = np.array(hdf5_getters.get_segments_loudness_start(songH5File)) song.segmentsPitches = np.array(hdf5_getters.get_segments_pitches(songH5File)) song.segmentsStart = np.array(hdf5_getters.get_segments_start(songH5File)) song.segmentsTimbre = np.array(hdf5_getters.get_segments_timbre(songH5File)) song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File)) song.startOfFadeOut = str(hdf5_getters.get_start_of_fade_out(songH5File)) song.tatumsConfidence = np.array(hdf5_getters.get_tatums_confidence(songH5File)) song.tatumsStart = np.array(hdf5_getters.get_tatums_start(songH5File)) song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str(hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File)) song.songid = str(hdf5_getters.get_song_id(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) song.artistMbtags = str(hdf5_getters.get_artist_mbtags(songH5File)) #print song count csvRowString += str(song.songCount) + "," csvLabelString += str(song.songCount) + "," for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AnalysisSampleRate'.lower(): csvRowString += song.analysisSampleRate elif attribute == 'ArtistFamiliarity'.lower(): csvRowString += song.artistFamiliarity elif attribute == 'ArtistHotttnesss'.lower(): csvRowString += song.artistHotttnesss elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLongitude'.lower(): longitude = song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistMbid'.lower(): csvRowString += song.artistMbid elif attribute == 'BarsConfidence'.lower(): arr = song.barsConfidence if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'BarsStart'.lower(): arr = song.barsStart if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'BeatsConfidence'.lower(): arr = song.beatsConfidence if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'BeatsStart'.lower(): arr = song.beatsStart if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'EndOfFadeIn'.lower(): csvRowString += song.endOfFadeIn elif attribute == 'Energy'.lower(): csvRowString += song.energy elif attribute == 'Key'.lower(): csvRowString += song.key elif attribute == 'KeyConfidence'.lower(): csvRowString += song.keyConfidence elif attribute == 'Loudness'.lower(): csvRowString += song.loudness elif attribute == 'Mode'.lower(): csvRowString += song.mode elif attribute == 'ModeConfidence'.lower(): csvRowString += song.modeConfidence elif attribute == 'SectionsConfidence'.lower(): arr = song.sectionsConfidence if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SectionsStart'.lower(): arr = song.sectionsStart if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SegmentsConfidence'.lower(): arr = song.segmentsConfidence if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SegmentsLoudnessMax'.lower(): arr = song.segmentsLoudnessMax if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SegmentsLoudnessMaxTime'.lower(): arr = song.segmentsLoudnessMaxTime if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SegmentsLoudnessStart'.lower(): arr = song.segmentsLoudnessStart if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SegmentsStart'.lower(): arr = song.segmentsStart if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'SongHotttnesss'.lower(): hotttnesss = song.songHotttnesss if hotttnesss == 'nan': hotttnesss = 'NaN' csvRowString += hotttnesss elif attribute == 'StartOfFadeOut'.lower(): csvRowString += song.startOfFadeOut elif attribute == 'TatumsConfidence'.lower(): arr = song.tatumsConfidence if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'TatumsStart'.lower(): arr = song.tatumsStart if arr.shape[0] == 0: arrmean = '' arrnorm = '' else: arrmean = np.mean(arr) arrnorm = np.linalg.norm(arr) csvRowString += str(arrmean) + ',' + str(arrnorm) elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'SegmentsPitches'.lower(): colmean = np.mean(song.segmentsPitches,axis=0) for m in colmean: csvRowString += str(m) + "," cov = np.dot(song.segmentsPitches.T,song.segmentsPitches) utriind = np.triu_indices(cov.shape[0]) feats = cov[utriind] for feat in feats: csvRowString += str(feat) + "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] elif attribute == 'SegmentsTimbre'.lower(): colmean = np.mean(song.segmentsTimbre,axis=0) for m in colmean: csvRowString += str(m) + "," cov = np.dot(song.segmentsTimbre.T,song.segmentsTimbre) utriind = np.triu_indices(cov.shape[0]) feats = cov[utriind] for feat in feats: csvRowString += str(feat) + "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'Title'.lower(): csvRowString += "\"" + song.title + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year elif attribute == 'Decade'.lower(): yr = song.year if yr > 0: decade = song.year[:-1] + '0' else: decade = '0' csvRowString += decade elif attribute == 'ArtistMbtags'.lower(): tags = song.artistMbtags[1:-1] tags = "\"" + tags + "\"" tags = tags.replace("\n",'') csvRowString += tags tagsarray = shlex.split(tags) for t in tagsarray: csvLabelString += t + "," else: csvRowString += "Erm. This didn't work. Error. :( :(\n" csvRowString += "," ''' if attribute == 'AlbumID'.lower(): csvRowString += song.albumID elif attribute == 'AlbumName'.lower(): albumName = song.albumName albumName = albumName.replace(',',"") csvRowString += "\"" + albumName + "\"" elif attribute == 'ArtistID'.lower(): csvRowString += "\"" + song.artistID + "\"" elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLocation'.lower(): location = song.artistLocation location = location.replace(',','') csvRowString += "\"" + location + "\"" elif attribute == 'ArtistLongitude'.lower(): longitude = song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistName'.lower(): csvRowString += "\"" + song.artistName + "\"" elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'KeySignature'.lower(): csvRowString += song.keySignature elif attribute == 'KeySignatureConfidence'.lower(): # print "key sig conf: " + song.timeSignatureConfidence csvRowString += song.keySignatureConfidence elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'Title'.lower(): csvRowString += "\"" + song.title + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year else: csvRowString += "Erm. This didn't work. Error. :( :(\n" csvRowString += "," ''' #Remove the final comma from each row in the csv lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" lastIndex = len(csvLabelString) csvLabelString = csvLabelString[0:lastIndex-1] csvLabelString += "\n" outputFile2.write(csvLabelString) csvLabelString = "" songH5File.close() outputFile1.close() outputFile2.close()
def fill_attributes(song, songH5File): #----------------------------non array attributes------------------------------- song.analysisSampleRate = str( hdf5_getters.get_analysis_sample_rate(songH5File)) song.artistDigitalID = str(hdf5_getters.get_artist_7digitalid(songH5File)) song.artistFamiliarity = str( hdf5_getters.get_artist_familiarity(songH5File)) song.artistHotness = str(hdf5_getters.get_artist_hottness(songH5File)) song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File)) song.artistLocation = str(hdf5_getters.get_artist_location(songH5File)) song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File)) song.artistmbID = str(hdf5_getters.get_artist_mbid(songH5File)) song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.artistPlayMeID = str(hdf5_getters.get_artist_playmeid(songH5File)) song.audioMD5 = str(hdf5_getters.get_audio_md5(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) song.key = str(hdf5_getters.get_key(songH5File)) song.keyConfidence = str(hdf5_getters.get_key_confidence(songH5File)) song.segementsConfidence = str( hdf5_getters.get_segments_confidence(songH5File)) song.segementsConfidence = str( hdf5_getters.get_sections_confidence(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) song.modeConfidence = str(hdf5_getters.get_mode_confidence(songH5File)) song.release = str(hdf5_getters.get_release(songH5File)) song.releaseDigitalID = str( hdf5_getters.get_release_7digitalid(songH5File)) song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File)) song.startOfFadeOut = str(hdf5_getters.get_start_of_fade_out(songH5File)) song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str(hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str( hdf5_getters.get_time_signature_confidence(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.trackID = str(hdf5_getters.get_track_id(songH5File)) song.trackDigitalID = str(hdf5_getters.get_track_7digitalid(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) #-------------------------------array attributes-------------------------------------- #array float song.beatsStart_mean, song.beatsStart_var = convert_array_to_meanvar( hdf5_getters.get_beats_start(songH5File)) #array float song.artistTermsFreq_mean, song.artistTermsFreq_var = convert_array_to_meanvar( hdf5_getters.get_artist_terms_freq(songH5File)) #array float song.artistTermsWeight_mean, song.artistTermsWeight_var = convert_array_to_meanvar( hdf5_getters.get_artist_terms_weight(songH5File)) #array int song.artistmbTagsCount_mean, song.artistmbTagsCount_var = convert_array_to_meanvar( hdf5_getters.get_artist_mbtags_count(songH5File)) #array float song.barsConfidence_mean, song.barsConfidence_var = convert_array_to_meanvar( hdf5_getters.get_bars_confidence(songH5File)) #array float song.barsStart_mean, song.barsStart_var = convert_array_to_meanvar( hdf5_getters.get_bars_start(songH5File)) #array float song.beatsConfidence_mean, song.beatsConfidence_var = convert_array_to_meanvar( hdf5_getters.get_beats_confidence(songH5File)) #array float song.sectionsConfidence_mean, song.sectionsConfidence_var = convert_array_to_meanvar( hdf5_getters.get_sections_confidence(songH5File)) #array float song.sectionsStart_mean, song.sectionsStart_var = convert_array_to_meanvar( hdf5_getters.get_sections_start(songH5File)) #array float song.segmentsConfidence_mean, song.segmentsConfidence_var = convert_array_to_meanvar( hdf5_getters.get_segments_confidence(songH5File)) #array float song.segmentsLoudness_mean, song.segmentsLoudness_var = convert_array_to_meanvar( hdf5_getters.get_segments_loudness_max(songH5File)) #array float song.segmentsLoudnessMaxTime_mean, song.segmentsLoudnessMaxTime_var = convert_array_to_meanvar( hdf5_getters.get_segments_loudness_max_time(songH5File)) #array float song.segmentsLoudnessMaxStart_mean, song.segmentsLoudnessMaxStart_var = convert_array_to_meanvar( hdf5_getters.get_segments_loudness_start(songH5File)) #array float song.segmentsStart_mean, song.segmentsStart_var = convert_array_to_meanvar( hdf5_getters.get_segments_start(songH5File)) #array float song.tatumsConfidence_mean, song.tatumsConfidence_var = convert_array_to_meanvar( hdf5_getters.get_tatums_confidence(songH5File)) #array float song.tatumsStart_mean, song.tatumsStart_var = convert_array_to_meanvar( hdf5_getters.get_tatums_start(songH5File)) #array2d float song.segmentsTimbre_mean, song.segmentsTimbre_var = covert_2darray_to_meanvar( hdf5_getters.get_segments_timbre(songH5File)) #array2d float song.segmentsPitches_mean, song.segmentsPitches_var = covert_2darray_to_meanvar( hdf5_getters.get_segments_pitches(songH5File)) #------------------------array string attributes------------------------ song.similarArtists = convert_array_to_string( hdf5_getters.get_similar_artists(songH5File)) #array string song.artistTerms = convert_array_to_string( hdf5_getters.get_artist_terms(songH5File)) #array string song.artistmbTags = convert_array_to_string( hdf5_getters.get_artist_mbtags(songH5File)) #array string return song
def get_all_rows(basedir, ext='.h5'): rows = [] for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: # print(os.path.join(root, f)) h5 = hdf5_getters.open_h5_file_read(f) num_songs = hdf5_getters.get_num_songs(h5) # print(num_songs) for i in range(num_songs): print(i) obj = {} obj['artist_name'] = hdf5_getters.get_artist_name( h5, i).decode('UTF-8') obj['artist_familiarity'] = hdf5_getters.get_artist_familiarity( h5, i) obj['artist_hotness'] = hdf5_getters.get_artist_hotttnesss( h5, i) obj['artist_id'] = hdf5_getters.get_artist_id( h5, i).decode('UTF-8') # obj['artist_mbid']=hdf5_getters.get_artist_mbid(h5,i).decode('UTF-8') obj['artist_playmeid'] = hdf5_getters.get_artist_playmeid( h5, i) obj['artist_7digitalid'] = hdf5_getters.get_artist_7digitalid( h5, i) # obj['artist_latitude']=hdf5_getters.get_artist_latitude(h5,i) # obj['artist_longitude']=hdf5_getters.get_artist_longitude(h5,i) # obj['artist_location']=hdf5_getters.get_artist_location(h5,i).decode('UTF-8') obj['artist_name'] = hdf5_getters.get_artist_name( h5, i).decode('UTF-8') obj['release'] = hdf5_getters.get_release(h5, i).decode('UTF-8') obj['song_hotttnesss'] = hdf5_getters.get_song_hotttnesss( h5, i) obj['title'] = hdf5_getters.get_title(h5, i).decode('UTF-8') # obj['artist_terms']=hdf5_getters.get_artist_terms(h5) # obj['artist_terms_freq']=hdf5_getters.get_artist_terms_freq(h5) # obj['artist_terms_weight']=hdf5_getters.get_artist_terms_weight(h5) # obj['audio_md5']=hdf5_getters.get_audio_md5(h5).decode('UTF-8') obj['danceability'] = hdf5_getters.get_danceability(h5, i) obj['duration'] = hdf5_getters.get_duration(h5, i) obj['end_of_fade_in'] = hdf5_getters.get_end_of_fade_in(h5, i) obj['energy'] = hdf5_getters.get_energy(h5, i) obj['key'] = hdf5_getters.get_key(h5, i) obj['key_confidence'] = hdf5_getters.get_key_confidence(h5, i) obj['loudness'] = hdf5_getters.get_loudness(h5, i) obj['mode'] = hdf5_getters.get_mode(h5, i) # obj['start_of_fade_out']=hdf5_getters.get_start_of_fade_out(h5) obj['tempo'] = hdf5_getters.get_tempo(h5, i) obj['time_signature'] = hdf5_getters.get_time_signature(h5, i) # obj['time_signature_confidence']=hdf5_getters.get_time_signature_confidence(h5) obj['track_id'] = hdf5_getters.get_track_id(h5, i).decode('UTF-8') # obj['segments_start']=hdf5_getters.get_segments_start(h5) # obj['segments_confidence']=hdf5_getters.get_segments_confidence(h5) # obj['segments_pitches']=hdf5_getters.get_segments_pitches(h5) # obj['segments_timbre']=hdf5_getters.get_segments_timbre(h5) # obj['segments_loudness_max']=hdf5_getters.get_segments_loudness_max(h5) # obj['segments_loudness_max_time']=hdf5_getters.get_segments_loudness_max_time(h5) # obj['segments_confidence']=hdf5_getters.get_segments_confidence(h5) # obj['segments_loudness_start']=hdf5_getters.get_segments_loudness_start(h5) # obj['sections_start']=hdf5_getters.get_sections_start(h5) # obj['sections_confidence']=hdf5_getters.get_sections_confidence(h5) # obj['beats_start']=hdf5_getters.get_beats_start(h5) # obj['beats_confidence']=hdf5_getters.get_beats_confidence(h5) # obj['bars_start']=hdf5_getters.get_bars_start(h5) # obj['bars_confidence']=hdf5_getters.get_bars_confidence(h5) # obj['tatums_start']=hdf5_getters.get_tatums_start(h5) # obj['artist_mbtags']=hdf5_getters.get_artist_mbtags(h5) # obj['artist_mbtags_count']=hdf5_getters.get_artist_mbtags_count(h5) obj['year'] = hdf5_getters.get_year(h5, i) rows.append(obj) h5.close() return rows
def main(): outputFile1 = open('SongCSV.csv', 'w') csvRowString = "" ################################################# #if you want to prompt the user for the order of attributes in the csv, #leave the prompt boolean set to True #else, set 'prompt' to False and set the order of attributes in the 'else' #clause prompt = False ################################################# if prompt == True: while prompt: prompt = False csvAttributeString = raw_input( "\n\nIn what order would you like the colums of the CSV file?\n" + "Please delineate with commas. The options are: " + "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude," + " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," + " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" + "For example, you may write \"Title, Tempo, Duration\"...\n\n" + "...or exit by typing 'exit'.\n\n") csvAttributeList = re.split('\W+', csvAttributeString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += 'AlbumID' elif attribute == 'AlbumName'.lower(): csvRowString += 'AlbumName' elif attribute == 'ArtistID'.lower(): csvRowString += 'ArtistID' elif attribute == 'ArtistLatitude'.lower(): csvRowString += 'ArtistLatitude' elif attribute == 'ArtistLocation'.lower(): csvRowString += 'ArtistLocation' elif attribute == 'ArtistLongitude'.lower(): csvRowString += 'ArtistLongitude' elif attribute == 'ArtistName'.lower(): csvRowString += 'ArtistName' elif attribute == 'Danceability'.lower(): csvRowString += 'Danceability' elif attribute == 'Duration'.lower(): csvRowString += 'Duration' elif attribute == 'KeySignature'.lower(): csvRowString += 'KeySignature' elif attribute == 'KeySignatureConfidence'.lower(): csvRowString += 'KeySignatureConfidence' elif attribute == 'SongID'.lower(): csvRowString += "SongID" elif attribute == 'Tempo'.lower(): csvRowString += 'Tempo' elif attribute == 'TimeSignature'.lower(): csvRowString += 'TimeSignature' elif attribute == 'TimeSignatureConfidence'.lower(): csvRowString += 'TimeSignatureConfidence' elif attribute == 'Title'.lower(): csvRowString += 'Title' elif attribute == 'Year'.lower(): csvRowString += 'Year' elif attribute == 'Familiarity'.lower(): ####Added by us! csvRowString += song.familiarity elif attribute == 'artist_mbid'.lower(): csvRowString += song.artist_mbid elif attribute == 'artist_playmeid'.lower(): csvRowString += song.artist_playmeid elif attribute == 'artist_7digid'.lower(): csvRowString += song.artist_7digid elif attribute == 'hottness'.lower(): csvRowString += song.hottness elif attribute == 'song_hottness'.lower(): csvRowString += song.song_hottness elif attribute == 'digitalid7'.lower(): csvRowString += song.digitalid7 elif attribute == 'similar_artists'.lower(): csvRowString += song.similar_artists elif attribute == 'artist_terms'.lower(): csvRowString += song.artist_terms elif attribute == 'art_terms_freq'.lower(): csvRowString += song.art_terms_freq elif attribute == 'art_terms_weight'.lower(): csvRowString += song.art_terms_weight elif attribute == 'a_sample_rate'.lower(): csvRowString += song.a_sample_rate elif attribute == 'audio_md5'.lower(): csvRowString += song.audio_md5 elif attribute == 'end_of_fade_in'.lower(): csvRowString += song.end_of_fade_in elif attribute == 'energy'.lower(): csvRowString += song.energy elif attribute == 'loudness'.lower(): csvRowString += song.loudness elif attribute == 'mode'.lower(): csvRowString += song.mode elif attribute == 'mode_conf'.lower(): csvRowString += song.mode_conf elif attribute == 'start_of_fade_out'.lower(): csvRowString += song.start_of_fade_out elif attribute == 'trackid'.lower(): csvRowString += song.trackid elif attribute == 'segm_start'.lower(): csvRowString += song.segm_start elif attribute == 'segm_conf'.lower(): csvRowString += song.segm_conf elif attribute == 'segm_pitch'.lower(): csvRowString += song.segm_pitch elif attribute == 'segm_timbre'.lower(): csvRowString += song.segm_timbre elif attribute == 'segm_max_loud'.lower(): csvRowString += song.segm_max_loud elif attribute == 'segm_max_loud_time'.lower(): csvRowString += song.segm_max_loud_time elif attribute == 'segm_loud_start'.lower(): csvRowString += song.segm_loud_start elif attribute == 'sect_start'.lower(): csvRowString += song.sect_start elif attribute == 'sect_conf'.lower(): csvRowString += song.sect_conf elif attribute == 'beats_start'.lower(): csvRowString += song.beats_start elif attribute == 'beats_conf'.lower(): csvRowString += song.beats_conf elif attribute == 'bars_start'.lower(): csvRowString += song.bars_start elif attribute == 'bars_conf'.lower(): csvRowString += song.bars_conf elif attribute == 'tatums_start'.lower(): csvRowString += song.tatums_start elif attribute == 'tatums_conf'.lower(): csvRowString += song.tatums_conf elif attribute == 'artist_mbtags'.lower(): csvRowString += song.artist_mbtags elif attribute == 'artist_mbtags_count'.lower(): csvRowString += song.artist_mbtags_count elif attribute == 'Exit'.lower(): sys.exit() else: prompt = True print("==============") print("I believe there has been an error with the input.") print("==============") break csvRowString += "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" #else, if you want to hard code the order of the csv file and not prompt #the user, else: ################################################# #change the order of the csv file here #Default is to list all available attributes (in alphabetical order) csvRowString = "SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,Title,Year,Familiarity,Artist_Mbid,Artist_PlaymeId,Artist_7didId,Hottness,Song_Hottness,7digitalid,A_Sample_Rate,Audio_Md5,End_Of_Fade_In,Energy,Loudness,Mode,Mode_Conf,Start_Of_Fade_Out,TrackId" ################################################# csvAttributeList = re.split(',', csvRowString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate basedir = "/home/bigdata/smalltest/" # "." As the default means the current directory ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: print(f) songH5File = hdf5_getters.open_h5_file_read(f) song = Song(str(hdf5_getters.get_song_id(songH5File))) # testDanceability = hdf5_getters.get_danceability(songH5File) # print type(testDanceability) # print ("Here is the danceability: ") + str(testDanceability) song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File)) song.albumName = str(hdf5_getters.get_release(songH5File)) song.artistLatitude = str( hdf5_getters.get_artist_latitude(songH5File)) song.artistLocation = str( hdf5_getters.get_artist_location(songH5File)) song.artistLongitude = str( hdf5_getters.get_artist_longitude(songH5File)) song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) # song.setGenreList() song.keySignature = str(hdf5_getters.get_key(songH5File)) song.keySignatureConfidence = str( hdf5_getters.get_key_confidence(songH5File)) # song.lyrics = None # song.popularity = None song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str( hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str( hdf5_getters.get_time_signature_confidence(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) #########Added by us! song.familiarity = str( hdf5_getters.get_artist_familiarity(songH5File)) song.artist_mbid = str(hdf5_getters.get_artist_mbid(songH5File)) song.artist_playmeid = str( hdf5_getters.get_artist_playmeid(songH5File)) song.artist_7digid = str( hdf5_getters.get_artist_7digitalid(songH5File)) song.hottness = str(hdf5_getters.get_artist_hotttnesss(songH5File)) song.song_hottness = str( hdf5_getters.get_song_hotttnesss(songH5File)) song.digitalid7 = str( hdf5_getters.get_track_7digitalid(songH5File)) #song.similar_artists = str(hdf5_getters.get_similar_artists(songH5File)) #song.artist_terms = str(hdf5_getters.get_artist_terms(songH5File)) #song.art_terms_freq = str(hdf5_getters.get_artist_terms_freq(songH5File)) #song.art_terms_weight = str(hdf5_getters.get_artist_terms_weight(songH5File)) song.a_sample_rate = str( hdf5_getters.get_analysis_sample_rate(songH5File)) song.audio_md5 = str(hdf5_getters.get_audio_md5(songH5File)) song.end_of_fade_in = str( hdf5_getters.get_end_of_fade_in(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) song.mode_conf = str(hdf5_getters.get_mode_confidence(songH5File)) song.start_of_fade_out = str( hdf5_getters.get_start_of_fade_out(songH5File)) song.trackid = str(hdf5_getters.get_track_id(songH5File)) #song.segm_start = str(hdf5_getters.get_segments_start(songH5File)) #song.segm_conf = str(hdf5_getters.get_segments_confidence(songH5File)) #song.segm_pitch = str(hdf5_getters.get_segments_pitches(songH5File)) #song.segm_timbre = str(hdf5_getters.get_segments_timbre(songH5File)) #song.segm_max_loud = str(hdf5_getters.get_segments_loudness_max(songH5File)) #song.segm_max_loud_time = str(hdf5_getters.get_segments_loudness_max_time(songH5File)) #song.segm_loud_start = str(hdf5_getters.get_segments_loudness_start(songH5File)) #song.sect_start = str(hdf5_getters.get_sections_start(songH5File)) #song.sect_conf = str(hdf5_getters.get_sections_confidence(songH5File)) #song.beats_start = str(hdf5_getters.get_beats_start(songH5File)) #song.beats_conf = str(hdf5_getters.get_beats_confidence(songH5File)) #song.bars_start = str(hdf5_getters.get_bars_start(songH5File)) #song.bars_conf = str(hdf5_getters.get_bars_confidence(songH5File)) #song.tatums_start = str(hdf5_getters.get_tatums_start(songH5File)) #song.tatums_conf = str(hdf5_getters.get_tatums_confidence(songH5File)) #song.artist_mbtags = str(hdf5_getters.get_artist_mbtags(songH5File)) #song.artist_mbtags_count = str(hdf5_getters.get_artist_mbtags_count(songH5File)) #print song count #csvRowString += str(song.songCount) + "," for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += song.albumID elif attribute == 'AlbumName'.lower(): albumName = song.albumName albumName = albumName.replace("b\"", "") albumName = albumName.replace("\"", "") albumName = albumName.replace(',', "") csvRowString += "\"" + albumName + "\"" elif attribute == 'ArtistID'.lower(): csvRowString += "\"" + song.artistID + "\"" elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLocation'.lower(): location = song.artistLocation location = location.replace(',', '') location = location.replace("b\"", "") location = location.replace("\"", "") csvRowString += "\"" + location + "\"" elif attribute == 'ArtistLongitude'.lower(): longitude = song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistName'.lower(): artistName = song.artistName artistName = artistName.replace("b\"", "") artistName = artistName.replace("\"", "") csvRowString += "\"" + artistName + "\"" elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'KeySignature'.lower(): csvRowString += song.keySignature elif attribute == 'KeySignatureConfidence'.lower(): # print "key sig conf: " + song.timeSignatureConfidence csvRowString += song.keySignatureConfidence elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'Title'.lower(): t = song.title t = t.replace("b\"", "") t = t.replace("\"", "") csvRowString += "\"" + t + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year elif attribute == 'Familiarity'.lower(): ####Added by us! csvRowString += song.familiarity elif attribute == 'artist_mbid'.lower(): csvRowString += "\"" + song.artist_mbid + "\"" elif attribute == 'artist_playmeid'.lower(): csvRowString += song.artist_playmeid elif attribute == 'artist_7digid'.lower(): csvRowString += song.artist_7digid elif attribute == 'hottness'.lower(): csvRowString += song.hottness elif attribute == 'song_hottness'.lower(): csvRowString += song.song_hottness elif attribute == 'digitalid7'.lower(): csvRowString += song.digitalid7 elif attribute == 'similar_artists'.lower(): csvRowString += song.similar_artists elif attribute == 'artist_terms'.lower(): csvRowString += song.artist_terms elif attribute == 'art_terms_freq'.lower(): csvRowString += song.art_terms_freq elif attribute == 'art_terms_weight'.lower(): csvRowString += song.art_terms_weight elif attribute == 'a_sample_rate'.lower(): csvRowString += song.a_sample_rate elif attribute == 'audio_md5'.lower(): csvRowString += "\"" + song.audio_md5 + "\"" elif attribute == 'end_of_fade_in'.lower(): csvRowString += song.end_of_fade_in elif attribute == 'energy'.lower(): csvRowString += song.energy elif attribute == 'loudness'.lower(): csvRowString += song.loudness elif attribute == 'mode'.lower(): csvRowString += song.mode elif attribute == 'mode_conf'.lower(): csvRowString += song.mode_conf elif attribute == 'start_of_fade_out'.lower(): csvRowString += song.start_of_fade_out elif attribute == 'trackid'.lower(): csvRowString += "\"" + song.trackid + "\"" elif attribute == 'segm_start'.lower(): csvRowString += song.segm_start elif attribute == 'segm_conf'.lower(): csvRowString += song.segm_conf elif attribute == 'segm_pitch'.lower(): csvRowString += song.segm_pitch elif attribute == 'segm_timbre'.lower(): csvRowString += song.segm_timbre elif attribute == 'segm_max_loud'.lower(): csvRowString += song.segm_max_loud elif attribute == 'segm_max_loud_time'.lower(): csvRowString += song.segm_max_loud_time elif attribute == 'segm_loud_start'.lower(): csvRowString += song.segm_loud_start elif attribute == 'sect_start'.lower(): csvRowString += song.sect_start elif attribute == 'sect_conf'.lower(): csvRowString += song.sect_conf elif attribute == 'beats_start'.lower(): csvRowString += song.beats_start elif attribute == 'beats_conf'.lower(): csvRowString += song.beats_conf elif attribute == 'bars_start'.lower(): csvRowString += song.bars_start elif attribute == 'bars_conf'.lower(): csvRowString += song.bars_conf elif attribute == 'tatums_start'.lower(): csvRowString += song.tatums_start elif attribute == 'tatums_conf'.lower(): csvRowString += song.tatums_conf elif attribute == 'artist_mbtags'.lower(): csvRowString += song.artist_mbtags elif attribute == 'artist_mbtags_count'.lower(): csvRowString += song.artist_mbtags_count else: csvRowString += "\"ERR\"" csvRowString += "," #Remove the final comma from each row in the csv lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" songH5File.close() outputFile1.close()
artist_name = GETTERS.get_artist_name(h5, i) artist_playmeid = GETTERS.get_artist_playmeid(h5, i) artist_terms = ','.join(str(e) for e in GETTERS.get_artist_terms(h5, i)) # array #artist_terms_freq = ','.join(str(e) for e in GETTERS.get_artist_terms_freq(h5, i)) # array #artist_terms_weight = ','.join(str(e) for e in GETTERS.get_artist_terms_weight(h5, i)) # array #audio_md5 = GETTERS.get_audio_md5(h5, i) #bars_confidence = ','.join(str(e) for e in GETTERS.get_bars_confidence(h5, i)) # array #bars_start = ','.join(str(e) for e in GETTERS.get_bars_start(h5, i)) # array #beats_confidence = ','.join(str(e) for e in GETTERS.get_beats_confidence(h5, i)) # array #beats_start = ','.join(str(e) for e in GETTERS.get_beats_start(h5, i)) # array danceability = GETTERS.get_danceability(h5, i) duration = GETTERS.get_duration(h5, i) end_of_fade_in = GETTERS.get_end_of_fade_in(h5, i) energy = GETTERS.get_energy(h5, i) key = GETTERS.get_key(h5, i) key_confidence = GETTERS.get_key_confidence(h5, i) loudness = GETTERS.get_loudness(h5, i) mode = GETTERS.get_mode(h5, i) mode_confidence = GETTERS.get_mode_confidence(h5, i) release = GETTERS.get_release(h5, i) release_7digitalid = GETTERS.get_release_7digitalid(h5, i) #sections_confidence = ','.join(str(e) for e in GETTERS.get_sections_confidence(h5, i)) # array #sections_start = ','.join(str(e) for e in GETTERS.get_sections_start(h5, i)) # array #segments_confidence = ','.join(str(e) for e in GETTERS.get_segments_confidence(h5, i)) # array #segments_loudness_max = ','.join(str(e) for e in GETTERS.get_segments_loudness_max(h5, i)) # array #segments_loudness_max_time = ','.join(str(e) for e in GETTERS.get_segments_loudness_max_time(h5, i)) # array #segments_loudness_start = ','.join(str(e) for e in GETTERS.get_segments_loudness_start(h5, i)) # array #segments_pitches = ','.join(str(e) for e in GETTERS.get_segments_pitches(h5, i)) # array #segments_start = ','.join(str(e) for e in GETTERS.get_segments_start(h5, i)) # array #segments_timbre = ','.join(str(e) for e in GETTERS.get_segments_timbre(h5, i)) # array similar_artists = ','.join(str(e) for e in GETTERS.get_similar_artists(h5, i)) # array
def main(): basedir = "D:/Master K" ext = ".H5" # Set the extension here. H5 is the extension for HDF5 files. songs = [] for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) #songs = {} #keys = list() #values = list() for f in files: print(f) songH5File = hdf5_getters.open_h5_file_read(f) song = Song(str(hdf5_getters.get_song_id(songH5File))) item = {"song_id": song.id.replace('b', '')} song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.artistID = song.artistID.replace('b', '', 1) item["song_artistID"] = song.artistID song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File)) song.albumID = song.albumID.replace('b', '', 1) item["song_albumID"] = song.albumID song.albumName = str(hdf5_getters.get_release(songH5File)) song.albumName = song.albumName.replace('b', '', 1) item["song_albumName"] = song.albumName song.artistLatitude = str( hdf5_getters.get_artist_latitude(songH5File)) song.artistLatitude = song.artistLatitude.replace('b', '', 1) item["song_artistLatitude"] = song.artistLatitude song.artistLocation = str( hdf5_getters.get_artist_location(songH5File)) song.artistLocation = song.artistLocation.replace('b', '', 1) item["song_artistLocation"] = song.artistLocation song.artistLongitude = str( hdf5_getters.get_artist_longitude(songH5File)) song.artistLongitude = song.artistLongitude.replace('b', '', 1) item["song_artistLongitude"] = song.artistLongitude song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.artistName = song.artistName.replace('b', '', 1) item["song_artistName"] = song.artistName song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.danceability = song.danceability.replace('b', '', 1) item["song_danceability"] = song.danceability song.duration = str(hdf5_getters.get_duration(songH5File)) song.duration = song.duration.replace('b', '', 1) item["song_duration"] = song.duration song.keySignature = str(hdf5_getters.get_key(songH5File)) song.keySignature = song.keySignature.replace('b', '', 1) item["song_keySignature"] = song.keySignature song.keySignatureConfidence = str( hdf5_getters.get_key_confidence(songH5File)) song.keySignatureConfidence = song.keySignatureConfidence.replace( 'b', '', 1) item["song_keySignatureConfidence"] = song.keySignatureConfidence song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.tempo = song.tempo.replace('b', '', 1) item["song_tempo"] = song.tempo song.timeSignature = str( hdf5_getters.get_time_signature(songH5File)) song.timeSignature = song.timeSignature.replace('b', '', 1) item["song_timeSignature"] = song.timeSignature song.timeSignatureConfidence = str( hdf5_getters.get_time_signature_confidence(songH5File)) song.timeSignatureConfidence = song.timeSignatureConfidence.replace( 'b', '', 1) item["song_timeSignatureConfidence"] = song.timeSignatureConfidence song.title = str(hdf5_getters.get_title(songH5File)) song.title = song.title.replace('b', '', 1) item["song_title"] = song.title song.year = str(hdf5_getters.get_year(songH5File)) song.year = song.year.replace('b', '', 1) item["song_year"] = song.year #song.mfcc = str(hdf5_getters.get_segments_timbre(songH5File)) #item["song_mfcc"] = song.mfcc item["song_mfcc"] = list( hdf5_getters.get_segments_timbre(songH5File)) song.hotness = str(hdf5_getters.get_artist_hotttnesss(songH5File)) item["song_hotness"] = song.hotness songs.append(item) songH5File.close() #song_dict= dict(zip(keys, values)) with open("D:\data_file_k.json", "w") as write_file: json.dump(songs, write_file, cls=NumpyEncoder)
def main(): basedir = "./../songMetaInfo.txt" ext = ".h5" if len(sys.argv) > 1: basedir = sys.argv[1] outputfile = 'SongFileMetaData.csv' if len(sys.argv) > 2: outputfile = sys.argv[2] csvWriter = open(outputfile, 'w') csvWriter.write( "title,songId,artistId,artistfamilarity,artistHotness,songHotness," + "songEnfOfFadeIn,startFadeout,energy,loudness,albumID,albumName,artistName,danceability,duration,keySignatureConfidence,tempo,timeSignature,timeSignatureConfidence,year\n" ) with open(basedir) as file: for line in file.readlines(): f = line.strip() #newf = f + "text" print f #print f try: songH5File = hdf5_getters.open_h5_file_read(f) csvStr = "" #0 title = str(hdf5_getters.get_title(songH5File)) csvStr += title + "," #1 songId = str(hdf5_getters.get_song_id(songH5File)) csvStr += songId + "," #2 artistId = str(hdf5_getters.get_artist_id(songH5File)) csvStr += artistId + "," #3 artistfamilarity = str( hdf5_getters.get_artist_familiarity(songH5File)) csvStr += artistfamilarity + "," #4 artistHotness = str( hdf5_getters.get_artist_hotttnesss(songH5File)) csvStr += artistHotness + "," #5 songHotness = str(hdf5_getters.get_song_hotttnesss(songH5File)) csvStr += songHotness + "," #6 songEnfOfFadeIn = str( hdf5_getters.get_end_of_fade_in(songH5File)) csvStr += songEnfOfFadeIn + "," #7 startFadeOut = str( hdf5_getters.get_start_of_fade_out(songH5File)) csvStr += startFadeOut + "," #8 energy = str(hdf5_getters.get_energy(songH5File)) csvStr += energy + "," #9 loudness = str(hdf5_getters.get_loudness(songH5File)) csvStr += loudness + "," #10 albumID = str(hdf5_getters.get_release_7digitalid(songH5File)) csvStr += albumID + "," #11 albumName = str(hdf5_getters.get_release(songH5File)) csvStr += albumName + "," #12 artistName = str(hdf5_getters.get_artist_name(songH5File)) csvStr += artistName + "," #13 danceability = str(hdf5_getters.get_danceability(songH5File)) csvStr += danceability + "," #14 duration = str(hdf5_getters.get_duration(songH5File)) csvStr += duration + "," #15 keySignatureConfidence = str( hdf5_getters.get_key_confidence(songH5File)) csvStr += keySignatureConfidence + "," #16 tempo = str(hdf5_getters.get_tempo(songH5File)) csvStr += tempo + "," ## 17 timeSignature = str( hdf5_getters.get_time_signature(songH5File)) csvStr += timeSignature + "," #18 timeSignatureConfidence = str( hdf5_getters.get_time_signature_confidence(songH5File)) csvStr += timeSignatureConfidence + "," #19 year = str(hdf5_getters.get_year(songH5File)) csvStr += year + "," #print song count csvStr += "\n" csvWriter.write(csvStr) #print csvStr songH5File.close() except: print "Error in processing file" csvWriter.close()
def getInfo(files): data = [] build_str = '' with open(sys.argv[1], 'r') as f: contents = f.read() c = contents.split() f.close() print("creating csv with following fields:" + contents) for i in c: build_str = build_str + i + ',' build_str = build_str[:-1] build_str = build_str + '\n' for fil in files: curFile = getters.open_h5_file_read(fil) d2 = {} get_table = {'track_id': getters.get_track_id(curFile), 'segments_pitches': getters.get_segments_pitches(curFile), 'time_signature_confidence': getters.get_time_signature_confidence(curFile), 'song_hotttnesss': getters.get_song_hotttnesss(curFile), 'artist_longitude': getters.get_artist_longitude(curFile), 'tatums_confidence': getters.get_tatums_confidence(curFile), 'num_songs': getters.get_num_songs(curFile), 'duration': getters.get_duration(curFile), 'start_of_fade_out': getters.get_start_of_fade_out(curFile), 'artist_name': getters.get_artist_name(curFile), 'similar_artists': getters.get_similar_artists(curFile), 'artist_mbtags': getters.get_artist_mbtags(curFile), 'artist_terms_freq': getters.get_artist_terms_freq(curFile), 'release': getters.get_release(curFile), 'song_id': getters.get_song_id(curFile), 'track_7digitalid': getters.get_track_7digitalid(curFile), 'title': getters.get_title(curFile), 'artist_latitude': getters.get_artist_latitude(curFile), 'energy': getters.get_energy(curFile), 'key': getters.get_key(curFile), 'release_7digitalid': getters.get_release_7digitalid(curFile), 'artist_mbid': getters.get_artist_mbid(curFile), 'segments_confidence': getters.get_segments_confidence(curFile), 'artist_hotttnesss': getters.get_artist_hotttnesss(curFile), 'time_signature': getters.get_time_signature(curFile), 'segments_loudness_max_time': getters.get_segments_loudness_max_time(curFile), 'mode': getters.get_mode(curFile), 'segments_loudness_start': getters.get_segments_loudness_start(curFile), 'tempo': getters.get_tempo(curFile), 'key_confidence': getters.get_key_confidence(curFile), 'analysis_sample_rate': getters.get_analysis_sample_rate(curFile), 'bars_confidence': getters.get_bars_confidence(curFile), 'artist_playmeid': getters.get_artist_playmeid(curFile), 'artist_terms_weight': getters.get_artist_terms_weight(curFile), 'segments_start': getters.get_segments_start(curFile), 'artist_location': getters.get_artist_location(curFile), 'loudness': getters.get_loudness(curFile), 'year': getters.get_year(curFile), 'artist_7digitalid': getters.get_artist_7digitalid(curFile), 'audio_md5': getters.get_audio_md5(curFile), 'segments_timbre': getters.get_segments_timbre(curFile), 'mode_confidence': getters.get_mode_confidence(curFile), 'end_of_fade_in': getters.get_end_of_fade_in(curFile), 'danceability': getters.get_danceability(curFile), 'artist_familiarity': getters.get_artist_familiarity(curFile), 'artist_mbtags_count': getters.get_artist_mbtags_count(curFile), 'tatums_start': getters.get_tatums_start(curFile), 'artist_id': getters.get_artist_id(curFile), 'segments_loudness_max': getters.get_segments_loudness_max(curFile), 'bars_start': getters.get_bars_start(curFile), 'beats_start': getters.get_beats_start(curFile), 'artist_terms': getters.get_artist_terms(curFile), 'sections_start': getters.get_sections_start(curFile), 'beats_confidence': getters.get_beats_confidence(curFile), 'sections_confidence': getters.get_sections_confidence(curFile)} tid = fil.split('/')[-1].split('.')[0] # print(c) for i in c: if i in get_table: d2[i] = get_table[i] d2[i] = str(d2[i]).replace('\n','') build_str = build_str + d2[i] + ',' else: print('error: unspecified field') exit(0) build_str = build_str[:-1] # print(build_str[:-1]) build_str = build_str + '\n' curFile.close() build_str = build_str.replace('b','').replace("'",'').replace('"','') return (build_str)
# Check existence of the HDF5 file if not os.path.isfile(hdf5path): continue #print('ERROR: file',hdf5path,'does not exist.') #sys.exit(0) h5 = hdf5_getters.open_h5_file_read(hdf5path) # Retrieve features from HDF5 danceability = hdf5_getters.get_danceability(h5) duration = hdf5_getters.get_duration(h5) time_of_fade_in = hdf5_getters.get_end_of_fade_in(h5) energy = hdf5_getters.get_energy(h5) key = hdf5_getters.get_key(h5) key_confidence = hdf5_getters.get_key_confidence(h5) loudness = hdf5_getters.get_loudness(h5) mode = hdf5_getters.get_mode(h5) mode_confidence = hdf5_getters.get_mode_confidence(h5) sections_start = hdf5_getters.get_sections_start(h5) num_sections = len(sections_start) if num_sections == 0: h5.close() continue segments_loudness_max = hdf5_getters.get_segments_loudness_max(h5) segments_loudness_start = hdf5_getters.get_segments_loudness_start(h5) num_segments = len(hdf5_getters.get_segments_start(h5)) num_tatums = len(hdf5_getters.get_tatums_start(h5)) time_of_fade_out = duration - hdf5_getters.get_start_of_fade_out(h5) tempo = hdf5_getters.get_tempo(h5) time_signature = hdf5_getters.get_time_signature(h5)
def data_to_flat_file(basedir, ext='.h5'): """ This function extracts the information from the tables and creates the flat file. """ count = 0 #song counter list_to_write = [] group_index = 0 row_to_write = "" writer = csv.writer(open("complete.csv", "wb")) for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: row = [] print f h5 = hdf5_getters.open_h5_file_read(f) title = hdf5_getters.get_title(h5) title = title.replace('"', '') row.append(title) comma = title.find(',') if comma != -1: print title time.sleep(1) album = hdf5_getters.get_release(h5) album = album.replace('"', '') row.append(album) comma = album.find(',') if comma != -1: print album time.sleep(1) artist_name = hdf5_getters.get_artist_name(h5) comma = artist_name.find(',') if comma != -1: print artist_name time.sleep(1) artist_name = artist_name.replace('"', '') row.append(artist_name) duration = hdf5_getters.get_duration(h5) row.append(duration) samp_rt = hdf5_getters.get_analysis_sample_rate(h5) row.append(samp_rt) artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5) row.append(artist_7digitalid) artist_fam = hdf5_getters.get_artist_familiarity(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_fam) == True: artist_fam = -1 row.append(artist_fam) artist_hotness = hdf5_getters.get_artist_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_hotness) == True: artist_hotness = -1 row.append(artist_hotness) artist_id = hdf5_getters.get_artist_id(h5) row.append(artist_id) artist_lat = hdf5_getters.get_artist_latitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lat) == True: artist_lat = -1 row.append(artist_lat) artist_loc = hdf5_getters.get_artist_location(h5) row.append(artist_loc) artist_lon = hdf5_getters.get_artist_longitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lon) == True: artist_lon = -1 row.append(artist_lon) artist_mbid = hdf5_getters.get_artist_mbid(h5) row.append(artist_mbid) #Getting the genre art_trm = hdf5_getters.get_artist_terms(h5) trm_freq = hdf5_getters.get_artist_terms_freq(h5) trn_wght = hdf5_getters.get_artist_terms_weight(h5) a_mb_tags = hdf5_getters.get_artist_mbtags(h5) genre_indexes = get_genre_indexes( trm_freq) #index of the highest freq genre_set = 0 #flag to see if the genre has been set or not final_genre = [] genres_so_far = [] for i in range(len(genre_indexes)): genre_tmp = get_genre( art_trm, genre_indexes[i] ) #genre that corresponds to the highest freq genres_so_far = genre_dict.get_genre_in_dict( genre_tmp) #getting the genre from the dictionary if len(genres_so_far) != 0: for i in genres_so_far: final_genre.append(i) genre_set = 1 if genre_set == 1: col_num = [] for i in final_genre: column = int(i) #getting the column number of the genre col_num.append(column) genre_array = genre_columns(col_num) #genre array for i in range(len( genre_array)): #appending the genre_array to the row row.append(genre_array[i]) else: genre_array = genre_columns( -1 ) #when there is no genre matched, return an array of [0...0] for i in range(len( genre_array)): #appending the genre_array to the row row.append(genre_array[i]) artist_pmid = hdf5_getters.get_artist_playmeid(h5) row.append(artist_pmid) audio_md5 = hdf5_getters.get_audio_md5(h5) row.append(audio_md5) danceability = hdf5_getters.get_danceability(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(danceability) == True: danceability = -1 row.append(danceability) end_fade_in = hdf5_getters.get_end_of_fade_in(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(end_fade_in) == True: end_fade_in = -1 row.append(end_fade_in) energy = hdf5_getters.get_energy(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(energy) == True: energy = -1 row.append(energy) song_key = hdf5_getters.get_key(h5) row.append(song_key) key_c = hdf5_getters.get_key_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(key_c) == True: key_c = -1 row.append(key_c) loudness = hdf5_getters.get_loudness(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(loudness) == True: loudness = -1 row.append(loudness) mode = hdf5_getters.get_mode(h5) row.append(mode) mode_conf = hdf5_getters.get_mode_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(mode_conf) == True: mode_conf = -1 row.append(mode_conf) release_7digitalid = hdf5_getters.get_release_7digitalid(h5) row.append(release_7digitalid) song_hot = hdf5_getters.get_song_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(song_hot) == True: song_hot = -1 row.append(song_hot) song_id = hdf5_getters.get_song_id(h5) row.append(song_id) start_fade_out = hdf5_getters.get_start_of_fade_out(h5) row.append(start_fade_out) tempo = hdf5_getters.get_tempo(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(tempo) == True: tempo = -1 row.append(tempo) time_sig = hdf5_getters.get_time_signature(h5) row.append(time_sig) time_sig_c = hdf5_getters.get_time_signature_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(time_sig_c) == True: time_sig_c = -1 row.append(time_sig_c) track_id = hdf5_getters.get_track_id(h5) row.append(track_id) track_7digitalid = hdf5_getters.get_track_7digitalid(h5) row.append(track_7digitalid) year = hdf5_getters.get_year(h5) row.append(year) bars_c = hdf5_getters.get_bars_confidence(h5) bars_start = hdf5_getters.get_bars_start(h5) row_bars_padding = padding( 245 ) #this is the array that will be attached at the end of th row #--------------bars---------------" gral_info = [] gral_info = row[:] empty = [] for i, item in enumerate(bars_c): row.append(group_index) row.append(i) row.append(bars_c[i]) bars_c_avg = get_avg(bars_c) row.append(bars_c_avg) bars_c_max = get_max(bars_c) row.append(bars_c_max) bars_c_min = get_min(bars_c) row.append(bars_c_min) bars_c_stddev = get_stddev(bars_c) row.append(bars_c_stddev) bars_c_count = get_count(bars_c) row.append(bars_c_count) bars_c_sum = get_sum(bars_c) row.append(bars_c_sum) row.append(bars_start[i]) bars_start_avg = get_avg(bars_start) row.append(bars_start_avg) bars_start_max = get_max(bars_start) row.append(bars_start_max) bars_start_min = get_min(bars_start) row.append(bars_start_min) bars_start_stddev = get_stddev(bars_start) row.append(bars_start_stddev) bars_start_count = get_count(bars_start) row.append(bars_start_count) bars_start_sum = get_sum(bars_start) row.append(bars_start_sum) for i in row_bars_padding: row.append(i) writer.writerow(row) row = [] row = gral_info[:] #--------beats---------------" beats_c = hdf5_getters.get_beats_confidence(h5) group_index = 1 row = [] row = gral_info[:] row_front = padding( 14) #blanks left in front of the row(empty spaces for bars) row_beats_padding = padding(231) for i, item in enumerate(beats_c): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of the beats row.append(index) row.append(beats_c[i]) beats_c_avg = get_avg(beats_c) row.append(beats_c_avg) beats_c_max = get_max(beats_c) row.append(beats_c_max) beats_c_min = get_min(beats_c) row.append(beats_c_min) beats_c_stddev = get_stddev(beats_c) row.append(beats_c_stddev) beats_c_count = get_count(beats_c) row.append(beats_c_count) beats_c_sum = get_sum(beats_c) row.append(beats_c_sum) beats_start = hdf5_getters.get_beats_start(h5) row.append(beats_start[i]) beats_start_avg = get_avg(beats_start) row.append(beats_start_avg) beats_start_max = get_max(beats_start) row.append(beats_start_max) beats_start_min = get_min(beats_start) row.append(beats_start_min) beats_start_stddev = get_stddev(beats_start) row.append(beats_start_stddev) beats_start_count = get_count(beats_start) row.append(beats_start_count) beats_start_sum = get_sum(beats_start) row.append(beats_start_sum) for i in row_beats_padding: row.append(i) writer.writerow(row) row = [] row = gral_info[:] # "--------sections---------------" row_sec_padding = padding( 217) #blank spaces left at the end of the row sec_c = hdf5_getters.get_sections_confidence(h5) group_index = 2 row = [] row = gral_info[:] row_front = padding( 28) #blank spaces left in front(empty spaces for bars,beats) for i, item in enumerate(sec_c): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of the sections row.append(index) row.append(sec_c[i]) sec_c_avg = get_avg(sec_c) row.append(sec_c_avg) sec_c_max = get_max(sec_c) row.append(sec_c_max) sec_c_min = get_min(sec_c) row.append(sec_c_min) sec_c_stddev = get_stddev(sec_c) row.append(sec_c_stddev) sec_c_count = get_count(sec_c) row.append(sec_c_count) sec_c_sum = get_sum(sec_c) row.append(sec_c_sum) sec_start = hdf5_getters.get_sections_start(h5) row.append(sec_start[i]) sec_start_avg = get_avg(sec_start) row.append(sec_start_avg) sec_start_max = get_max(sec_start) row.append(sec_start_max) sec_start_min = get_min(sec_start) row.append(sec_start_min) sec_start_stddev = get_stddev(sec_start) row.append(sec_start_stddev) sec_start_count = get_count(sec_start) row.append(sec_start_count) sec_start_sum = get_sum(sec_start) row.append(sec_start_sum) for i in row_sec_padding: #appending the blank spaces at the end of the row row.append(i) writer.writerow(row) row = [] row = gral_info[:] #--------segments---------------" row_seg_padding = padding(182) #blank spaces at the end of the row row_front = padding(42) #blank spaces left in front of segments seg_c = hdf5_getters.get_segments_confidence(h5) group_index = 3 row = [] row = gral_info[:] for i, item in enumerate(seg_c): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of the segments row.append(index) row.append(seg_c[i]) seg_c_avg = get_avg(seg_c) row.append(seg_c_avg) seg_c_max = get_max(seg_c) row.append(seg_c_max) seg_c_min = get_min(seg_c) row.append(seg_c_min) seg_c_stddev = get_stddev(seg_c) row.append(seg_c_stddev) seg_c_count = get_count(seg_c) row.append(seg_c_count) seg_c_sum = get_sum(seg_c) row.append(seg_c_sum) seg_loud_max = hdf5_getters.get_segments_loudness_max(h5) row.append(seg_loud_max[i]) seg_loud_max_avg = get_avg(seg_loud_max) row.append(seg_loud_max_avg) seg_loud_max_max = get_max(seg_loud_max) row.append(seg_loud_max_max) seg_loud_max_min = get_min(seg_loud_max) row.append(seg_loud_max_min) seg_loud_max_stddev = get_stddev(seg_loud_max) row.append(seg_loud_max_stddev) seg_loud_max_count = get_count(seg_loud_max) row.append(seg_loud_max_count) seg_loud_max_sum = get_sum(seg_loud_max) row.append(seg_loud_max_sum) seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time( h5) row.append(seg_loud_max_time[i]) seg_loud_max_time_avg = get_avg(seg_loud_max_time) row.append(seg_loud_max_time_avg) seg_loud_max_time_max = get_max(seg_loud_max_time) row.append(seg_loud_max_time_max) seg_loud_max_time_min = get_min(seg_loud_max_time) row.append(seg_loud_max_time_min) seg_loud_max_time_stddev = get_stddev(seg_loud_max_time) row.append(seg_loud_max_time_stddev) seg_loud_max_time_count = get_count(seg_loud_max_time) row.append(seg_loud_max_time_count) seg_loud_max_time_sum = get_sum(seg_loud_max_time) row.append(seg_loud_max_time_sum) seg_loud_start = hdf5_getters.get_segments_loudness_start(h5) row.append(seg_loud_start[i]) seg_loud_start_avg = get_avg(seg_loud_start) row.append(seg_loud_start_avg) seg_loud_start_max = get_max(seg_loud_start) row.append(seg_loud_start_max) seg_loud_start_min = get_min(seg_loud_start) row.append(seg_loud_start_min) seg_loud_start_stddev = get_stddev(seg_loud_start) row.append(seg_loud_start_stddev) seg_loud_start_count = get_count(seg_loud_start) row.append(seg_loud_start_count) seg_loud_start_sum = get_sum(seg_loud_start) row.append(seg_loud_start_sum) seg_start = hdf5_getters.get_segments_start(h5) row.append(seg_start[i]) seg_start_avg = get_avg(seg_start) row.append(seg_start_avg) seg_start_max = get_max(seg_start) row.append(seg_start_max) seg_start_min = get_min(seg_start) row.append(seg_start_min) seg_start_stddev = get_stddev(seg_start) row.append(seg_start_stddev) seg_start_count = get_count(seg_start) row.append(seg_start_count) seg_start_sum = get_sum(seg_start) row.append(seg_start_sum) for i in row_seg_padding: #appending blank spaces at the end of the row row.append(i) writer.writerow(row) row = [] row = gral_info[:] #----------segments pitch and timbre---------------" row_seg2_padding = padding( 14) #blank spaces left at the end of the row row_front = padding( 77) #blank spaces left at the front of the segments and timbre seg_pitch = hdf5_getters.get_segments_pitches(h5) transpose_pitch = seg_pitch.transpose( ) #this is to tranpose the matrix,so we can have 12 rows group_index = 4 row = [] row = gral_info[:] for i, item in enumerate(transpose_pitch[0]): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of segments and timbre row.append(index) row.append(transpose_pitch[0][i]) seg_pitch_avg = get_avg(transpose_pitch[0]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[0]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[0]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[0]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[0]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[0]) row.append(seg_pitch_sum) row.append(transpose_pitch[1][i]) seg_pitch_avg = get_avg(transpose_pitch[1]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[1]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[1]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[1]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[1]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[1]) row.append(seg_pitch_sum) row.append(transpose_pitch[2][i]) seg_pitch_avg = get_avg(transpose_pitch[2]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[2]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[2]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[2]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[2]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[2]) row.append(seg_pitch_sum) row.append(transpose_pitch[3][i]) seg_pitch_avg = get_avg(transpose_pitch[3]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[3]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[3]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[3]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[3]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[3]) row.append(seg_pitch_sum) row.append(transpose_pitch[4][i]) seg_pitch_avg = get_avg(transpose_pitch[4]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[4]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[4]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[4]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[4]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[4]) row.append(seg_pitch_sum) row.append(transpose_pitch[5][i]) seg_pitch_avg = get_avg(transpose_pitch[5]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[5]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[5]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[5]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[5]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[5]) row.append(seg_pitch_sum) row.append(transpose_pitch[6][i]) seg_pitch_avg = get_avg(transpose_pitch[6]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[6]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[6]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[6]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[6]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[6]) row.append(seg_pitch_sum) row.append(transpose_pitch[7][i]) seg_pitch_avg = get_avg(transpose_pitch[7]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[7]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[7]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[7]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[7]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[7]) row.append(seg_pitch_sum) row.append(transpose_pitch[8][i]) seg_pitch_avg = get_avg(transpose_pitch[8]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[8]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[8]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[8]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[8]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[8]) row.append(seg_pitch_sum) row.append(transpose_pitch[9][i]) seg_pitch_avg = get_avg(transpose_pitch[9]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[9]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[9]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[9]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[9]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[9]) row.append(seg_pitch_sum) row.append(transpose_pitch[10][i]) seg_pitch_avg = get_avg(transpose_pitch[10]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[10]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[10]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[10]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[10]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[10]) row.append(seg_pitch_sum) row.append(transpose_pitch[11][i]) seg_pitch_avg = get_avg(transpose_pitch[11]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[11]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[11]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[11]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[11]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[11]) row.append(seg_pitch_sum) #timbre arrays seg_timbre = hdf5_getters.get_segments_timbre(h5) transpose_timbre = seg_pitch.transpose( ) #tranposing matrix, to have 12 rows row.append(transpose_timbre[0][i]) seg_timbre_avg = get_avg(transpose_timbre[0]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[0]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[0]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[0]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[0]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[0]) row.append(seg_timbre_sum) row.append(transpose_timbre[1][i]) seg_timbre_avg = get_avg(transpose_timbre[1]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[1]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[1]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[1]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[1]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[1]) row.append(seg_timbre_sum) row.append(transpose_timbre[2][i]) seg_timbre_avg = get_avg(transpose_timbre[2]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[2]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[2]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[2]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[2]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[2]) row.append(seg_timbre_sum) row.append(transpose_timbre[3][i]) seg_timbre_avg = get_avg(transpose_timbre[3]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[3]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[3]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[3]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[3]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[3]) row.append(seg_timbre_sum) row.append(transpose_timbre[4][i]) seg_timbre_avg = get_avg(transpose_timbre[4]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[4]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[4]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[4]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[4]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[4]) row.append(seg_timbre_sum) row.append(transpose_timbre[5][i]) seg_timbre_avg = get_avg(transpose_timbre[5]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[5]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[5]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[5]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[5]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[5]) row.append(seg_timbre_sum) row.append(transpose_timbre[6][i]) seg_timbre_avg = get_avg(transpose_timbre[6]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[6]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[6]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[6]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[6]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[6]) row.append(seg_timbre_sum) row.append(transpose_timbre[7][i]) seg_timbre_avg = get_avg(transpose_timbre[7]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[7]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[7]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[7]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[7]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[7]) row.append(seg_timbre_sum) row.append(transpose_timbre[8][i]) seg_timbre_avg = get_avg(transpose_timbre[8]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[8]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[8]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[8]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[8]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[8]) row.append(seg_timbre_sum) row.append(transpose_timbre[9][i]) seg_timbre_avg = get_avg(transpose_timbre[9]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[9]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[9]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[9]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[9]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[9]) row.append(seg_timbre_sum) row.append(transpose_timbre[10][i]) seg_timbre_avg = get_avg(transpose_timbre[10]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[10]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[10]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[10]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[10]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[10]) row.append(seg_timbre_sum) row.append(transpose_timbre[11][i]) seg_timbre_avg = get_avg(transpose_timbre[11]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[11]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[11]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[11]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[11]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[11]) row.append(seg_timbre_sum) for item in row_seg2_padding: row.append(item) writer.writerow(row) row = [] row = gral_info[:] # "--------tatums---------------" tatms_c = hdf5_getters.get_tatums_confidence(h5) group_index = 5 row_front = padding(245) #blank spaces left in front of tatums row = [] row = gral_info[:] for i, item in enumerate(tatms_c): row.append(group_index) row.append(i) for item in row_front: #appending blank spaces at the front of the row row.append(item) row.append(tatms_c[i]) tatms_c_avg = get_avg(tatms_c) row.append(tatms_c_avg) tatms_c_max = get_max(tatms_c) row.append(tatms_c_max) tatms_c_min = get_min(tatms_c) row.append(tatms_c_min) tatms_c_stddev = get_stddev(tatms_c) row.append(tatms_c_stddev) tatms_c_count = get_count(tatms_c) row.append(tatms_c_count) tatms_c_sum = get_sum(tatms_c) row.append(tatms_c_sum) tatms_start = hdf5_getters.get_tatums_start(h5) row.append(tatms_start[i]) tatms_start_avg = get_avg(tatms_start) row.append(tatms_start_avg) tatms_start_max = get_max(tatms_start) row.append(tatms_start_max) tatms_start_min = get_min(tatms_start) row.append(tatms_start_min) tatms_start_stddev = get_stddev(tatms_start) row.append(tatms_start_stddev) tatms_start_count = get_count(tatms_start) row.append(tatms_start_count) tatms_start_sum = get_sum(tatms_start) row.append(tatms_start_sum) writer.writerow(row) row = [] row = gral_info[:] transpose_pitch = seg_pitch.transpose( ) #this is to tranpose the matrix,so we can have 12 rows #arrays containing the aggregate values of the 12 rows seg_pitch_avg = [] seg_pitch_max = [] seg_pitch_min = [] seg_pitch_stddev = [] seg_pitch_count = [] seg_pitch_sum = [] i = 0 #Getting the aggregate values in the pitches array for row in transpose_pitch: seg_pitch_avg.append(get_avg(row)) seg_pitch_max.append(get_max(row)) seg_pitch_min.append(get_min(row)) seg_pitch_stddev.append(get_stddev(row)) seg_pitch_count.append(get_count(row)) seg_pitch_sum.append(get_sum(row)) i = i + 1 #extracting information from the timbre array transpose_timbre = seg_pitch.transpose( ) #tranposing matrix, to have 12 rows #arrays containing the aggregate values of the 12 rows seg_timbre_avg = [] seg_timbre_max = [] seg_timbre_min = [] seg_timbre_stddev = [] seg_timbre_count = [] seg_timbre_sum = [] i = 0 for row in transpose_timbre: seg_timbre_avg.append(get_avg(row)) seg_timbre_max.append(get_max(row)) seg_timbre_min.append(get_min(row)) seg_timbre_stddev.append(get_stddev(row)) seg_timbre_count.append(get_count(row)) seg_timbre_sum.append(get_sum(row)) i = i + 1 h5.close() count = count + 1 print count
artist_playmeid = GETTERS.get_artist_playmeid(h5, i) artist_terms = ','.join( str(e) for e in GETTERS.get_artist_terms(h5, i)) # array #artist_terms_freq = ','.join(str(e) for e in GETTERS.get_artist_terms_freq(h5, i)) # array #artist_terms_weight = ','.join(str(e) for e in GETTERS.get_artist_terms_weight(h5, i)) # array #audio_md5 = GETTERS.get_audio_md5(h5, i) #bars_confidence = ','.join(str(e) for e in GETTERS.get_bars_confidence(h5, i)) # array #bars_start = ','.join(str(e) for e in GETTERS.get_bars_start(h5, i)) # array #beats_confidence = ','.join(str(e) for e in GETTERS.get_beats_confidence(h5, i)) # array #beats_start = ','.join(str(e) for e in GETTERS.get_beats_start(h5, i)) # array danceability = GETTERS.get_danceability(h5, i) duration = GETTERS.get_duration(h5, i) end_of_fade_in = GETTERS.get_end_of_fade_in(h5, i) energy = GETTERS.get_energy(h5, i) key = GETTERS.get_key(h5, i) key_confidence = GETTERS.get_key_confidence(h5, i) loudness = GETTERS.get_loudness(h5, i) mode = GETTERS.get_mode(h5, i) mode_confidence = GETTERS.get_mode_confidence(h5, i) release = GETTERS.get_release(h5, i) release_7digitalid = GETTERS.get_release_7digitalid(h5, i) #sections_confidence = ','.join(str(e) for e in GETTERS.get_sections_confidence(h5, i)) # array #sections_start = ','.join(str(e) for e in GETTERS.get_sections_start(h5, i)) # array #segments_confidence = ','.join(str(e) for e in GETTERS.get_segments_confidence(h5, i)) # array #segments_loudness_max = ','.join(str(e) for e in GETTERS.get_segments_loudness_max(h5, i)) # array #segments_loudness_max_time = ','.join(str(e) for e in GETTERS.get_segments_loudness_max_time(h5, i)) # array #segments_loudness_start = ','.join(str(e) for e in GETTERS.get_segments_loudness_start(h5, i)) # array #segments_pitches = ','.join(str(e) for e in GETTERS.get_segments_pitches(h5, i)) # array #segments_start = ','.join(str(e) for e in GETTERS.get_segments_start(h5, i)) # array #segments_timbre = ','.join(str(e) for e in GETTERS.get_segments_timbre(h5, i)) # array similar_artists = ','.join(
def main(): outputFileName = sys.argv[2] outputFile1 = open(outputFileName, 'w') csvRowString = "" ################################################# #if you want to prompt the user for the order of attributes in the csv, #leave the prompt boolean set to True #else, set 'prompt' to False and set the order of attributes in the 'else' #clause prompt = False ################################################# if prompt == True: while prompt: prompt = False csvAttributeString = raw_input( "\n\nIn what order would you like the colums of the CSV file?\n" + "Please delineate with commas. The options are: " + "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude," + " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," + " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" + "For example, you may write \"Title, Tempo, Duration\"...\n\n" + "...or exit by typing 'exit'.\n\n") csvAttributeList = re.split('\W+', csvAttributeString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += 'AlbumID' elif attribute == 'AlbumName'.lower(): csvRowString += 'AlbumName' elif attribute == 'ArtistID'.lower(): csvRowString += 'ArtistID' elif attribute == 'ArtistLatitude'.lower(): csvRowString += 'ArtistLatitude' elif attribute == 'ArtistLocation'.lower(): csvRowString += 'ArtistLocation' elif attribute == 'ArtistLongitude'.lower(): csvRowString += 'ArtistLongitude' elif attribute == 'ArtistName'.lower(): csvRowString += 'ArtistName' elif attribute == 'Danceability'.lower(): csvRowString += 'Danceability' elif attribute == 'Duration'.lower(): csvRowString += 'Duration' elif attribute == 'KeySignature'.lower(): csvRowString += 'KeySignature' elif attribute == 'KeySignatureConfidence'.lower(): csvRowString += 'KeySignatureConfidence' elif attribute == 'SongID'.lower(): csvRowString += "SongID" elif attribute == 'Tempo'.lower(): csvRowString += 'Tempo' elif attribute == 'TimeSignature'.lower(): csvRowString += 'TimeSignature' elif attribute == 'TimeSignatureConfidence'.lower(): csvRowString += 'TimeSignatureConfidence' elif attribute == 'Title'.lower(): csvRowString += 'Title' elif attribute == 'Year'.lower(): csvRowString += 'Year' elif attribute == 'Exit'.lower(): sys.exit() else: prompt = True print "==============" print "I believe there has been an error with the input." print "==============" break csvRowString += "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" # outputFile1.write(csvRowString); csvRowString = "" #else, if you want to hard code the order of the csv file and not prompt #the user, else: ################################################# #change the order of the csv file here #Default is to list all available attributes (in alphabetical order) csvRowString = ( "SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation," + "ArtistLongitude,ArtistFamiliarity,ArtistHotttnesss,ArtistName," + "ArtistMBTags,ArtistTerms," + "Danceability,Energy,Duration,KeySignature," + "KeySignatureConfidence,Loudness,Mode,Hotttnesss,Tempo,TimeSignature,TimeSignatureConfidence," + "Title,Year") ################################################# csvAttributeList = re.split('\W+', csvRowString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() # outputFile1.write("SongNumber,"); # outputFile1.write(csvRowString + "\n"); csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate basedir = sys.argv[1] # "." As the default means the current directory ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: print f songH5File = hdf5_getters.open_h5_file_read(f) song = Song(str(hdf5_getters.get_song_id(songH5File))) testDanceability = hdf5_getters.get_danceability(songH5File) # print type(testDanceability) # print ("Here is the danceability: ") + str(testDanceability) song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File)) song.albumName = str(hdf5_getters.get_release(songH5File)) song.artistLatitude = str( hdf5_getters.get_artist_latitude(songH5File)) song.artistLocation = str( hdf5_getters.get_artist_location(songH5File)) song.artistLongitude = str( hdf5_getters.get_artist_longitude(songH5File)) song.artistFamiliarity = str( hdf5_getters.get_artist_familiarity(songH5File)) song.artistHotttnesss = str( hdf5_getters.get_artist_hotttnesss(songH5File)) song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.artistMBTags = ','.join( hdf5_getters.get_artist_mbtags(songH5File)) # song.artistMBTagsCount = ','.join(hdf5_getters.get_artist_mbtags_count(songH5File)) song.artistTerms = ','.join( hdf5_getters.get_artist_terms(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) # song.setGenreList() song.keySignature = str(hdf5_getters.get_key(songH5File)) song.keySignatureConfidence = str( hdf5_getters.get_key_confidence(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) # song.lyrics = None # song.popularity = None song.hotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File)) song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str( hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str( hdf5_getters.get_time_signature_confidence(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) #print song count # csvRowString += str(song.songCount) + "," rowString = json.dumps({ 'AlbumID': song.albumID, 'AlbumName': song.albumName, 'ArtistID': song.artistID, 'ArtistLatitude': song.artistLatitude, 'ArtistLocation': song.artistLocation, 'ArtistLongitude': song.artistLongitude, 'ArtistFamiliarity': song.artistFamiliarity, 'ArtistHotttnesss': song.artistHotttnesss, 'ArtistName': song.artistName, 'ArtistMBTags': song.artistMBTags, 'ArtistTerms': song.artistTerms, 'Danceability': song.danceability, 'Energy': song.energy, 'Duration': song.duration, 'KeySignature': song.keySignature, 'KeySignatureConfidence': song.keySignatureConfidence, 'Loudness': song.loudness, 'Mode': song.mode, 'Hotttnesss': song.hotttnesss, 'Tempo': song.tempo, 'SongID': song.id, 'TimeSignature': song.timeSignature, 'TimeSignatureConfidence': song.timeSignatureConfidence, 'Title': song.title, 'Year': song.year, }) #Remove the final comma from each row in the csv rowString += "\n" outputFile1.write(rowString) songH5File.close() outputFile1.close()
def data_to_flat_file(basedir,ext='.h5') : """ This function extracts the information from the tables and creates the flat file. """ count = 0; #song counter list_to_write= [] group_index=0 row_to_write = "" writer = csv.writer(open("complete.csv", "wb")) for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root,'*'+ext)) for f in files: row=[] print f h5 = hdf5_getters.open_h5_file_read(f) title = hdf5_getters.get_title(h5) title= title.replace('"','') row.append(title) comma=title.find(',') if comma != -1: print title time.sleep(1) album = hdf5_getters.get_release(h5) album= album.replace('"','') row.append(album) comma=album.find(',') if comma != -1: print album time.sleep(1) artist_name = hdf5_getters.get_artist_name(h5) comma=artist_name.find(',') if comma != -1: print artist_name time.sleep(1) artist_name= artist_name.replace('"','') row.append(artist_name) duration = hdf5_getters.get_duration(h5) row.append(duration) samp_rt = hdf5_getters.get_analysis_sample_rate(h5) row.append(samp_rt) artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5) row.append(artist_7digitalid) artist_fam = hdf5_getters.get_artist_familiarity(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_fam) == True: artist_fam=-1 row.append(artist_fam) artist_hotness= hdf5_getters.get_artist_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_hotness) == True: artist_hotness=-1 row.append(artist_hotness) artist_id = hdf5_getters.get_artist_id(h5) row.append(artist_id) artist_lat = hdf5_getters.get_artist_latitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lat) == True: artist_lat=-1 row.append(artist_lat) artist_loc = hdf5_getters.get_artist_location(h5) row.append(artist_loc) artist_lon = hdf5_getters.get_artist_longitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lon) == True: artist_lon=-1 row.append(artist_lon) artist_mbid = hdf5_getters.get_artist_mbid(h5) row.append(artist_mbid) #Getting the genre art_trm = hdf5_getters.get_artist_terms(h5) trm_freq = hdf5_getters.get_artist_terms_freq(h5) trn_wght = hdf5_getters.get_artist_terms_weight(h5) a_mb_tags = hdf5_getters.get_artist_mbtags(h5) genre_indexes=get_genre_indexes(trm_freq) #index of the highest freq genre_set=0 #flag to see if the genre has been set or not final_genre=[] genres_so_far=[] for i in range(len(genre_indexes)): genre_tmp=get_genre(art_trm,genre_indexes[i]) #genre that corresponds to the highest freq genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary if len(genres_so_far) != 0: for i in genres_so_far: final_genre.append(i) genre_set=1 if genre_set == 1: col_num=[] for i in final_genre: column=int(i) #getting the column number of the genre col_num.append(column) genre_array=genre_columns(col_num) #genre array for i in range(len(genre_array)): #appending the genre_array to the row row.append(genre_array[i]) else: genre_array=genre_columns(-1) #when there is no genre matched, return an array of [0...0] for i in range(len(genre_array)): #appending the genre_array to the row row.append(genre_array[i]) artist_pmid = hdf5_getters.get_artist_playmeid(h5) row.append(artist_pmid) audio_md5 = hdf5_getters.get_audio_md5(h5) row.append(audio_md5) danceability = hdf5_getters.get_danceability(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(danceability) == True: danceability=-1 row.append(danceability) end_fade_in =hdf5_getters.get_end_of_fade_in(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(end_fade_in) == True: end_fade_in=-1 row.append(end_fade_in) energy = hdf5_getters.get_energy(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(energy) == True: energy=-1 row.append(energy) song_key = hdf5_getters.get_key(h5) row.append(song_key) key_c = hdf5_getters.get_key_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(key_c) == True: key_c=-1 row.append(key_c) loudness = hdf5_getters.get_loudness(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(loudness) == True: loudness=-1 row.append(loudness) mode = hdf5_getters.get_mode(h5) row.append(mode) mode_conf = hdf5_getters.get_mode_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(mode_conf) == True: mode_conf=-1 row.append(mode_conf) release_7digitalid = hdf5_getters.get_release_7digitalid(h5) row.append(release_7digitalid) song_hot = hdf5_getters.get_song_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(song_hot) == True: song_hot=-1 row.append(song_hot) song_id = hdf5_getters.get_song_id(h5) row.append(song_id) start_fade_out = hdf5_getters.get_start_of_fade_out(h5) row.append(start_fade_out) tempo = hdf5_getters.get_tempo(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(tempo) == True: tempo=-1 row.append(tempo) time_sig = hdf5_getters.get_time_signature(h5) row.append(time_sig) time_sig_c = hdf5_getters.get_time_signature_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(time_sig_c) == True: time_sig_c=-1 row.append(time_sig_c) track_id = hdf5_getters.get_track_id(h5) row.append(track_id) track_7digitalid = hdf5_getters.get_track_7digitalid(h5) row.append(track_7digitalid) year = hdf5_getters.get_year(h5) row.append(year) bars_c = hdf5_getters.get_bars_confidence(h5) bars_start = hdf5_getters.get_bars_start(h5) row_bars_padding=padding(245) #this is the array that will be attached at the end of th row #--------------bars---------------" gral_info=[] gral_info=row[:] empty=[] for i,item in enumerate(bars_c): row.append(group_index) row.append(i) row.append(bars_c[i]) bars_c_avg= get_avg(bars_c) row.append(bars_c_avg) bars_c_max= get_max(bars_c) row.append(bars_c_max) bars_c_min = get_min(bars_c) row.append(bars_c_min) bars_c_stddev= get_stddev(bars_c) row.append(bars_c_stddev) bars_c_count = get_count(bars_c) row.append(bars_c_count) bars_c_sum = get_sum(bars_c) row.append(bars_c_sum) row.append(bars_start[i]) bars_start_avg = get_avg(bars_start) row.append(bars_start_avg) bars_start_max= get_max(bars_start) row.append(bars_start_max) bars_start_min = get_min(bars_start) row.append(bars_start_min) bars_start_stddev= get_stddev(bars_start) row.append(bars_start_stddev) bars_start_count = get_count(bars_start) row.append(bars_start_count) bars_start_sum = get_sum(bars_start) row.append(bars_start_sum) for i in row_bars_padding: row.append(i) writer.writerow(row) row=[] row=gral_info[:] #--------beats---------------" beats_c = hdf5_getters.get_beats_confidence(h5) group_index=1 row=[] row=gral_info[:] row_front=padding(14) #blanks left in front of the row(empty spaces for bars) row_beats_padding=padding(231) for i,item in enumerate(beats_c): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of the beats row.append(index) row.append(beats_c[i]) beats_c_avg= get_avg(beats_c) row.append(beats_c_avg) beats_c_max= get_max(beats_c) row.append(beats_c_max) beats_c_min = get_min(beats_c) row.append(beats_c_min) beats_c_stddev= get_stddev(beats_c) row.append(beats_c_stddev) beats_c_count = get_count(beats_c) row.append(beats_c_count) beats_c_sum = get_sum(beats_c) row.append(beats_c_sum) beats_start = hdf5_getters.get_beats_start(h5) row.append(beats_start[i]) beats_start_avg = get_avg(beats_start) row.append(beats_start_avg) beats_start_max= get_max(beats_start) row.append(beats_start_max) beats_start_min = get_min(beats_start) row.append(beats_start_min) beats_start_stddev= get_stddev(beats_start) row.append(beats_start_stddev) beats_start_count = get_count(beats_start) row.append(beats_start_count) beats_start_sum = get_sum(beats_start) row.append(beats_start_sum) for i in row_beats_padding: row.append(i) writer.writerow(row) row=[] row=gral_info[:] # "--------sections---------------" row_sec_padding=padding(217) #blank spaces left at the end of the row sec_c = hdf5_getters.get_sections_confidence(h5) group_index=2 row=[] row=gral_info[:] row_front=padding(28) #blank spaces left in front(empty spaces for bars,beats) for i,item in enumerate(sec_c): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of the sections row.append(index) row.append(sec_c[i]) sec_c_avg= get_avg(sec_c) row.append(sec_c_avg) sec_c_max= get_max(sec_c) row.append(sec_c_max) sec_c_min = get_min(sec_c) row.append(sec_c_min) sec_c_stddev= get_stddev(sec_c) row.append(sec_c_stddev) sec_c_count = get_count(sec_c) row.append(sec_c_count) sec_c_sum = get_sum(sec_c) row.append(sec_c_sum) sec_start = hdf5_getters.get_sections_start(h5) row.append(sec_start[i]) sec_start_avg = get_avg(sec_start) row.append(sec_start_avg) sec_start_max= get_max(sec_start) row.append(sec_start_max) sec_start_min = get_min(sec_start) row.append(sec_start_min) sec_start_stddev= get_stddev(sec_start) row.append(sec_start_stddev) sec_start_count = get_count(sec_start) row.append(sec_start_count) sec_start_sum = get_sum(sec_start) row.append(sec_start_sum) for i in row_sec_padding: #appending the blank spaces at the end of the row row.append(i) writer.writerow(row) row=[] row=gral_info[:] #--------segments---------------" row_seg_padding=padding(182) #blank spaces at the end of the row row_front=padding(42) #blank spaces left in front of segments seg_c = hdf5_getters.get_segments_confidence(h5) group_index=3 row=[] row=gral_info[:] for i,item in enumerate(seg_c): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of the segments row.append(index) row.append(seg_c[i]) seg_c_avg= get_avg(seg_c) row.append(seg_c_avg) seg_c_max= get_max(seg_c) row.append(seg_c_max) seg_c_min = get_min(seg_c) row.append(seg_c_min) seg_c_stddev= get_stddev(seg_c) row.append(seg_c_stddev) seg_c_count = get_count(seg_c) row.append(seg_c_count) seg_c_sum = get_sum(seg_c) row.append(seg_c_sum) seg_loud_max = hdf5_getters.get_segments_loudness_max(h5) row.append(seg_loud_max[i]) seg_loud_max_avg= get_avg(seg_loud_max) row.append(seg_loud_max_avg) seg_loud_max_max= get_max(seg_loud_max) row.append(seg_loud_max_max) seg_loud_max_min = get_min(seg_loud_max) row.append(seg_loud_max_min) seg_loud_max_stddev= get_stddev(seg_loud_max) row.append(seg_loud_max_stddev) seg_loud_max_count = get_count(seg_loud_max) row.append(seg_loud_max_count) seg_loud_max_sum = get_sum(seg_loud_max) row.append(seg_loud_max_sum) seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5) row.append(seg_loud_max_time[i]) seg_loud_max_time_avg= get_avg(seg_loud_max_time) row.append(seg_loud_max_time_avg) seg_loud_max_time_max= get_max(seg_loud_max_time) row.append(seg_loud_max_time_max) seg_loud_max_time_min = get_min(seg_loud_max_time) row.append(seg_loud_max_time_min) seg_loud_max_time_stddev= get_stddev(seg_loud_max_time) row.append(seg_loud_max_time_stddev) seg_loud_max_time_count = get_count(seg_loud_max_time) row.append(seg_loud_max_time_count) seg_loud_max_time_sum = get_sum(seg_loud_max_time) row.append(seg_loud_max_time_sum) seg_loud_start = hdf5_getters.get_segments_loudness_start(h5) row.append(seg_loud_start[i]) seg_loud_start_avg= get_avg(seg_loud_start) row.append(seg_loud_start_avg) seg_loud_start_max= get_max(seg_loud_start) row.append(seg_loud_start_max) seg_loud_start_min = get_min(seg_loud_start) row.append(seg_loud_start_min) seg_loud_start_stddev= get_stddev(seg_loud_start) row.append(seg_loud_start_stddev) seg_loud_start_count = get_count(seg_loud_start) row.append(seg_loud_start_count) seg_loud_start_sum = get_sum(seg_loud_start) row.append(seg_loud_start_sum) seg_start = hdf5_getters.get_segments_start(h5) row.append(seg_start[i]) seg_start_avg= get_avg(seg_start) row.append(seg_start_avg) seg_start_max= get_max(seg_start) row.append(seg_start_max) seg_start_min = get_min(seg_start) row.append(seg_start_min) seg_start_stddev= get_stddev(seg_start) row.append(seg_start_stddev) seg_start_count = get_count(seg_start) row.append(seg_start_count) seg_start_sum = get_sum(seg_start) row.append(seg_start_sum) for i in row_seg_padding: #appending blank spaces at the end of the row row.append(i) writer.writerow(row) row=[] row=gral_info[:] #----------segments pitch and timbre---------------" row_seg2_padding=padding(14) #blank spaces left at the end of the row row_front=padding(77) #blank spaces left at the front of the segments and timbre seg_pitch = hdf5_getters.get_segments_pitches(h5) transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows group_index=4 row=[] row=gral_info[:] for i,item in enumerate(transpose_pitch[0]): row.append(group_index) row.append(i) for index in row_front: #padding blanks in front of segments and timbre row.append(index) row.append(transpose_pitch[0][i]) seg_pitch_avg= get_avg(transpose_pitch[0]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[0]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[0]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[0]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[0]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[0]) row.append(seg_pitch_sum) row.append(transpose_pitch[1][i]) seg_pitch_avg= get_avg(transpose_pitch[1]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[1]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[1]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[1]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[1]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[1]) row.append(seg_pitch_sum) row.append(transpose_pitch[2][i]) seg_pitch_avg= get_avg(transpose_pitch[2]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[2]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[2]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[2]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[2]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[2]) row.append(seg_pitch_sum) row.append(transpose_pitch[3][i]) seg_pitch_avg= get_avg(transpose_pitch[3]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[3]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[3]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[3]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[3]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[3]) row.append(seg_pitch_sum) row.append(transpose_pitch[4][i]) seg_pitch_avg= get_avg(transpose_pitch[4]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[4]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[4]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[4]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[4]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[4]) row.append(seg_pitch_sum) row.append(transpose_pitch[5][i]) seg_pitch_avg= get_avg(transpose_pitch[5]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[5]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[5]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[5]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[5]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[5]) row.append(seg_pitch_sum) row.append(transpose_pitch[6][i]) seg_pitch_avg= get_avg(transpose_pitch[6]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[6]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[6]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[6]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[6]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[6]) row.append(seg_pitch_sum) row.append(transpose_pitch[7][i]) seg_pitch_avg= get_avg(transpose_pitch[7]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[7]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[7]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[7]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[7]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[7]) row.append(seg_pitch_sum) row.append(transpose_pitch[8][i]) seg_pitch_avg= get_avg(transpose_pitch[8]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[8]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[8]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[8]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[8]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[8]) row.append(seg_pitch_sum) row.append(transpose_pitch[9][i]) seg_pitch_avg= get_avg(transpose_pitch[9]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[9]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[9]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[9]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[9]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[9]) row.append(seg_pitch_sum) row.append(transpose_pitch[10][i]) seg_pitch_avg= get_avg(transpose_pitch[10]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[10]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[10]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[10]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[10]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[10]) row.append(seg_pitch_sum) row.append(transpose_pitch[11][i]) seg_pitch_avg= get_avg(transpose_pitch[11]) row.append(seg_pitch_avg) seg_pitch_max= get_max(transpose_pitch[11]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[11]) row.append(seg_pitch_min) seg_pitch_stddev= get_stddev(transpose_pitch[11]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[11]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[11]) row.append(seg_pitch_sum) #timbre arrays seg_timbre = hdf5_getters.get_segments_timbre(h5) transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows row.append(transpose_timbre[0][i]) seg_timbre_avg= get_avg(transpose_timbre[0]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[0]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[0]) row.append(seg_timbre_min) seg_timbre_stddev=get_stddev(transpose_timbre[0]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[0]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[0]) row.append(seg_timbre_sum) row.append(transpose_timbre[1][i]) seg_timbre_avg= get_avg(transpose_timbre[1]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[1]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[1]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[1]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[1]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[1]) row.append(seg_timbre_sum) row.append(transpose_timbre[2][i]) seg_timbre_avg= get_avg(transpose_timbre[2]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[2]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[2]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[2]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[2]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[2]) row.append(seg_timbre_sum) row.append(transpose_timbre[3][i]) seg_timbre_avg= get_avg(transpose_timbre[3]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[3]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[3]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[3]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[3]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[3]) row.append(seg_timbre_sum) row.append(transpose_timbre[4][i]) seg_timbre_avg= get_avg(transpose_timbre[4]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[4]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[4]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[4]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[4]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[4]) row.append(seg_timbre_sum) row.append(transpose_timbre[5][i]) seg_timbre_avg= get_avg(transpose_timbre[5]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[5]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[5]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[5]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[5]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[5]) row.append(seg_timbre_sum) row.append(transpose_timbre[6][i]) seg_timbre_avg= get_avg(transpose_timbre[6]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[6]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[6]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[6]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[6]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[6]) row.append(seg_timbre_sum) row.append(transpose_timbre[7][i]) seg_timbre_avg= get_avg(transpose_timbre[7]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[7]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[7]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[7]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[7]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[7]) row.append(seg_timbre_sum) row.append(transpose_timbre[8][i]) seg_timbre_avg= get_avg(transpose_timbre[8]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[8]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[8]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[8]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[8]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[8]) row.append(seg_timbre_sum) row.append(transpose_timbre[9][i]) seg_timbre_avg= get_avg(transpose_timbre[9]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[9]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[9]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[9]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[9]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[9]) row.append(seg_timbre_sum) row.append(transpose_timbre[10][i]) seg_timbre_avg= get_avg(transpose_timbre[10]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[10]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[10]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[10]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[10]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[10]) row.append(seg_timbre_sum) row.append(transpose_timbre[11][i]) seg_timbre_avg= get_avg(transpose_timbre[11]) row.append(seg_timbre_avg) seg_timbre_max= get_max(transpose_timbre[11]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[11]) row.append(seg_timbre_min) seg_timbre_stddev= get_stddev(transpose_timbre[11]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[11]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[11]) row.append(seg_timbre_sum) for item in row_seg2_padding: row.append(item) writer.writerow(row) row=[] row=gral_info[:] # "--------tatums---------------" tatms_c = hdf5_getters.get_tatums_confidence(h5) group_index=5 row_front=padding(245) #blank spaces left in front of tatums row=[] row=gral_info[:] for i,item in enumerate(tatms_c): row.append(group_index) row.append(i) for item in row_front: #appending blank spaces at the front of the row row.append(item) row.append(tatms_c[i]) tatms_c_avg= get_avg(tatms_c) row.append(tatms_c_avg) tatms_c_max= get_max(tatms_c) row.append(tatms_c_max) tatms_c_min = get_min(tatms_c) row.append(tatms_c_min) tatms_c_stddev= get_stddev(tatms_c) row.append(tatms_c_stddev) tatms_c_count = get_count(tatms_c) row.append(tatms_c_count) tatms_c_sum = get_sum(tatms_c) row.append(tatms_c_sum) tatms_start = hdf5_getters.get_tatums_start(h5) row.append(tatms_start[i]) tatms_start_avg= get_avg(tatms_start) row.append(tatms_start_avg) tatms_start_max= get_max(tatms_start) row.append(tatms_start_max) tatms_start_min = get_min(tatms_start) row.append(tatms_start_min) tatms_start_stddev= get_stddev(tatms_start) row.append(tatms_start_stddev) tatms_start_count = get_count(tatms_start) row.append(tatms_start_count) tatms_start_sum = get_sum(tatms_start) row.append(tatms_start_sum) writer.writerow(row) row=[] row=gral_info[:] transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows #arrays containing the aggregate values of the 12 rows seg_pitch_avg=[] seg_pitch_max=[] seg_pitch_min=[] seg_pitch_stddev=[] seg_pitch_count=[] seg_pitch_sum=[] i=0 #Getting the aggregate values in the pitches array for row in transpose_pitch: seg_pitch_avg.append(get_avg(row)) seg_pitch_max.append(get_max(row)) seg_pitch_min.append(get_min(row)) seg_pitch_stddev.append(get_stddev(row)) seg_pitch_count.append(get_count(row)) seg_pitch_sum.append(get_sum(row)) i=i+1 #extracting information from the timbre array transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows #arrays containing the aggregate values of the 12 rows seg_timbre_avg=[] seg_timbre_max=[] seg_timbre_min=[] seg_timbre_stddev=[] seg_timbre_count=[] seg_timbre_sum=[] i=0 for row in transpose_timbre: seg_timbre_avg.append(get_avg(row)) seg_timbre_max.append(get_max(row)) seg_timbre_min.append(get_min(row)) seg_timbre_stddev.append(get_stddev(row)) seg_timbre_count.append(get_count(row)) seg_timbre_sum.append(get_sum(row)) i=i+1 h5.close() count=count+1; print count;
song.loudness = remove_trap_characters( str(hdf5_getters.get_loudness(songH5File))) song.mode = remove_trap_characters( str(hdf5_getters.get_mode(songH5File))) song.modeConfidence = remove_trap_characters( str(hdf5_getters.get_mode_confidence(songH5File))) song.artistName = remove_trap_characters( str(hdf5_getters.get_artist_name(songH5File))) song.danceability = remove_trap_characters( str(hdf5_getters.get_danceability(songH5File))) song.duration = remove_trap_characters( str(hdf5_getters.get_duration(songH5File))) song.keySignature = remove_trap_characters( str(hdf5_getters.get_key(songH5File))) song.keySignatureConfidence = remove_trap_characters( str(hdf5_getters.get_key_confidence(songH5File))) song.tempo = remove_trap_characters( str(hdf5_getters.get_tempo(songH5File))) song.timeSignature = remove_trap_characters( str(hdf5_getters.get_time_signature(songH5File))) song.timeSignatureConfidence = remove_trap_characters( str(hdf5_getters.get_time_signature_confidence(songH5File))) song.title = remove_trap_characters( str(hdf5_getters.get_title(songH5File))) song.year = remove_trap_characters( str(hdf5_getters.get_year(songH5File))) song.trackID = remove_trap_characters( str(hdf5_getters.get_track_id(songH5File))) temp = hdf5_getters.get_segments_start(songH5File) song.segmentsStart = remove_trap_characters(str(list(temp)))
def main(): outputFile = open('songs.csv', 'w') writer = csv.writer(outputFile) csvRowString = "song_number,artist_familiarity,artist_hotttnesss,artist_id,artist_mbid,artist_playmeid,artist_7digitalid,artist_latitude,artist_longitude,artist_location,artist_name,release,release_7digitalid,song_id,song_hotttnesss,title,track_7digitalid,analysis_sample_rate,audio_md5,danceability,duration,end_of_fade_in,energy,key,key_confidence,loudness,mode,mode_confidence,start_of_fade_out,tempo,time_signature,time_signature_confidence,track_id,year" outputFile.write(csvRowString + "\n") csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate basedir = "." # "." As the default means the current directory ext = ".H5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP songCount = 0 for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: print(f) songH5File = hdf5_getters.open_h5_file_read(f) values = [ songCount, hdf5_getters.get_artist_familiarity(songH5File), hdf5_getters.get_artist_hotttnesss(songH5File), hdf5_getters.get_artist_id(songH5File), hdf5_getters.get_artist_mbid(songH5File), hdf5_getters.get_artist_playmeid(songH5File), hdf5_getters.get_artist_7digitalid(songH5File), hdf5_getters.get_artist_latitude(songH5File), hdf5_getters.get_artist_longitude(songH5File), hdf5_getters.get_artist_location(songH5File), hdf5_getters.get_artist_name(songH5File), hdf5_getters.get_release(songH5File), hdf5_getters.get_release_7digitalid(songH5File), hdf5_getters.get_song_id(songH5File), hdf5_getters.get_song_hotttnesss(songH5File), hdf5_getters.get_title(songH5File), hdf5_getters.get_track_7digitalid(songH5File), hdf5_getters.get_analysis_sample_rate(songH5File), hdf5_getters.get_audio_md5(songH5File), hdf5_getters.get_danceability(songH5File), hdf5_getters.get_duration(songH5File), hdf5_getters.get_end_of_fade_in(songH5File), hdf5_getters.get_energy(songH5File), hdf5_getters.get_key(songH5File), hdf5_getters.get_key_confidence(songH5File), hdf5_getters.get_loudness(songH5File), hdf5_getters.get_mode(songH5File), hdf5_getters.get_mode_confidence(songH5File), hdf5_getters.get_start_of_fade_out(songH5File), hdf5_getters.get_tempo(songH5File), hdf5_getters.get_time_signature(songH5File), hdf5_getters.get_time_signature_confidence(songH5File), hdf5_getters.get_track_id(songH5File), hdf5_getters.get_year(songH5File) ] songH5File.close() songCount = songCount + 1 writer.writerow(values) outputFile.close()
def main(): dataset_dir = sys.argv[1] feat =[] feat1=[] feat2=[] feat3=[] feat4=[] print "Forming Dataset..." listing1 = os.listdir(dataset_dir) for a in listing1: listing2 = os.listdir(dataset_dir+a+'/') for b in listing2: listing3 = os.listdir(dataset_dir+a+'/'+b+'/') for c in listing3: listing4 = os.listdir(dataset_dir+a+'/'+b+'/'+c+'/') for d in listing4: h5 = hdf5_getters.open_h5_file_read(dataset_dir+a+'/'+b+'/'+c+'/'+d) feat =[] feat1=[] feat2=[] feat3=[] feat4=[] temp = hdf5_getters.get_artist_hotttnesss(h5) if (math.isnan(temp) or temp==0.0): h5.close() continue temp = hdf5_getters.get_artist_familiarity(h5) if (math.isnan(temp) or temp==0.0): h5.close() continue temp = hdf5_getters.get_end_of_fade_in(h5) if (math.isnan(temp)): h5.close() continue temp = hdf5_getters.get_key_confidence(h5) if (math.isnan(temp)): h5.close() continue temp = hdf5_getters.get_loudness(h5) if (math.isnan(temp)): h5.close() continue temp = hdf5_getters.get_mode_confidence(h5) if (math.isnan(temp)): h5.close() continue temp = hdf5_getters.get_sections_confidence(h5) if temp.size == 0: h5.close() continue temp = hdf5_getters.get_segments_confidence(h5) if temp.size == 0: h5.close() continue temp = hdf5_getters.get_segments_loudness_max(h5) if temp.size == 0: h5.close() continue temp = hdf5_getters.get_segments_loudness_max_time(h5) if temp.size == 0: h5.close() continue temp = hdf5_getters.get_segments_pitches(h5) if temp.size == 0: h5.close() continue temp = hdf5_getters.get_segments_timbre(h5) if temp.size == 0: h5.close() continue temp = hdf5_getters.get_start_of_fade_out(h5) if (math.isnan(temp)): h5.close() continue temp = hdf5_getters.get_tempo(h5) if (math.isnan(temp)): h5.close() continue temp = hdf5_getters.get_time_signature_confidence(h5) if (math.isnan(temp)): h5.close() continue temp = hdf5_getters.get_year(h5) if temp == 0: h5.close() continue temp = hdf5_getters.get_artist_terms(h5) if temp.size == 0: h5.close() continue temp_ = hdf5_getters.get_artist_terms_weight(h5) if temp_.size == 0: continue temp = hdf5_getters.get_bars_confidence(h5) sz = temp.size if sz<50: h5.close() continue temp = hdf5_getters.get_beats_confidence(h5) sz = temp.size if sz <50: h5.close() continue mm = np.mean(temp) vv = np.var(temp) if mm==0.0 and vv==0.0: h5.close() continue temp = hdf5_getters.get_segments_confidence(h5) sz = temp.size if sz <50: h5.close() continue temp = hdf5_getters.get_tatums_confidence(h5) sz = temp.size if sz <50: h5.close() continue temp = hdf5_getters.get_song_hotttnesss(h5) if (math.isnan(temp) or temp==0.0): h5.close() continue temp = hdf5_getters.get_bars_confidence(h5) sz = temp.size sz1 = sz/50 i=1 j=0 while i<=50: if i == 50: sz2 = sz else: sz2 = i*sz1 num=0.0 acc = 0 while j<sz2: acc += temp[j] j+=1 num+=1.0 mm = acc/num feat1.append(mm) i+=1 temp = hdf5_getters.get_beats_confidence(h5) sz = temp.size sz1 = sz/50 i=1 j=0 while i<=50: if i == 50: sz2 = sz else: sz2 = i*sz1 num=0.0 acc = 0 while j<sz2: acc += temp[j] j+=1 num+=1.0 mm = acc/num feat2.append(mm) i+=1 temp = hdf5_getters.get_segments_confidence(h5) sz = temp.size sz1 = sz/50 i=1 j=0 while i<=50: if i == 50: sz2 = sz else: sz2 = i*sz1 num=0.0 acc = 0 while j<sz2: acc += temp[j] j+=1 num+=1.0 mm = acc/num feat3.append(mm) i+=1 temp = hdf5_getters.get_tatums_confidence(h5) sz = temp.size sz1 = sz/50 i=1 j=0 while i<=50: if i == 50: sz2 = sz else: sz2 = i*sz1 num=0.0 acc = 0 while j<sz2: acc += temp[j] j+=1 num+=1.0 mm = acc/num feat4.append(mm) i+=1 i=0 avg = 0.0 while i<50: avg = (feat1[i] + feat2[i] + feat3[i] + feat4[i])/4.0 feat.append(avg) i++ temp = hdf5_getters.get_song_hotttnesss(h5) hott = 0 if temp >=0.75: hott = 1 elif temp >=0.40 and temp <0.75: hott = 2 else: hott = 3 feat.append(hott) h5.close() count = 1 f=open('MSD_DATASET_LSTM.txt', 'a') outstring='' cnt = 0 feat_size = len(feat) for i in feat: cnt+=1 outstring+=str(i) if (cnt!=feat_size): outstring+=',' outstring+='\n' f.write(outstring) f.close()
def get_fields(files): tracks = [] counts = {} field_counts = [] for file in files: h5 = hdf5_getters.open_h5_file_read(file) t = {} t['artist_familiarity'] = hdf5_getters.get_artist_familiarity( h5) # estimation t['artist_hotttnesss'] = hdf5_getters.get_artist_hotttnesss( h5) # estimation t['artist_name'] = hdf5_getters.get_artist_name(h5) # artist name t['release'] = hdf5_getters.get_release(h5) # album name t['title'] = hdf5_getters.get_title(h5) # title t['len_similar_artists'] = len( hdf5_getters.get_similar_artists(h5)) # number of similar artists t['analysis_sample_rate'] = hdf5_getters.get_analysis_sample_rate( h5) # sample rate of the audio used ????????? t['duration'] = hdf5_getters.get_duration(h5) # seconds t['key'] = hdf5_getters.get_key(h5) # key the song is in t['key_confidence'] = hdf5_getters.get_key_confidence( h5) # confidence measure t['loudness'] = hdf5_getters.get_loudness(h5) # overall loudness in dB t['mode_confidence'] = hdf5_getters.get_mode_confidence( h5) # confidence measure t['start_of_fade_out'] = hdf5_getters.get_start_of_fade_out( h5) # time in sec t['tempo'] = hdf5_getters.get_tempo(h5) # estimated tempo in BPM t['time_signature'] = hdf5_getters.get_time_signature( h5) # estimate of number of beats per bar, e.g. 4 t['year'] = hdf5_getters.get_year( h5) # song release year from MusicBrainz or 0 timbre = hdf5_getters.get_segments_timbre( h5) # 2D float array, texture features (MFCC+PCA-like) t['segments_timbre'] = timbre t['timbre_avg'] = timbre.mean(axis=0) # list of 12 averages cov_mat_timbre = np.cov(timbre, rowvar=False) cov_timbre = [] for i in range(len(cov_mat_timbre)): for j in range(len(cov_mat_timbre) - i): cov_timbre.append(cov_mat_timbre[i][j]) t['timbre_cov'] = cov_timbre # list of 78 covariances pitch = hdf5_getters.get_segments_pitches( h5) # 2D float array, chroma feature, one value per note t['segments_pitch'] = pitch t['pitch_avg'] = pitch.mean(axis=0) # list of 12 averages cov_mat_pitch = np.cov(pitch, rowvar=False) cov_pitch = [] for i in range(len(cov_mat_pitch)): for j in range(len(cov_mat_pitch) - i): cov_pitch.append(cov_mat_timbre[i][j]) t['pitch_cov'] = cov_pitch # list of 78 covariances # seg_pitch = hdf5_getters.get_segments_pitches(h5) # 2D float array, chroma feature, one value per note # print(seg_pitch.shape) # t['artist_latitude'] = hdf5_getters.get_artist_latitude(h5) # float, ???????????????????????????????????????? # t['artist_longitude'] = hdf5_getters.get_artist_longitude(h5) # float, ?????????????????????????????????????? # t['artist_location'] = hdf5_getters.get_artist_location(h5) # location name # t['song_hotttnesss'] = hdf5_getters.get_song_hotttnesss(h5) # estimation # t['danceability'] = hdf5_getters.get_danceability(h5) # estimation # t['end_of_fade_in'] = hdf5_getters.get_end_of_fade_in(h5) # seconds at the beginning of the song # t['energy'] = hdf5_getters.get_energy(h5) # energy from listener point of view # t['mode'] = hdf5_getters.get_mode(h5) # major or minor # t['time_signature_confidence'] = hdf5_getters.get_time_signature_confidence(h5) # confidence measure # t['artist_mbtags_count'] = len(hdf5_getters.get_artist_mbtags_count(h5)) # array int, tag counts for musicbrainz tags # bad types or non arithmatic numbers ''' # t['audio_md5'] = hdf5_getters.get_audio_md5(h5) # hash code of the audio used for the analysis by The Echo Nest # t['artist_terms_weight'] = hdf5_getters.get_artist_terms_weight(h5) # array float, echonest tags weight ????? # t['artist_terms_freq'] = hdf5_getters.get_artist_terms_freq(h5) # array float, echonest tags freqs ?????????? # t['artist_terms'] = hdf5_getters.get_artist_terms(h5) # array string, echonest tags ????????????????????????? # t['artist_id'] = hdf5_getters.get_artist_id(h5) # echonest id # t['artist_mbid'] = hdf5_getters.get_artist_mbid(h5) # musicbrainz id # t['artist_playmeid'] = hdf5_getters.get_artist_playmeid(h5) # playme id # t['artist_7digitalid'] = hdf5_getters.get_artist_7digitalid(h5) # 7digital id # t['release_7digitalid'] = hdf5_getters.get_release_7digitalid(h5) # 7digital id # t['song_id'] = hdf5_getters.get_song_id(h5) # echonest id # t['track_7digitalid'] = hdf5_getters.get_track_7digitalid(h5) # 7digital id # t['similar_artists'] = hdf5_getters.get_similar_artists(h5) # string array of sim artist ids # t['track_id'] = hdf5_getters.get_track_id(h5) # echonest track id # t['segments_start'] = hdf5_getters.get_segments_start(h5) # array floats, musical events, ~ note onsets # t['segments_confidence'] = hdf5_getters.get_segments_confidence(h5) # array floats, confidence measure # t['segments_pitches'] = hdf5_getters.get_segments_pitches(h5) # 2D float array, chroma feature, one value per note # t['segments_timbre'] = hdf5_getters.get_segments_timbre(h5) # 2D float array, texture features (MFCC+PCA-like) # t['segments_loudness_max'] = hdf5_getters.get_segments_loudness_max(h5) # float array, max dB value # t['segments_loudness_max_time'] = hdf5_getters.get_segments_loudness_max_time(h5) # float array, time of max dB value, i.e. end of attack # t['segments_loudness_start'] = hdf5_getters.get_segments_loudness_start(h5) # array float, dB value at onset # t['sections_start'] = hdf5_getters.get_sections_start(h5) # array float, largest grouping in a song, e.g. verse # t['sections_confidence'] = hdf5_getters.get_sections_confidence(h5) # array float, confidence measure # t['beats_start'] = hdf5_getters.get_beats_start(h5) # array float, result of beat tracking # t['beats_confidence'] = hdf5_getters.get_beats_confidence(h5) # array float, confidence measure # t['bars_start'] = hdf5_getters.get_bars_start(h5) # array float, beginning of bars, usually on a beat # t['bars_confidence'] = hdf5_getters.get_bars_confidence(h5) # array float, confidence measure # t['tatums_start'] = hdf5_getters.get_tatums_start(h5) # array float, smallest rythmic element # t['tatums_confidence'] = hdf5_getters.get_tatums_confidence(h5) # array float, confidence measure # t['artist_mbtags'] = hdf5_getters.get_artist_mbtags(h5) # array string, tags from musicbrainz.org ''' h5.close() for key, value in t.items(): if isinstance(value, float) and math.isnan(value): pass if type(value) is np.ndarray: if key in counts.keys(): counts[key] += 1 else: counts[key] = 1 elif value: if key in counts.keys(): counts[key] += 1 else: counts[key] = 1 elif key not in counts.keys(): counts[key] = 0 count = 0 for key, value in t.items(): if isinstance(value, float) and math.isnan(value): pass elif type(value) is np.ndarray: count += 1 elif value: count += 1 field_counts.append(count) # progress bar if num_of_tracks >= 100: i = files.index(file) + 1 scale = num_of_tracks / 100 if i % math.ceil(len(files) * .05) == 0: sys.stdout.write('\r') # the exact output you're looking for: sys.stdout.write("Loading dataframe: [%-100s] %d%%" % ('=' * int(i // scale), 1 / scale * i)) sys.stdout.flush() time.sleep(.01) tracks.append(t) print() return tracks, counts, field_counts
def complete_hd5_to_csv(basedir): ext = '.h5' # Get all files with extension .h5 # Header title. Essentially it is a schema for all the following songs header = [ 'Title', 'Artist familiarity', 'Artist hotness', 'Artist ID', 'Artist mbID', 'Artist playmeid', 'Artist 7DigitalID', 'Artist latitude', 'Artist longitude', 'Artist location', 'Artist Name', 'Release', 'Release 7DigitalID', 'Song ID', 'Song Hotness', 'Track 7Digital', 'Analysis sample rate', 'Audio md5', 'Danceability', 'Duration', 'End of Fade', 'Energy', 'Key', 'Key Confidence', 'Loudness', 'Mode', 'Mode Confidence', 'Start of fade out', 'Tempo', 'Time signature', 'Time signature confidence', 'Track ID', 'Year' ] with open('Tester2.csv', 'w', newline='') as csvfile: csv_writer = csv.writer(csvfile, delimiter=';') # writing the header line. This line contains the schema of the data csv_writer.writerow(header) # Read all files from the given directories for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) print(files) for f in files: h5 = hdf5_getters.open_h5_file_read(f) # Write as row all elements. NOTE: Only the serialized elements are parsed and not arrays csv_writer.writerow([ hdf5_getters.get_title(h5), hdf5_getters.get_artist_familiarity(h5), hdf5_getters.get_artist_hotttnesss(h5), hdf5_getters.get_artist_id(h5), hdf5_getters.get_artist_mbid(h5), hdf5_getters.get_artist_playmeid(h5), hdf5_getters.get_artist_7digitalid(h5), hdf5_getters.get_artist_latitude(h5), hdf5_getters.get_artist_longitude(h5), hdf5_getters.get_artist_location(h5), hdf5_getters.get_artist_name(h5), hdf5_getters.get_release(h5), hdf5_getters.get_release_7digitalid(h5), hdf5_getters.get_song_id(h5), hdf5_getters.get_song_hotttnesss(h5), hdf5_getters.get_track_7digitalid(h5), hdf5_getters.get_analysis_sample_rate(h5), hdf5_getters.get_audio_md5(h5), hdf5_getters.get_danceability(h5), hdf5_getters.get_duration(h5), hdf5_getters.get_end_of_fade_in(h5), hdf5_getters.get_energy(h5), hdf5_getters.get_key(h5), hdf5_getters.get_key_confidence(h5), hdf5_getters.get_loudness(h5), hdf5_getters.get_mode(h5), hdf5_getters.get_mode_confidence(h5), hdf5_getters.get_start_of_fade_out(h5), hdf5_getters.get_tempo(h5), hdf5_getters.get_time_signature(h5), hdf5_getters.get_time_signature_confidence(h5), hdf5_getters.get_track_id(h5), hdf5_getters.get_year(h5) ]) # For debugging purposes. Everything as expected # print() # print("Num of songs -- ", hdf5_getters.get_num_songs(h5)) # One song per file # print("Title -- ", hdf5_getters.get_title(h5)) # Print the title of a specific h5 file # print("Artist familiarity -- ", hdf5_getters.get_artist_familiarity(h5)) # print("Artist hotness -- ", hdf5_getters.get_artist_hotttnesss(h5)) # print("Artist ID -- ", hdf5_getters.get_artist_id(h5)) # print("Artist mbID -- ", hdf5_getters.get_artist_mbid(h5)) # print("Artist playmeid -- ", hdf5_getters.get_artist_playmeid(h5)) # print("Artist 7DigitalID -- ", hdf5_getters.get_artist_7digitalid(h5)) # print("Artist latitude -- ", hdf5_getters.get_artist_latitude(h5)) # print("Artist longitude -- ", hdf5_getters.get_artist_longitude(h5)) # print("Artist location -- ", hdf5_getters.get_artist_location(h5)) # print("Artist Name -- ", hdf5_getters.get_artist_name(h5)) # print("Release -- ", hdf5_getters.get_release(h5)) # print("Release 7DigitalID -- ", hdf5_getters.get_release_7digitalid(h5)) # print("Song ID -- ", hdf5_getters.get_song_id(h5)) # print("Song Hotness -- ", hdf5_getters.get_song_hotttnesss(h5)) # print("Track 7Digital -- ", hdf5_getters.get_track_7digitalid(h5)) # print("Analysis sample rate -- ", hdf5_getters.get_analysis_sample_rate(h5)) # print("Audio md5 -- ", hdf5_getters.get_audio_md5(h5)) # print("Danceability -- ", hdf5_getters.get_danceability(h5)) # print("Duration -- ", hdf5_getters.get_duration(h5)) # print("End of Fade -- ", hdf5_getters.get_end_of_fade_in(h5)) # print("Energy -- ", hdf5_getters.get_energy(h5)) # print("Key -- ", hdf5_getters.get_key(h5)) # print("Key Confidence -- ", hdf5_getters.get_key_confidence(h5)) # print("Loudness -- ", hdf5_getters.get_loudness(h5)) # print("Mode -- ", hdf5_getters.get_mode(h5)) # print("Mode Confidence -- ", hdf5_getters.get_mode_confidence(h5)) # print("Start of fade out -- ", hdf5_getters.get_start_of_fade_out(h5)) # print("Tempo -- ", hdf5_getters.get_tempo(h5)) # print("Time signature -- ", hdf5_getters.get_time_signature(h5)) # print("Time signature confidence -- ", hdf5_getters.get_time_signature_confidence(h5)) # print("Track ID -- ", hdf5_getters.get_track_id(h5)) # # print("Artist mbtags -- ", hdf5_getters.get_artist_mbtags(h5)) # # print("Artist mbtags count -- ", hdf5_getters.get_artist_mbtags_count(h5)) # print("Year -- ", hdf5_getters.get_year(h5)) h5.close()
def main(): outputFile1 = open('SongCSV.csv', 'w') csvRowString = "" ################################################# #if you want to prompt the user for the order of attributes in the csv, #leave the prompt boolean set to True #else, set 'prompt' to False and set the order of attributes in the 'else' #clause prompt = False ################################################# if prompt == True: while prompt: prompt = False csvAttributeString = raw_input("\n\nIn what order would you like the colums of the CSV file?\n" + "Please delineate with commas. The options are: " + "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"+ " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," + " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" + "For example, you may write \"Title, Tempo, Duration\"...\n\n" + "...or exit by typing 'exit'.\n\n") csvAttributeList = re.split('\W+', csvAttributeString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += 'AlbumID' elif attribute == 'AlbumName'.lower(): csvRowString += 'AlbumName' elif attribute == 'ArtistID'.lower(): csvRowString += 'ArtistID' elif attribute == 'ArtistLatitude'.lower(): csvRowString += 'ArtistLatitude' elif attribute == 'ArtistLocation'.lower(): csvRowString += 'ArtistLocation' elif attribute == 'ArtistLongitude'.lower(): csvRowString += 'ArtistLongitude' elif attribute == 'ArtistName'.lower(): csvRowString += 'ArtistName' elif attribute == 'Danceability'.lower(): csvRowString += 'Danceability' elif attribute == 'Duration'.lower(): csvRowString += 'Duration' elif attribute == 'KeySignature'.lower(): csvRowString += 'KeySignature' elif attribute == 'KeySignatureConfidence'.lower(): csvRowString += 'KeySignatureConfidence' elif attribute == 'SongID'.lower(): csvRowString += "SongID" elif attribute == 'Tempo'.lower(): csvRowString += 'Tempo' elif attribute == 'TimeSignature'.lower(): csvRowString += 'TimeSignature' elif attribute == 'TimeSignatureConfidence'.lower(): csvRowString += 'TimeSignatureConfidence' elif attribute == 'Title'.lower(): csvRowString += 'Title' elif attribute == 'Year'.lower(): csvRowString += 'Year' elif attribute == 'Exit'.lower(): sys.exit() else: prompt = True print "==============" print "I believe there has been an error with the input." print "==============" break csvRowString += "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] csvRowString += "\n" outputFile1.write(csvRowString); csvRowString = "" #else, if you want to hard code the order of the csv file and not prompt #the user, else: ################################################# #change the order of the csv file here #Default is to list all available attributes (in alphabetical order) csvRowString = ("SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"+ "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,"+ "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"+ "Title,Year") ################################################# csvAttributeList = re.split('\W+', csvRowString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() outputFile1.write("SongNumber,"); outputFile1.write(csvRowString + "\n"); csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate basedir = "." # "." As the default means the current directory ext = ".H5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root,'*'+ext)) for f in files: print f songH5File = hdf5_getters.open_h5_file_read(f) song = Song(str(hdf5_getters.get_song_id(songH5File))) testDanceability = hdf5_getters.get_danceability(songH5File) # print type(testDanceability) # print ("Here is the danceability: ") + str(testDanceability) song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File)) song.albumName = str(hdf5_getters.get_release(songH5File)) song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File)) song.artistLocation = str(hdf5_getters.get_artist_location(songH5File)) song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File)) song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) # song.setGenreList() song.keySignature = str(hdf5_getters.get_key(songH5File)) song.keySignatureConfidence = str(hdf5_getters.get_key_confidence(songH5File)) # song.lyrics = None # song.popularity = None song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str(hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) #print song count csvRowString += str(song.songCount) + "," for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += song.albumID elif attribute == 'AlbumName'.lower(): albumName = song.albumName albumName = albumName.replace(',',"") csvRowString += "\"" + albumName + "\"" elif attribute == 'ArtistID'.lower(): csvRowString += "\"" + song.artistID + "\"" elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLocation'.lower(): location = song.artistLocation location = location.replace(',','') csvRowString += "\"" + location + "\"" elif attribute == 'ArtistLongitude'.lower(): longitude = song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistName'.lower(): csvRowString += "\"" + song.artistName + "\"" elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'KeySignature'.lower(): csvRowString += song.keySignature elif attribute == 'KeySignatureConfidence'.lower(): # print "key sig conf: " + song.timeSignatureConfidence csvRowString += song.keySignatureConfidence elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'Title'.lower(): csvRowString += "\"" + song.title + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year else: csvRowString += "Erm. This didn't work. Error. :( :(\n" csvRowString += "," #Remove the final comma from each row in the csv lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" songH5File.close() outputFile1.close()
def hd5_single_random_file_parser(): # Open an h5 file in read mode h5 = hdf5_getters.open_h5_file_read( '/home/skalogerakis/Documents/MillionSong/MillionSongSubset/A/M/G/TRAMGDX12903CEF79F.h5' ) function_tracker = filter( lambda x: x.startswith('get'), hdf5_getters.__dict__.keys()) # Detects all the getter functions for f in function_tracker: # Print everything in function tracker print(f) # First effort to check what each field contains. print() # 55 available fields (exluding number of songs fields) print("Num of songs -- ", hdf5_getters.get_num_songs(h5)) # One song per file print("Title -- ", hdf5_getters.get_title(h5)) # Print the title of a specific h5 file print("Artist familiarity -- ", hdf5_getters.get_artist_familiarity(h5)) print("Artist hotness -- ", hdf5_getters.get_artist_hotttnesss(h5)) print("Artist ID -- ", hdf5_getters.get_artist_id(h5)) print("Artist mbID -- ", hdf5_getters.get_artist_mbid(h5)) print("Artist playmeid -- ", hdf5_getters.get_artist_playmeid(h5)) print("Artist 7DigitalID -- ", hdf5_getters.get_artist_7digitalid(h5)) print("Artist latitude -- ", hdf5_getters.get_artist_latitude(h5)) print("Artist longitude -- ", hdf5_getters.get_artist_longitude(h5)) print("Artist location -- ", hdf5_getters.get_artist_location(h5)) print("Artist Name -- ", hdf5_getters.get_artist_name(h5)) print("Release -- ", hdf5_getters.get_release(h5)) print("Release 7DigitalID -- ", hdf5_getters.get_release_7digitalid(h5)) print("Song ID -- ", hdf5_getters.get_song_id(h5)) print("Song Hotness -- ", hdf5_getters.get_song_hotttnesss(h5)) print("Track 7Digital -- ", hdf5_getters.get_track_7digitalid(h5)) print("Similar artists -- ", hdf5_getters.get_similar_artists(h5)) print("Artist terms -- ", hdf5_getters.get_artist_terms(h5)) print("Artist terms freq -- ", hdf5_getters.get_artist_terms_freq(h5)) print("Artist terms weight -- ", hdf5_getters.get_artist_terms_weight(h5)) print("Analysis sample rate -- ", hdf5_getters.get_analysis_sample_rate(h5)) print("Audio md5 -- ", hdf5_getters.get_audio_md5(h5)) print("Danceability -- ", hdf5_getters.get_danceability(h5)) print("Duration -- ", hdf5_getters.get_duration(h5)) print("End of Fade -- ", hdf5_getters.get_end_of_fade_in(h5)) print("Energy -- ", hdf5_getters.get_energy(h5)) print("Key -- ", hdf5_getters.get_key(h5)) print("Key Confidence -- ", hdf5_getters.get_key_confidence(h5)) print("Loudness -- ", hdf5_getters.get_loudness(h5)) print("Mode -- ", hdf5_getters.get_mode(h5)) print("Mode Confidence -- ", hdf5_getters.get_mode_confidence(h5)) print("Start of fade out -- ", hdf5_getters.get_start_of_fade_out(h5)) print("Tempo -- ", hdf5_getters.get_tempo(h5)) print("Time signature -- ", hdf5_getters.get_time_signature(h5)) print("Time signature confidence -- ", hdf5_getters.get_time_signature_confidence(h5)) print("Track ID -- ", hdf5_getters.get_track_id(h5)) print("Segments Start -- ", hdf5_getters.get_segments_start(h5)) print("Segments Confidence -- ", hdf5_getters.get_segments_confidence(h5)) print("Segments Pitches -- ", hdf5_getters.get_segments_pitches(h5)) print("Segments Timbre -- ", hdf5_getters.get_segments_timbre(h5)) print("Segments Loudness max -- ", hdf5_getters.get_segments_loudness_max(h5)) print("Segments Loudness max time-- ", hdf5_getters.get_segments_loudness_max_time(h5)) print("Segments Loudness start -- ", hdf5_getters.get_segments_loudness_start(h5)) print("Sections start -- ", hdf5_getters.get_sections_start(h5)) print("Sections Confidence -- ", hdf5_getters.get_sections_confidence(h5)) print("Beats start -- ", hdf5_getters.get_beats_start(h5)) print("Beats confidence -- ", hdf5_getters.get_beats_confidence(h5)) print("Bars start -- ", hdf5_getters.get_bars_start(h5)) print("Bars confidence -- ", hdf5_getters.get_bars_confidence(h5)) print("Tatums start -- ", hdf5_getters.get_tatums_start(h5)) print("Tatums confidence -- ", hdf5_getters.get_tatums_confidence(h5)) print("Artist mbtags -- ", hdf5_getters.get_artist_mbtags(h5)) print("Artist mbtags count -- ", hdf5_getters.get_artist_mbtags_count(h5)) print("Year -- ", hdf5_getters.get_year(h5)) fields = ['Title', 'Artist ID'] with open('Tester2.csv', 'w', newline='') as csvfile: csv_writer = csv.writer(csvfile, delimiter=';') # writing the fields csv_writer.writerow(fields) # writing the data rows csv_writer.writerow( [hdf5_getters.get_title(h5), hdf5_getters.get_artist_id(h5)]) h5.close() # close h5 when completed in the end
def data_to_flat_file(basedir, ext='.h5'): """This function extract the information from the tables and creates the flat file.""" count = 0 #song counter list_to_write = [] row_to_write = "" writer = csv.writer(open("metadata_wholeA.csv", "wb")) for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: print f #the name of the file h5 = hdf5_getters.open_h5_file_read(f) title = hdf5_getters.get_title(h5) title = title.replace('"', '') comma = title.find(',') #eliminating commas in the title if comma != -1: print title time.sleep(1) album = hdf5_getters.get_release(h5) album = album.replace('"', '') #eliminating commas in the album comma = album.find(',') if comma != -1: print album time.sleep(1) artist_name = hdf5_getters.get_artist_name(h5) comma = artist_name.find(',') if comma != -1: print artist_name time.sleep(1) artist_name = artist_name.replace('"', '') #eliminating double quotes duration = hdf5_getters.get_duration(h5) samp_rt = hdf5_getters.get_analysis_sample_rate(h5) artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5) artist_fam = hdf5_getters.get_artist_familiarity(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_fam) == True: artist_fam = -1 artist_hotness = hdf5_getters.get_artist_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_hotness) == True: artist_hotness = -1 artist_id = hdf5_getters.get_artist_id(h5) artist_lat = hdf5_getters.get_artist_latitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lat) == True: artist_lat = -1 artist_loc = hdf5_getters.get_artist_location(h5) #checks artist_loc to see if it is a hyperlink if it is set as empty string artist_loc = artist_loc.replace(",", "\,") if artist_loc.startswith("<a"): artist_loc = "" if len(artist_loc) > 100: artist_loc = "" artist_lon = hdf5_getters.get_artist_longitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lon) == True: artist_lon = -1 artist_mbid = hdf5_getters.get_artist_mbid(h5) artist_pmid = hdf5_getters.get_artist_playmeid(h5) audio_md5 = hdf5_getters.get_audio_md5(h5) danceability = hdf5_getters.get_danceability(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(danceability) == True: danceability = -1 end_fade_in = hdf5_getters.get_end_of_fade_in(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(end_fade_in) == True: end_fade_in = -1 energy = hdf5_getters.get_energy(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(energy) == True: energy = -1 song_key = hdf5_getters.get_key(h5) key_c = hdf5_getters.get_key_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(key_c) == True: key_c = -1 loudness = hdf5_getters.get_loudness(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(loudness) == True: loudness = -1 mode = hdf5_getters.get_mode(h5) mode_conf = hdf5_getters.get_mode_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(mode_conf) == True: mode_conf = -1 release_7digitalid = hdf5_getters.get_release_7digitalid(h5) song_hot = hdf5_getters.get_song_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(song_hot) == True: song_hot = -1 song_id = hdf5_getters.get_song_id(h5) start_fade_out = hdf5_getters.get_start_of_fade_out(h5) tempo = hdf5_getters.get_tempo(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(tempo) == True: tempo = -1 time_sig = hdf5_getters.get_time_signature(h5) time_sig_c = hdf5_getters.get_time_signature_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(time_sig_c) == True: time_sig_c = -1 track_id = hdf5_getters.get_track_id(h5) track_7digitalid = hdf5_getters.get_track_7digitalid(h5) year = hdf5_getters.get_year(h5) bars_c = hdf5_getters.get_bars_confidence(h5) bars_c_avg = get_avg(bars_c) bars_c_max = get_max(bars_c) bars_c_min = get_min(bars_c) bars_c_stddev = get_stddev(bars_c) bars_c_count = get_count(bars_c) bars_c_sum = get_sum(bars_c) bars_start = hdf5_getters.get_bars_start(h5) bars_start_avg = get_avg(bars_start) bars_start_max = get_max(bars_start) bars_start_min = get_min(bars_start) bars_start_stddev = get_stddev(bars_start) bars_start_count = get_count(bars_start) bars_start_sum = get_sum(bars_start) beats_c = hdf5_getters.get_beats_confidence(h5) beats_c_avg = get_avg(beats_c) beats_c_max = get_max(beats_c) beats_c_min = get_min(beats_c) beats_c_stddev = get_stddev(beats_c) beats_c_count = get_count(beats_c) beats_c_sum = get_sum(beats_c) beats_start = hdf5_getters.get_beats_start(h5) beats_start_avg = get_avg(beats_start) beats_start_max = get_max(beats_start) beats_start_min = get_min(beats_start) beats_start_stddev = get_stddev(beats_start) beats_start_count = get_count(beats_start) beats_start_sum = get_sum(beats_start) sec_c = hdf5_getters.get_sections_confidence(h5) sec_c_avg = get_avg(sec_c) sec_c_max = get_max(sec_c) sec_c_min = get_min(sec_c) sec_c_stddev = get_stddev(sec_c) sec_c_count = get_count(sec_c) sec_c_sum = get_sum(sec_c) sec_start = hdf5_getters.get_sections_start(h5) sec_start_avg = get_avg(sec_start) sec_start_max = get_max(sec_start) sec_start_min = get_min(sec_start) sec_start_stddev = get_stddev(sec_start) sec_start_count = get_count(sec_start) sec_start_sum = get_sum(sec_start) seg_c = hdf5_getters.get_segments_confidence(h5) seg_c_avg = get_avg(seg_c) seg_c_max = get_max(seg_c) seg_c_min = get_min(seg_c) seg_c_stddev = get_stddev(seg_c) seg_c_count = get_count(seg_c) seg_c_sum = get_sum(seg_c) seg_loud_max = hdf5_getters.get_segments_loudness_max(h5) seg_loud_max_avg = get_avg(seg_loud_max) seg_loud_max_max = get_max(seg_loud_max) seg_loud_max_min = get_min(seg_loud_max) seg_loud_max_stddev = get_stddev(seg_loud_max) seg_loud_max_count = get_count(seg_loud_max) seg_loud_max_sum = get_sum(seg_loud_max) seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5) seg_loud_max_time_avg = get_avg(seg_loud_max_time) seg_loud_max_time_max = get_max(seg_loud_max_time) seg_loud_max_time_min = get_min(seg_loud_max_time) seg_loud_max_time_stddev = get_stddev(seg_loud_max_time) seg_loud_max_time_count = get_count(seg_loud_max_time) seg_loud_max_time_sum = get_sum(seg_loud_max_time) seg_loud_start = hdf5_getters.get_segments_loudness_start(h5) seg_loud_start_avg = get_avg(seg_loud_start) seg_loud_start_max = get_max(seg_loud_start) seg_loud_start_min = get_min(seg_loud_start) seg_loud_start_stddev = get_stddev(seg_loud_start) seg_loud_start_count = get_count(seg_loud_start) seg_loud_start_sum = get_sum(seg_loud_start) seg_pitch = hdf5_getters.get_segments_pitches(h5) pitch_size = len(seg_pitch) seg_start = hdf5_getters.get_segments_start(h5) seg_start_avg = get_avg(seg_start) seg_start_max = get_max(seg_start) seg_start_min = get_min(seg_start) seg_start_stddev = get_stddev(seg_start) seg_start_count = get_count(seg_start) seg_start_sum = get_sum(seg_start) seg_timbre = hdf5_getters.get_segments_timbre(h5) tatms_c = hdf5_getters.get_tatums_confidence(h5) tatms_c_avg = get_avg(tatms_c) tatms_c_max = get_max(tatms_c) tatms_c_min = get_min(tatms_c) tatms_c_stddev = get_stddev(tatms_c) tatms_c_count = get_count(tatms_c) tatms_c_sum = get_sum(tatms_c) tatms_start = hdf5_getters.get_tatums_start(h5) tatms_start_avg = get_avg(tatms_start) tatms_start_max = get_max(tatms_start) tatms_start_min = get_min(tatms_start) tatms_start_stddev = get_stddev(tatms_start) tatms_start_count = get_count(tatms_start) tatms_start_sum = get_sum(tatms_start) #Getting the genres genre_set = 0 #flag to see if the genre has been set or not art_trm = hdf5_getters.get_artist_terms(h5) trm_freq = hdf5_getters.get_artist_terms_freq(h5) trn_wght = hdf5_getters.get_artist_terms_weight(h5) a_mb_tags = hdf5_getters.get_artist_mbtags(h5) genre_indexes = get_genre_indexes( trm_freq) #index of the highest freq final_genre = [] genres_so_far = [] for i in range(len(genre_indexes)): genre_tmp = get_genre( art_trm, genre_indexes[i] ) #genre that corresponds to the highest freq genres_so_far = genre_dict.get_genre_in_dict( genre_tmp) #getting the genre from the dictionary if len(genres_so_far) != 0: for i in genres_so_far: final_genre.append(i) genre_set = 1 #genre was found in dictionary if genre_set == 1: col_num = [] for genre in final_genre: column = int( genre) #getting the column number of the genre col_num.append(column) genre_array = genre_columns(col_num) #genre array else: genre_array = genre_columns( -1) #the genre was not found in the dictionary transpose_pitch = seg_pitch.transpose( ) #this is to tranpose the matrix,so we can have 12 rows #arrays containing the aggregate values of the 12 rows seg_pitch_avg = [] seg_pitch_max = [] seg_pitch_min = [] seg_pitch_stddev = [] seg_pitch_count = [] seg_pitch_sum = [] i = 0 #Getting the aggregate values in the pitches array for row in transpose_pitch: seg_pitch_avg.append(get_avg(row)) seg_pitch_max.append(get_max(row)) seg_pitch_min.append(get_min(row)) seg_pitch_stddev.append(get_stddev(row)) seg_pitch_count.append(get_count(row)) seg_pitch_sum.append(get_sum(row)) i = i + 1 #extracting information from the timbre array transpose_timbre = seg_pitch.transpose( ) #tranposing matrix, to have 12 rows #arrays containing the aggregate values of the 12 rows seg_timbre_avg = [] seg_timbre_max = [] seg_timbre_min = [] seg_timbre_stddev = [] seg_timbre_count = [] seg_timbre_sum = [] i = 0 for row in transpose_timbre: seg_timbre_avg.append(get_avg(row)) seg_timbre_max.append(get_max(row)) seg_timbre_min.append(get_min(row)) seg_timbre_stddev.append(get_stddev(row)) seg_timbre_count.append(get_count(row)) seg_timbre_sum.append(get_sum(row)) i = i + 1 #Writing to the flat file writer.writerow([ title, album, artist_name, year, duration, seg_start_count, tempo ]) h5.close() count = count + 1 print count
def data_to_flat_file(basedir,ext='.h5') : """This function extract the information from the tables and creates the flat file.""" count = 0; #song counter list_to_write= [] row_to_write = "" writer = csv.writer(open("metadata.csv", "wb")) for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root,'*'+ext)) for f in files: print f #the name of the file h5 = hdf5_getters.open_h5_file_read(f) title = hdf5_getters.get_title(h5) title= title.replace('"','') comma=title.find(',') #eliminating commas in the title if comma != -1: print title time.sleep(1) album = hdf5_getters.get_release(h5) album= album.replace('"','') #eliminating commas in the album comma=album.find(',') if comma != -1: print album time.sleep(1) artist_name = hdf5_getters.get_artist_name(h5) comma=artist_name.find(',') if comma != -1: print artist_name time.sleep(1) artist_name= artist_name.replace('"','') #eliminating double quotes duration = hdf5_getters.get_duration(h5) samp_rt = hdf5_getters.get_analysis_sample_rate(h5) artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5) artist_fam = hdf5_getters.get_artist_familiarity(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_fam) == True: artist_fam=-1 artist_hotness= hdf5_getters.get_artist_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_hotness) == True: artist_hotness=-1 artist_id = hdf5_getters.get_artist_id(h5) artist_lat = hdf5_getters.get_artist_latitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lat) == True: artist_lat=-1 artist_loc = hdf5_getters.get_artist_location(h5) #checks artist_loc to see if it is a hyperlink if it is set as empty string artist_loc = artist_loc.replace(",", "\,"); if artist_loc.startswith("<a"): artist_loc = "" if len(artist_loc) > 100: artist_loc = "" artist_lon = hdf5_getters.get_artist_longitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lon) == True: artist_lon=-1 artist_mbid = hdf5_getters.get_artist_mbid(h5) artist_pmid = hdf5_getters.get_artist_playmeid(h5) audio_md5 = hdf5_getters.get_audio_md5(h5) danceability = hdf5_getters.get_danceability(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(danceability) == True: danceability=-1 end_fade_in =hdf5_getters.get_end_of_fade_in(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(end_fade_in) == True: end_fade_in=-1 energy = hdf5_getters.get_energy(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(energy) == True: energy=-1 song_key = hdf5_getters.get_key(h5) key_c = hdf5_getters.get_key_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(key_c) == True: key_c=-1 loudness = hdf5_getters.get_loudness(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(loudness) == True: loudness=-1 mode = hdf5_getters.get_mode(h5) mode_conf = hdf5_getters.get_mode_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(mode_conf) == True: mode_conf=-1 release_7digitalid = hdf5_getters.get_release_7digitalid(h5) song_hot = hdf5_getters.get_song_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(song_hot) == True: song_hot=-1 song_id = hdf5_getters.get_song_id(h5) start_fade_out = hdf5_getters.get_start_of_fade_out(h5) tempo = hdf5_getters.get_tempo(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(tempo) == True: tempo=-1 time_sig = hdf5_getters.get_time_signature(h5) time_sig_c = hdf5_getters.get_time_signature_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(time_sig_c) == True: time_sig_c=-1 track_id = hdf5_getters.get_track_id(h5) track_7digitalid = hdf5_getters.get_track_7digitalid(h5) year = hdf5_getters.get_year(h5) bars_c = hdf5_getters.get_bars_confidence(h5) bars_c_avg= get_avg(bars_c) bars_c_max= get_max(bars_c) bars_c_min = get_min(bars_c) bars_c_stddev= get_stddev(bars_c) bars_c_count = get_count(bars_c) bars_c_sum = get_sum(bars_c) bars_start = hdf5_getters.get_bars_start(h5) bars_start_avg = get_avg(bars_start) bars_start_max= get_max(bars_start) bars_start_min = get_min(bars_start) bars_start_stddev= get_stddev(bars_start) bars_start_count = get_count(bars_start) bars_start_sum = get_sum(bars_start) beats_c = hdf5_getters.get_beats_confidence(h5) beats_c_avg= get_avg(beats_c) beats_c_max= get_max(beats_c) beats_c_min = get_min(beats_c) beats_c_stddev= get_stddev(beats_c) beats_c_count = get_count(beats_c) beats_c_sum = get_sum(beats_c) beats_start = hdf5_getters.get_beats_start(h5) beats_start_avg = get_avg(beats_start) beats_start_max= get_max(beats_start) beats_start_min = get_min(beats_start) beats_start_stddev= get_stddev(beats_start) beats_start_count = get_count(beats_start) beats_start_sum = get_sum(beats_start) sec_c = hdf5_getters.get_sections_confidence(h5) sec_c_avg= get_avg(sec_c) sec_c_max= get_max(sec_c) sec_c_min = get_min(sec_c) sec_c_stddev= get_stddev(sec_c) sec_c_count = get_count(sec_c) sec_c_sum = get_sum(sec_c) sec_start = hdf5_getters.get_sections_start(h5) sec_start_avg = get_avg(sec_start) sec_start_max= get_max(sec_start) sec_start_min = get_min(sec_start) sec_start_stddev= get_stddev(sec_start) sec_start_count = get_count(sec_start) sec_start_sum = get_sum(sec_start) seg_c = hdf5_getters.get_segments_confidence(h5) seg_c_avg= get_avg(seg_c) seg_c_max= get_max(seg_c) seg_c_min = get_min(seg_c) seg_c_stddev= get_stddev(seg_c) seg_c_count = get_count(seg_c) seg_c_sum = get_sum(seg_c) seg_loud_max = hdf5_getters.get_segments_loudness_max(h5) seg_loud_max_avg= get_avg(seg_loud_max) seg_loud_max_max= get_max(seg_loud_max) seg_loud_max_min = get_min(seg_loud_max) seg_loud_max_stddev= get_stddev(seg_loud_max) seg_loud_max_count = get_count(seg_loud_max) seg_loud_max_sum = get_sum(seg_loud_max) seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5) seg_loud_max_time_avg= get_avg(seg_loud_max_time) seg_loud_max_time_max= get_max(seg_loud_max_time) seg_loud_max_time_min = get_min(seg_loud_max_time) seg_loud_max_time_stddev= get_stddev(seg_loud_max_time) seg_loud_max_time_count = get_count(seg_loud_max_time) seg_loud_max_time_sum = get_sum(seg_loud_max_time) seg_loud_start = hdf5_getters.get_segments_loudness_start(h5) seg_loud_start_avg= get_avg(seg_loud_start) seg_loud_start_max= get_max(seg_loud_start) seg_loud_start_min = get_min(seg_loud_start) seg_loud_start_stddev= get_stddev(seg_loud_start) seg_loud_start_count = get_count(seg_loud_start) seg_loud_start_sum = get_sum(seg_loud_start) seg_pitch = hdf5_getters.get_segments_pitches(h5) pitch_size = len(seg_pitch) seg_start = hdf5_getters.get_segments_start(h5) seg_start_avg= get_avg(seg_start) seg_start_max= get_max(seg_start) seg_start_min = get_min(seg_start) seg_start_stddev= get_stddev(seg_start) seg_start_count = get_count(seg_start) seg_start_sum = get_sum(seg_start) seg_timbre = hdf5_getters.get_segments_timbre(h5) tatms_c = hdf5_getters.get_tatums_confidence(h5) tatms_c_avg= get_avg(tatms_c) tatms_c_max= get_max(tatms_c) tatms_c_min = get_min(tatms_c) tatms_c_stddev= get_stddev(tatms_c) tatms_c_count = get_count(tatms_c) tatms_c_sum = get_sum(tatms_c) tatms_start = hdf5_getters.get_tatums_start(h5) tatms_start_avg= get_avg(tatms_start) tatms_start_max= get_max(tatms_start) tatms_start_min = get_min(tatms_start) tatms_start_stddev= get_stddev(tatms_start) tatms_start_count = get_count(tatms_start) tatms_start_sum = get_sum(tatms_start) #Getting the genres genre_set = 0 #flag to see if the genre has been set or not art_trm = hdf5_getters.get_artist_terms(h5) trm_freq = hdf5_getters.get_artist_terms_freq(h5) trn_wght = hdf5_getters.get_artist_terms_weight(h5) a_mb_tags = hdf5_getters.get_artist_mbtags(h5) genre_indexes=get_genre_indexes(trm_freq) #index of the highest freq final_genre=[] genres_so_far=[] for i in range(len(genre_indexes)): genre_tmp=get_genre(art_trm,genre_indexes[i]) #genre that corresponds to the highest freq genres_so_far=genre_dict.get_genre_in_dict(genre_tmp) #getting the genre from the dictionary if len(genres_so_far) != 0: for i in genres_so_far: final_genre.append(i) genre_set=1 #genre was found in dictionary if genre_set == 1: col_num=[] for genre in final_genre: column=int(genre) #getting the column number of the genre col_num.append(column) genre_array=genre_columns(col_num) #genre array else: genre_array=genre_columns(-1) #the genre was not found in the dictionary transpose_pitch= seg_pitch.transpose() #this is to tranpose the matrix,so we can have 12 rows #arrays containing the aggregate values of the 12 rows seg_pitch_avg=[] seg_pitch_max=[] seg_pitch_min=[] seg_pitch_stddev=[] seg_pitch_count=[] seg_pitch_sum=[] i=0 #Getting the aggregate values in the pitches array for row in transpose_pitch: seg_pitch_avg.append(get_avg(row)) seg_pitch_max.append(get_max(row)) seg_pitch_min.append(get_min(row)) seg_pitch_stddev.append(get_stddev(row)) seg_pitch_count.append(get_count(row)) seg_pitch_sum.append(get_sum(row)) i=i+1 #extracting information from the timbre array transpose_timbre = seg_pitch.transpose() #tranposing matrix, to have 12 rows #arrays containing the aggregate values of the 12 rows seg_timbre_avg=[] seg_timbre_max=[] seg_timbre_min=[] seg_timbre_stddev=[] seg_timbre_count=[] seg_timbre_sum=[] i=0 for row in transpose_timbre: seg_timbre_avg.append(get_avg(row)) seg_timbre_max.append(get_max(row)) seg_timbre_min.append(get_min(row)) seg_timbre_stddev.append(get_stddev(row)) seg_timbre_count.append(get_count(row)) seg_timbre_sum.append(get_sum(row)) i=i+1 #Writing to the flat file writer.writerow([title,album,artist_name,duration,samp_rt,artist_7digitalid,artist_fam,artist_hotness,artist_id,artist_lat,artist_loc,artist_lon,artist_mbid,genre_array[0],genre_array[1],genre_array[2], genre_array[3],genre_array[4],genre_array[5],genre_array[6],genre_array[7],genre_array[8],genre_array[9],genre_array[10],genre_array[11],genre_array[12],genre_array[13],genre_array[14],genre_array[15], genre_array[16],genre_array[17],genre_array[18],genre_array[19],genre_array[20],genre_array[21],genre_array[22],genre_array[23],genre_array[24],genre_array[25],genre_array[26], genre_array[27],genre_array[28],genre_array[29],genre_array[30],genre_array[31],genre_array[32],genre_array[33],genre_array[34],genre_array[35],genre_array[36],genre_array[37],genre_array[38], genre_array[39],genre_array[40],genre_array[41],genre_array[42],genre_array[43],genre_array[44],genre_array[45],genre_array[46],genre_array[47],genre_array[48],genre_array[49], genre_array[50],genre_array[51],genre_array[52],genre_array[53],genre_array[54],genre_array[55],genre_array[56],genre_array[57],genre_array[58],genre_array[59], genre_array[60],genre_array[61],genre_array[62],genre_array[63],genre_array[64],genre_array[65],genre_array[66],genre_array[67],genre_array[68],genre_array[69], genre_array[70],genre_array[71],genre_array[72],genre_array[73],genre_array[74],genre_array[75],genre_array[76],genre_array[77],genre_array[78],genre_array[79], genre_array[80],genre_array[81],genre_array[82],genre_array[83],genre_array[84],genre_array[85],genre_array[86],genre_array[87],genre_array[88],genre_array[89], genre_array[90],genre_array[91],genre_array[92],genre_array[93],genre_array[94],genre_array[95],genre_array[96],genre_array[97],genre_array[98],genre_array[99],genre_array[100],genre_array[101], genre_array[102],genre_array[103],genre_array[104],genre_array[105],genre_array[106],genre_array[107],genre_array[108],genre_array[109],genre_array[110],genre_array[111],genre_array[112], genre_array[113],genre_array[114],genre_array[115],genre_array[116],genre_array[117],genre_array[118],genre_array[119],genre_array[120],genre_array[121],genre_array[122],genre_array[123], genre_array[124],genre_array[125],genre_array[126],genre_array[127],genre_array[128],genre_array[129],genre_array[130],genre_array[131],genre_array[132], artist_pmid,audio_md5,danceability,end_fade_in,energy,song_key,key_c,loudness,mode,mode_conf,release_7digitalid,song_hot,song_id,start_fade_out,tempo,time_sig,time_sig_c,track_id,track_7digitalid,year,bars_c_avg,bars_c_max,bars_c_min,bars_c_stddev,bars_c_count,bars_c_sum,bars_start_avg,bars_start_max,bars_start_min,bars_start_stddev,bars_start_count,bars_start_sum,beats_c_avg,beats_c_max,beats_c_min,beats_c_stddev,beats_c_count,beats_c_sum,beats_start_avg,beats_start_max,beats_start_min, beats_start_stddev,beats_start_count,beats_start_sum, sec_c_avg,sec_c_max,sec_c_min,sec_c_stddev,sec_c_count,sec_c_sum,sec_start_avg,sec_start_max,sec_start_min,sec_start_stddev,sec_start_count,sec_start_sum,seg_c_avg,seg_c_max,seg_c_min,seg_c_stddev,seg_c_count,seg_c_sum,seg_loud_max_avg,seg_loud_max_max,seg_loud_max_min,seg_loud_max_stddev,seg_loud_max_count,seg_loud_max_sum,seg_loud_max_time_avg,seg_loud_max_time_max,seg_loud_max_time_min,seg_loud_max_time_stddev,seg_loud_max_time_count,seg_loud_max_time_sum,seg_loud_start_avg,seg_loud_start_max,seg_loud_start_min,seg_loud_start_stddev,seg_loud_start_count,seg_loud_start_sum,seg_pitch_avg[0],seg_pitch_max[0],seg_pitch_min[0],seg_pitch_stddev[0],seg_pitch_count[0],seg_pitch_sum[0],seg_pitch_avg[1],seg_pitch_max[1],seg_pitch_min[1],seg_pitch_stddev[1],seg_pitch_count[1],seg_pitch_sum[1],seg_pitch_avg[2],seg_pitch_max[2],seg_pitch_min[2],seg_pitch_stddev[2],seg_pitch_count[2],seg_pitch_sum[2],seg_pitch_avg[3],seg_pitch_max[3],seg_pitch_min[3],seg_pitch_stddev[3],seg_pitch_count[3],seg_pitch_sum[3],seg_pitch_avg[4],seg_pitch_max[4],seg_pitch_min[4],seg_pitch_stddev[4],seg_pitch_count[4],seg_pitch_sum[4],seg_pitch_avg[5],seg_pitch_max[5],seg_pitch_min[5],seg_pitch_stddev[5],seg_pitch_count[5],seg_pitch_sum[5],seg_pitch_avg[6],seg_pitch_max[6],seg_pitch_min[6],seg_pitch_stddev[6],seg_pitch_count[6],seg_pitch_sum[6],seg_pitch_avg[7],seg_pitch_max[7],seg_pitch_min[7],seg_pitch_stddev[7],seg_pitch_count[7],seg_pitch_sum[7],seg_pitch_avg[8],seg_pitch_max[8],seg_pitch_min[8],seg_pitch_stddev[8],seg_pitch_count[8],seg_pitch_sum[8],seg_pitch_avg[9],seg_pitch_max[9],seg_pitch_min[9],seg_pitch_stddev[9],seg_pitch_count[9],seg_pitch_sum[9],seg_pitch_avg[10],seg_pitch_max[10],seg_pitch_min[10],seg_pitch_stddev[10],seg_pitch_count[10],seg_pitch_sum[10],seg_pitch_avg[11],seg_pitch_max[11],seg_pitch_min[11], seg_pitch_stddev[11],seg_pitch_count[11],seg_pitch_sum[11],seg_start_avg,seg_start_max,seg_start_min,seg_start_stddev, seg_start_count,seg_start_sum,seg_timbre_avg[0],seg_timbre_max[0],seg_timbre_min[0],seg_timbre_stddev[0],seg_timbre_count[0], seg_timbre_sum[0],seg_timbre_avg[1],seg_timbre_max[1],seg_timbre_min[1],seg_timbre_stddev[1],seg_timbre_count[1], seg_timbre_sum[1],seg_timbre_avg[2],seg_timbre_max[2],seg_timbre_min[2],seg_timbre_stddev[2],seg_timbre_count[2], seg_timbre_sum[2],seg_timbre_avg[3],seg_timbre_max[3],seg_timbre_min[3],seg_timbre_stddev[3],seg_timbre_count[3], seg_timbre_sum[3],seg_timbre_avg[4],seg_timbre_max[4],seg_timbre_min[4],seg_timbre_stddev[4],seg_timbre_count[4], seg_timbre_sum[4],seg_timbre_avg[5],seg_timbre_max[5],seg_timbre_min[5],seg_timbre_stddev[5],seg_timbre_count[5], seg_timbre_sum[5],seg_timbre_avg[6],seg_timbre_max[6],seg_timbre_min[6],seg_timbre_stddev[6],seg_timbre_count[6], seg_timbre_sum[6],seg_timbre_avg[7],seg_timbre_max[7],seg_timbre_min[7],seg_timbre_stddev[7],seg_timbre_count[7], seg_timbre_sum[7],seg_timbre_avg[8],seg_timbre_max[8],seg_timbre_min[8],seg_timbre_stddev[8],seg_timbre_count[8], seg_timbre_sum[8],seg_timbre_avg[9],seg_timbre_max[9],seg_timbre_min[9],seg_timbre_stddev[9],seg_timbre_count[9], seg_timbre_sum[9],seg_timbre_avg[10],seg_timbre_max[10],seg_timbre_min[10],seg_timbre_stddev[10],seg_timbre_count[10], seg_timbre_sum[10],seg_timbre_avg[11],seg_timbre_max[11],seg_timbre_min[11],seg_timbre_stddev[11],seg_timbre_count[11], seg_timbre_sum[11],tatms_c_avg,tatms_c_max,tatms_c_min,tatms_c_stddev,tatms_c_count,tatms_c_sum,tatms_start_avg,tatms_start_max,tatms_start_min,tatms_start_stddev,tatms_start_count,tatms_start_sum]) h5.close() count=count+1; print count;
# Get tempo tempo = hdf5_getters.get_tempo(h5) ######################################################### # Get analysis sample rate analysis_rate = hdf5_getters.get_analysis_sample_rate(h5) # Get end of fade in end_of_fade_in = hdf5_getters.get_end_of_fade_in(h5) # Get key key = hdf5_getters.get_key(h5) # Get key confidence key_confidence = hdf5_getters.get_key_confidence(h5) # Get mode mode = hdf5_getters.get_mode(h5) # Get mode confidence mode_confidence = hdf5_getters.get_mode_confidence(h5) # Get start of fade-out start_of_fade_out = hdf5_getters.get_start_of_fade_out(h5) # Get time signature time_signature = hdf5_getters.get_time_signature(h5) # Get time signature confidence time_signature_conf = hdf5_getters.get_time_signature_confidence(
def convert_to_csv(): i = 0 data = [] target = [] count = 0 with open('data_timbre.csv', 'w+') as f: with open('target_timbre.csv', 'w+') as f2: writer = csv.writer(f) target_writer = csv.writer(f2) for root, dirs, files in os.walk(msd_subset_data_path): files = glob.glob(os.path.join(root, '*.h5')) for f in sorted(files): try: # Opening is very prone to causing exceptions, we'll just skip file if exception is thrown h5 = getter.open_h5_file_read(f) year = getter.get_year(h5) if year: count += 1 analysis_file = open('current_analysis_status.txt', 'a') update = "Currently at file name: " + str( f) + " and at number " + str(count) + "\n" analysis_file.write(update) print update analysis_file.close() target.append([year]) row = [] timbre = getter.get_segments_timbre(h5) segstarts = getter.get_segments_start(h5) btstarts = getter.get_beats_start(h5) duration = getter.get_duration(h5) end_of_fade_in = getter.get_end_of_fade_in(h5) key = getter.get_key(h5) key_confidence = getter.get_key_confidence(h5) loudness = getter.get_loudness(h5) start_of_fade_out = getter.get_start_of_fade_out( h5) tempo = getter.get_tempo(h5) time_signature = getter.get_time_signature(h5) time_signature_confidence = getter.get_time_signature_confidence( h5) h5.close() # VERY IMPORTANT segstarts = np.array(segstarts).flatten() btstarts = np.array(btstarts).flatten() bttimbre = align_feats( timbre.T, segstarts, btstarts, duration, end_of_fade_in, key, key_confidence, loudness, start_of_fade_out, tempo, time_signature, time_signature_confidence) if bttimbre is None: continue # Skip this track, some features broken npicks, winsize, finaldim = 12, 12, 144 # Calculated by 12 * 12. 12 is fixed as number of dimensions. processed_feats = extract_and_compress( bttimbre, npicks, winsize, finaldim) n_p_feats = processed_feats.shape[0] if processed_feats is None: continue # Skip this track, some features broken row = processed_feats.flatten() if len( row ) != 12 * 144: # 12 dimensions * 144 features per dimension continue # Not enough features year_row = np.array([year]) if row.any() and year_row.any(): writer.writerow(row) target_writer.writerow(year_row) i += 1 else: h5.close() except Exception: pass print 'Finished!' analysis_file = open('current_analysis_status.txt', 'a') analysis_file.write('Done!') analysis_file.close() return
def main(): outputFile1 = open('SongCSV.csv', 'w') csvRowString = "" ################################################# #if you want to prompt the user for the order of attributes in the csv, #leave the prompt boolean set to True #else, set 'prompt' to False and set the order of attributes in the 'else' #clause prompt = False ################################################# if prompt == True: while prompt: prompt = False csvAttributeString = raw_input( "\n\nIn what order would you like the colums of the CSV file?\n" + "Please delineate with commas. The options are: " + "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude," + " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," + " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" + "For example, you may write \"Title, Tempo, Duration\"...\n\n" + "...or exit by typing 'exit'.\n\n") csvAttributeList = re.split('\W+', csvAttributeString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += 'AlbumID' elif attribute == 'AlbumName'.lower(): csvRowString += 'AlbumName' elif attribute == 'ArtistID'.lower(): csvRowString += 'ArtistID' elif attribute == 'ArtistLatitude'.lower(): csvRowString += 'ArtistLatitude' elif attribute == 'ArtistLocation'.lower(): csvRowString += 'ArtistLocation' elif attribute == 'ArtistLongitude'.lower(): csvRowString += 'ArtistLongitude' elif attribute == 'ArtistName'.lower(): csvRowString += 'ArtistName' elif attribute == 'Danceability'.lower(): csvRowString += 'Danceability' elif attribute == 'Duration'.lower(): csvRowString += 'Duration' elif attribute == 'KeySignature'.lower(): csvRowString += 'KeySignature' elif attribute == 'KeySignatureConfidence'.lower(): csvRowString += 'KeySignatureConfidence' elif attribute == 'SongID'.lower(): csvRowString += "SongID" elif attribute == 'Tempo'.lower(): csvRowString += 'Tempo' elif attribute == 'TimeSignature'.lower(): csvRowString += 'TimeSignature' elif attribute == 'TimeSignatureConfidence'.lower(): csvRowString += 'TimeSignatureConfidence' elif attribute == 'Title'.lower(): csvRowString += 'Title' elif attribute == 'Year'.lower(): csvRowString += 'Year' elif attribute == 'Exit'.lower(): sys.exit() else: prompt = True print "==============" print "I believe there has been an error with the input." print "==============" break csvRowString += "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" #else, if you want to hard code the order of the csv file and not prompt #the user, else: ################################################# #change the order of the csv file here #Default is to list all available attributes (in alphabetical order) csvRowString = ( "SongID,AlbumID,AlbumName,TrackId,ArtistID,ArtistLatitude,ArtistLocation," + "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature," + "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence," + "Title,Year") ################################################# csvAttributeList = re.split('\W+', csvRowString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() outputFile1.write("SongNumber,") outputFile1.write(csvRowString + "\n") csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate basedir = "/home/umwangye/millonsong/MillionSongSubset/data/" # "." As the default means the current directory ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: print f songH5File = hdf5_getters.open_h5_file_read(f) #song = Song(str(hdf5_getters.get_song_id(songH5File))) #testDanceability = hdf5_getters.get_danceability(songH5File) # print type(testDanceability) # print ("Here is the danceability: ") + str(testDanceability) numPerH5 = hdf5_getters.get_num_songs(songH5File) for cnt in range(numPerH5): song = Song(str(hdf5_getters.get_song_id(songH5File, cnt))) song.trackId = str(hdf5_getters.get_track_id(songH5File, cnt)) song.artistID = str(hdf5_getters.get_artist_id( songH5File, cnt)) song.albumID = str( hdf5_getters.get_release_7digitalid(songH5File, cnt)) song.albumName = str(hdf5_getters.get_release(songH5File, cnt)) song.artistLatitude = str( hdf5_getters.get_artist_latitude(songH5File, cnt)) song.artistLocation = str( hdf5_getters.get_artist_location(songH5File, cnt)) song.artistLongitude = str( hdf5_getters.get_artist_longitude(songH5File, cnt)) song.artistName = str( hdf5_getters.get_artist_name(songH5File, cnt)) song.danceability = str( hdf5_getters.get_danceability(songH5File, cnt)) song.duration = str(hdf5_getters.get_duration(songH5File, cnt)) # song.setGenreList() song.keySignature = str(hdf5_getters.get_key(songH5File, cnt)) song.keySignatureConfidence = str( hdf5_getters.get_key_confidence(songH5File, cnt)) # song.lyrics = None # song.popularity = None song.tempo = str(hdf5_getters.get_tempo(songH5File, cnt)) song.timeSignature = str( hdf5_getters.get_time_signature(songH5File, cnt)) song.timeSignatureConfidence = str( hdf5_getters.get_time_signature_confidence( songH5File, cnt)) song.title = str(hdf5_getters.get_title(songH5File, cnt)) song.year = str(hdf5_getters.get_year(songH5File, cnt)) #print song count csvRowString += str(song.songCount) + "," for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += song.albumID elif attribute == 'AlbumName'.lower(): albumName = song.albumName albumName = albumName.replace(',', "") csvRowString += "\"" + albumName + "\"" elif attribute == 'TrackId'.lower(): csvRowString += song.trackId elif attribute == 'ArtistID'.lower(): csvRowString += "\"" + song.artistID + "\"" elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLocation'.lower(): location = song.artistLocation location = location.replace(',', '') csvRowString += "\"" + location + "\"" elif attribute == 'ArtistLongitude'.lower(): longitude = song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistName'.lower(): csvRowString += "\"" + song.artistName + "\"" elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'KeySignature'.lower(): csvRowString += song.keySignature elif attribute == 'KeySignatureConfidence'.lower(): # print "key sig conf: " + song.timeSignatureConfidence csvRowString += song.keySignatureConfidence elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'Title'.lower(): csvRowString += "\"" + song.title + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year else: csvRowString += "Erm. This didn't work. Error. :( :(\n" csvRowString += "," #Remove the final comma from each row in the csv lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" songH5File.close() outputFile1.close()
def main(): outputFile1 = open('SongCSV.csv', 'w') csvRowString = "" ################################################# #if you want to prompt the user for the order of attributes in the csv, #leave the prompt boolean set to True #else, set 'prompt' to False and set the order of attributes in the 'else' #clause prompt = False ################################################# if prompt == True: while prompt: prompt = False csvAttributeString = raw_input( "\n\nIn what order would you like the colums of the CSV file?\n" + "Please delineate with commas. The options are: " + "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude," + " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," + " SongID, TimeSignature, TimeSignatureConfidence, Title, Year and Hotttnesss.\n\n" + "For example, you may write \"Title, Tempo, Duration\"...\n\n" + "...or exit by typing 'exit'.\n\n") csvAttributeList = re.split('\W+', csvAttributeString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += 'AlbumID' elif attribute == 'AlbumName'.lower(): csvRowString += 'AlbumName' elif attribute == 'ArtistID'.lower(): csvRowString += 'ArtistID' elif attribute == 'ArtistLatitude'.lower(): csvRowString += 'ArtistLatitude' elif attribute == 'ArtistLocation'.lower(): csvRowString += 'ArtistLocation' elif attribute == 'ArtistLongitude'.lower(): csvRowString += 'ArtistLongitude' elif attribute == 'ArtistName'.lower(): csvRowString += 'ArtistName' elif attribute == 'Danceability'.lower(): csvRowString += 'Danceability' elif attribute == 'Duration'.lower(): csvRowString += 'Duration' elif attribute == 'KeySignature'.lower(): csvRowString += 'KeySignature' elif attribute == 'KeySignatureConfidence'.lower(): csvRowString += 'KeySignatureConfidence' elif attribute == 'SongID'.lower(): csvRowString += "SongID" elif attribute == 'Tempo'.lower(): csvRowString += 'Tempo' elif attribute == 'TimeSignature'.lower(): csvRowString += 'TimeSignature' elif attribute == 'TimeSignatureConfidence'.lower(): csvRowString += 'TimeSignatureConfidence' elif attribute == 'Title'.lower(): csvRowString += 'Title' elif attribute == 'Year'.lower(): csvRowString += 'Year' elif attribute == 'SongHotttnesss'.lower(): csvRowString += 'SongHotttnesss' elif attribute == 'Exit'.lower(): sys.exit() else: prompt = True print("==============") print("I believe there has been an error with the input.") print("==============") break csvRowString += "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" #else, if you want to hard code the order of the csv file and not prompt #the user, else: ################################################# #change the order of the csv file here #Default is to list all available attributes (in alphabetical order) csvRowString = ( "SongID,AlbumID,AlbumName,ArtistFamiliarity,ArtistHotttnesss,ArtistID," + "ArtistLatitude,ArtistLocation," + "ArtistLongitude,ArtistName,BarsConfidence,BarsStart,BeatsConfidence," + "BeatsStart,Danceability,Duration,EndOfFadeIn,Energy,KeySignature," + "KeySignatureConfidence,Loudness,Mode,ModeConfidence,SectionsConfidence," + "SectionsStart,SegmentsConfidence,SegmentsLoudnessMax," + "SegmentsLoudnessMaxTime,SegmentsLoudnessMaxStart,SegmentsPitches," + "SegmentsStart,SegmentsTimbre,SongHotttnesss,TatumsConfidence," + "TatumsStart,Tempo,TimeSignature," + "TimeSignatureConfidence,Title,Year") ''' csvRowString = ("SongID,AlbumID,AlbumName,ArtistFamiliarity,"+ "ArtistHotttnesss,ArtistID,"+ "ArtistLatitude,ArtistLocation,"+ "ArtistLongitude,ArtistName,"+ "BarsConfidence,BarsStart,"+ "Danceability,Duration,EndOfFadeIn,Energy,KeySignature,"+ "KeySignatureConfidence,Loudness,Mode,ModeConfidence,"+ "SegmentsPitches,"+ "SegmentsTimbre,SongHotttnesss,"+ "Tempo,TimeSignature,"+ "TimeSignatureConfidence,Title,Year") ''' ################################################# csvAttributeList = re.split('\W+', csvRowString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() outputFile1.write("SongNumber,") outputFile1.write(csvRowString + "\n") csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate #basedir = "./I/" basedir = "./MillionSongSubset/data/" # "." As the default means the current directory #basedir = "/Users/dafirebanks/Downloads/MillionSongSubset/data/" # "." As the default means the current directory ext = "*.h5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, ext)) for f in files: print(f) songH5File = hdf5_getters.open_h5_file_read(f) song = Song(str(hdf5_getters.get_song_id(songH5File))) #testDanceability = hdf5_getters.get_danceability(songH5File) # print type(testDanceability) # print ("Here is the danceability: ") + str(testDanceability) song.artistFamiliarity = str( hdf5_getters.get_artist_familiarity(songH5File)) song.artistHotttnesss = str( hdf5_getters.get_artist_hotttnesss(songH5File)) song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File)) song.albumName = str(hdf5_getters.get_release(songH5File)) song.artistLatitude = str( hdf5_getters.get_artist_latitude(songH5File)) song.artistLocation = str( hdf5_getters.get_artist_location(songH5File)) song.artistLongitude = str( hdf5_getters.get_artist_longitude(songH5File)) song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.barsConfidence = str( hdf5_getters.get_bars_confidence(songH5File)).replace( ",", "").replace("\n", "") song.barsStart = str( hdf5_getters.get_bars_start(songH5File)).replace(",", "").replace( "\n", "") song.beatsConfidence = str( hdf5_getters.get_beats_confidence(songH5File)).replace( ",", "").replace("\n", "") song.beatsStart = str( hdf5_getters.get_beats_start(songH5File)).replace(",", "").replace( "\n", "") song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) # song.setGenreList() song.hotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File)) song.keySignature = str(hdf5_getters.get_key(songH5File)) song.keySignatureConfidence = str( hdf5_getters.get_key_confidence(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) song.modeConfidence = str( hdf5_getters.get_mode_confidence(songH5File)) # song.lyrics = None # song.popularity = None song.sectionsConfidence = str( hdf5_getters.get_sections_confidence(songH5File)).replace( ",", "").replace("\n", "") song.sectionsStart = str( hdf5_getters.get_segments_start(songH5File)).replace( ",", "").replace("\n", "") song.segmentsConfidence = str( hdf5_getters.get_segments_confidence(songH5File)).replace( ",", "").replace("\n", "") song.segmentsLoudnessMax = str( hdf5_getters.get_segments_loudness_max(songH5File)).replace( ",", "").replace("\n", "") song.segmentsLoudnessMaxTime = str( hdf5_getters.get_segments_loudness_max_time( songH5File)).replace(",", "").replace("\n", "") song.segmentsLoudnessMaxStart = str( hdf5_getters.get_segments_loudness_start(songH5File)).replace( ",", "").replace("\n", "") song.segmentsPitches = str( hdf5_getters.get_segments_pitches(songH5File)).replace( ",", "").replace("\n", "") song.segmentsStart = str( hdf5_getters.get_segments_start(songH5File)).replace( ",", "").replace("\n", "") song.segmentsTimbre = str( hdf5_getters.get_segments_timbre(songH5File)).replace( ",", "").replace("\n", "") song.startOfFadeOut = str( hdf5_getters.get_start_of_fade_out(songH5File)) song.tatumsConfidence = str( hdf5_getters.get_tatums_confidence(songH5File)).replace( ",", "").replace("\n", "") song.tatumsStart = str( hdf5_getters.get_tatums_start(songH5File)).replace( ",", "").replace("\n", "") song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str( hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str( hdf5_getters.get_time_signature_confidence(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) #print song count csvRowString += str(song.songCount) + "," for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += song.albumID elif attribute == 'AlbumName'.lower(): albumName = song.albumName albumName = albumName.replace(',', "") csvRowString += "\"" + albumName + "\"" elif attribute == 'ArtistFamiliarity'.lower(): csvRowString += song.artistFamiliarity elif attribute == 'ArtistHotttnesss'.lower(): csvRowString += song.artistHotttnesss elif attribute == 'ArtistID'.lower(): csvRowString += "\"" + song.artistID + "\"" elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLocation'.lower(): location = song.artistLocation location = location.replace(',', '') csvRowString += "\"" + location + "\"" elif attribute == 'ArtistLongitude'.lower(): longitude = song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistName'.lower(): csvRowString += "\"" + song.artistName + "\"" elif attribute == 'BarsConfidence'.lower(): csvRowString += song.barsConfidence elif attribute == 'BarsStart'.lower(): csvRowString += song.barsStart elif attribute == 'BeatsConfidence'.lower(): csvRowString += song.beatsConfidence elif attribute == 'BeatsStart'.lower(): csvRowString += song.beatsStart elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'EndOfFadeIn'.lower(): csvRowString += song.endOfFadeIn elif attribute == 'Energy'.lower(): csvRowString += song.energy elif attribute == 'KeySignature'.lower(): csvRowString += song.keySignature elif attribute == 'KeySignatureConfidence'.lower(): # print "key sig conf: " + song.timeSignatureConfidence csvRowString += song.keySignatureConfidence elif attribute == 'Loudness'.lower(): csvRowString += song.loudness elif attribute == 'Mode'.lower(): csvRowString += song.mode elif attribute == 'ModeConfidence'.lower(): csvRowString += song.modeConfidence elif attribute == 'SectionsConfidence'.lower(): csvRowString += song.sectionsConfidence elif attribute == 'SectionsStart'.lower(): csvRowString += song.sectionsStart elif attribute == 'SegmentsConfidence'.lower(): csvRowString += song.segmentsConfidence elif attribute == 'SegmentsLoudnessMax'.lower(): csvRowString += song.segmentsLoudnessMax elif attribute == 'SegmentsLoudnessMaxTime'.lower(): csvRowString += song.segmentsLoudnessMaxTime elif attribute == 'SegmentsLoudnessMaxStart'.lower(): csvRowString += song.segmentsLoudnessMaxStart elif attribute == 'SegmentsPitches'.lower(): csvRowString += song.segmentsPitches elif attribute == 'SegmentsStart'.lower(): csvRowString += song.segmentsStart elif attribute == 'SegmentsTimbre'.lower(): csvRowString += song.segmentsTimbre elif attribute == 'SongHotttnesss'.lower(): csvRowString += song.hotttnesss elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'StartOfFadeOut'.lower(): csvRowString += song.startOfFadeOut elif attribute == 'TatumsConfidence'.lower(): csvRowString += song.tatumsConfidence elif attribute == 'TatumsStart'.lower(): csvRowString += song.tatumsStart elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'Title'.lower(): csvRowString += "\"" + song.title + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year else: csvRowString += "Erm. This didn't work. Error. :( :(\n" csvRowString += "," #Remove the final comma from each row in the csv lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" songH5File.close() outputFile1.close()