def process_song(self, song_path):
    song_data = h5.open_h5_file_read(song_path)
    song_id = h5.get_song_id(song_data).decode('UTF-8')
    song_int_id = int(h5.get_track_7digitalid(song_data))
    song_name = h5.get_title(song_data).decode('UTF-8').lower()
    artist_name = h5.get_artist_name(song_data).decode('UTF-8').lower()
    song_year = int(h5.get_year(song_data))
    timbre = self.ndarray_list_to_ndlist(h5.get_segments_timbre(song_data))
    chroma = self.ndarray_list_to_ndlist(h5.get_segments_pitches(song_data))
    song_data.close()
    song_dict = {
        'id': song_int_id,
        'source_id': song_id,
        'name': song_name,
        'artist': artist_name,
        'year': song_year,
        'timbre': timbre,
        'chroma': chroma
    }
    return song_dict
def process_song(self, song_path):
    # read file
    song_data = h5.open_h5_file_read(song_path)
    # process file
    # song_id = h5.get_song_id(song_data).decode('UTF-8')
    song_int_id = int(h5.get_track_7digitalid(song_data))
    song_name = h5.get_title(song_data).decode('UTF-8').lower()
    artist_name = h5.get_artist_name(song_data).decode('UTF-8').lower()
    song_year = int(h5.get_year(song_data))
    sp = SpotifyInterface()
    track_info = sp.search_track_info(artist_name, song_name)
    if track_info is None:
        song_data.close()
        return None
    timbre = self.ndarray_list_to_ndlist(h5.get_segments_timbre(song_data))
    chroma = self.ndarray_list_to_ndlist(h5.get_segments_pitches(song_data))
    song_data.close()
    song_dict = {
        'id': song_int_id,
        'name': song_name,
        'artist': artist_name,
        'year': song_year,
        'timbre': timbre,
        'chroma': chroma,
        **track_info
    }
    return song_dict
def fetch_song_from_h5(h5_filepath):
    basename, extension = os.path.splitext(h5_filepath)
    if extension != '.h5':
        return
    audio_filepath = basename + '.mp3'
    h5 = hdf5_getters.open_h5_file_read(h5_filepath)
    track_id = hdf5_getters.get_track_7digitalid(h5)
    track_name = hdf5_getters.get_title(h5)
    artist_name = hdf5_getters.get_artist_name(h5)
    h5.close()
    consumer = oauth.Consumer(OAUTH_CLIENT_KEY, OAUTH_CLIENT_SECRET)
    token = oauth.Token(OAUTH_ACCESS_TOKEN, OAUTH_ACCESS_SECRET)
    request = oauth.Request.from_consumer_and_token(
        consumer,
        http_url=get_clip_url(track_id),
        is_form_encoded=True,
        parameters={'country': 'ww'})
    signing_method = oauth.SignatureMethod_HMAC_SHA1()
    request.sign_request(signing_method, consumer, token)
    url = request.to_url()
    r = requests.get(url)
    if r.status_code not in (requests.codes.ok, requests.codes.not_found):
        print(r.status_code, r.headers, r.content)
        exit()
    if r.status_code == requests.codes.ok:
        print('FETCHED track {0} {1} {2}'.format(track_id, artist_name, track_name))
        with open(audio_filepath, 'wb') as f:
            f.write(r.content)
    else:
        print('FAILED TO FETCH track {0} {1} {2}'.format(track_id, artist_name, track_name))
def _extractSongData(file_path, filename):
    # song_id, title, release, artist_name, year
    h5 = hdf5_getters.open_h5_file_read(file_path)
    track_id = filename[:-3]
    song_id = hdf5_getters.get_song_id(h5).decode('UTF-8')
    dig7_id = hdf5_getters.get_track_7digitalid(h5)
    title = hdf5_getters.get_title(h5).decode('UTF-8')
    release = hdf5_getters.get_release(h5).decode('UTF-8')
    artist_name = hdf5_getters.get_artist_name(h5).decode('UTF-8')
    year = hdf5_getters.get_year(h5)
    h5.close()
    # print(song_id, track_id, dig7_id, title, release, artist_name, year)
    return track_id, song_id, dig7_id, title, release, artist_name, year
def getURLFromH5(h5path):
    if not os.path.isfile(h5path):
        print 'invalid path (not a file):', h5path
        sys.exit(0)
    h5 = hdf5_utils.open_h5_file_read(h5path)
    track_7digitalid = GETTERS.get_track_7digitalid(h5)
    release_7digitalid = GETTERS.get_release_7digitalid(h5)
    artist_7digitalid = GETTERS.get_artist_7digitalid(h5)
    artist_name = GETTERS.get_artist_name(h5)
    release_name = GETTERS.get_release(h5)
    track_name = GETTERS.get_title(h5)
    h5.close()
    # we already have the 7digital track id? way too easy!
    if track_7digitalid >= 0:
        preview = get_preview_from_trackid(track_7digitalid)
        if preview == '':
            print 'something went wrong when looking by track id'
        else:
            print preview
            return preview
    sys.exit(0)
def get7DigitalIDs(self, file):
    # get the 7digital id
    h5 = GETTERS.open_h5_file_read(file)
    id7 = GETTERS.get_track_7digitalid(h5)
    song_id = GETTERS.get_song_id(h5).decode("UTF-8")
    # print(id7)
    # put it into the database
    try:
        self.cursor.execute("UPDATE songs SET track_7digitalid = {} WHERE song_id == '{}';".format(id7, song_id))
        self.conn.commit()
    except Exception as e:
        print("couldn't insert into database")
        print("reason: ", e)
        h5.close()
        return -1
    # close the h5 file
    h5.close()
    return 0
def get_preview_dft(h5):
    """
    Ask for the preview to a particular track, get the XML answer
    After calling the API with a given track id, we get an XML response
    that looks like:
    <response status="ok" version="1.2"
              xsi:noNamespaceSchemaLocation="http://api.7digital.com/1.2/static/7digitalAPI.xsd">
      <url>
        http://previews.7digital.com/clips/34/6804688.clip.mp3
      </url>
    </response>
    We parse it for the URL that we return, or '' if a problem
    """
    trackid = Getters.get_track_7digitalid(h5)
    previewUrl = sevenD.preview_url(trackid)
    mp3Url = urllib2.urlopen(previewUrl)
    mp3Temp = mktemp('.mp3')
    with open(mp3Temp, 'wb') as output:
        output.write(mp3Url.read())
    # todo: remove mp3 file
    dft = mp3.mp3ToDFT(mp3Temp)
    os.remove(mp3Temp)
    return dft
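The docstring above shows the shape of the 7digital XML answer. As a rough, hypothetical sketch (not part of the original source), the <url> element of such a response could be pulled out with the standard library, returning '' when nothing usable is found:

def _extract_preview_url(response_xml):
    # Hypothetical helper (not in the original code): parse the 7digital XML
    # response shown in the docstring above and return the <url> text, or ''.
    from xml.dom import minidom
    dom = minidom.parseString(response_xml)
    urls = dom.getElementsByTagName('url')
    if not urls or urls[0].firstChild is None:
        return ''
    return urls[0].firstChild.data.strip()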
def getURLFromH5(h5path):
    if not os.path.isfile(h5path):
        print 'invalid path (not a file):', h5path
        sys.exit(0)
    h5 = hdf5_utils.open_h5_file_read(h5path)
    track_7digitalid = GETTERS.get_track_7digitalid(h5)
    release_7digitalid = GETTERS.get_release_7digitalid(h5)
    artist_7digitalid = GETTERS.get_artist_7digitalid(h5)
    artist_name = GETTERS.get_artist_name(h5)
    release_name = GETTERS.get_release(h5)
    track_name = GETTERS.get_title(h5)
    h5.close()
    # we already have the 7digital track id? way too easy!
    print "Suggested Song URLs For you"
    print "==========================="
    if track_7digitalid >= 0:
        preview = get_preview_from_trackid(track_7digitalid)
        if preview == '':
            print 'something went wrong when looking by track id'
        else:
            print preview
            return preview
    sys.exit(0)
# NOTE: this excerpt relies on module-level imports (os, glob, csv, time, numpy,
# hdf5_getters) and on project helpers (padding, get_avg, get_max, get_min,
# get_stddev, get_count, get_sum, get_genre_indexes, get_genre, genre_columns,
# genre_dict) defined elsewhere in the original source.
def data_to_flat_file(basedir, ext='.h5'):
    """This function extracts the information from the tables and creates the flat file."""
    count = 0  # song counter
    list_to_write = []
    group_index = 0
    row_to_write = ""
    writer = csv.writer(open("complete.csv", "wb"))

    def append_value_and_stats(row, values, i):
        # append values[i] followed by the aggregates of the whole array
        row.append(values[i])
        row.append(get_avg(values))
        row.append(get_max(values))
        row.append(get_min(values))
        row.append(get_stddev(values))
        row.append(get_count(values))
        row.append(get_sum(values))

    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            row = []
            print f
            h5 = hdf5_getters.open_h5_file_read(f)

            title = hdf5_getters.get_title(h5).replace('"', '')
            row.append(title)
            if title.find(',') != -1:
                print title
                time.sleep(1)
            album = hdf5_getters.get_release(h5).replace('"', '')
            row.append(album)
            if album.find(',') != -1:
                print album
                time.sleep(1)
            artist_name = hdf5_getters.get_artist_name(h5)
            if artist_name.find(',') != -1:
                print artist_name
                time.sleep(1)
            artist_name = artist_name.replace('"', '')
            row.append(artist_name)
            row.append(hdf5_getters.get_duration(h5))
            row.append(hdf5_getters.get_analysis_sample_rate(h5))
            row.append(hdf5_getters.get_artist_7digitalid(h5))
            artist_fam = hdf5_getters.get_artist_familiarity(h5)
            # "nan" values are replaced with -1 throughout
            if numpy.isnan(artist_fam):
                artist_fam = -1
            row.append(artist_fam)
            artist_hotness = hdf5_getters.get_artist_hotttnesss(h5)
            if numpy.isnan(artist_hotness):
                artist_hotness = -1
            row.append(artist_hotness)
            row.append(hdf5_getters.get_artist_id(h5))
            artist_lat = hdf5_getters.get_artist_latitude(h5)
            if numpy.isnan(artist_lat):
                artist_lat = -1
            row.append(artist_lat)
            row.append(hdf5_getters.get_artist_location(h5))
            artist_lon = hdf5_getters.get_artist_longitude(h5)
            if numpy.isnan(artist_lon):
                artist_lon = -1
            row.append(artist_lon)
            row.append(hdf5_getters.get_artist_mbid(h5))

            # Getting the genre
            art_trm = hdf5_getters.get_artist_terms(h5)
            trm_freq = hdf5_getters.get_artist_terms_freq(h5)
            trn_wght = hdf5_getters.get_artist_terms_weight(h5)
            a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
            genre_indexes = get_genre_indexes(trm_freq)  # indexes of the highest freq
            genre_set = 0  # flag to see if the genre has been set or not
            final_genre = []
            for idx in genre_indexes:
                genre_tmp = get_genre(art_trm, idx)  # genre that corresponds to the highest freq
                genres_so_far = genre_dict.get_genre_in_dict(genre_tmp)  # genre from the dictionary
                if len(genres_so_far) != 0:
                    final_genre.extend(genres_so_far)
                    genre_set = 1
            if genre_set == 1:
                col_num = [int(i) for i in final_genre]  # column numbers of the matched genres
                genre_array = genre_columns(col_num)
            else:
                genre_array = genre_columns(-1)  # no genre matched: an array of [0...0]
            row.extend(genre_array)  # appending the genre array to the row

            row.append(hdf5_getters.get_artist_playmeid(h5))
            row.append(hdf5_getters.get_audio_md5(h5))
            danceability = hdf5_getters.get_danceability(h5)
            if numpy.isnan(danceability):
                danceability = -1
            row.append(danceability)
            end_fade_in = hdf5_getters.get_end_of_fade_in(h5)
            if numpy.isnan(end_fade_in):
                end_fade_in = -1
            row.append(end_fade_in)
            energy = hdf5_getters.get_energy(h5)
            if numpy.isnan(energy):
                energy = -1
            row.append(energy)
            row.append(hdf5_getters.get_key(h5))
            key_c = hdf5_getters.get_key_confidence(h5)
            if numpy.isnan(key_c):
                key_c = -1
            row.append(key_c)
            loudness = hdf5_getters.get_loudness(h5)
            if numpy.isnan(loudness):
                loudness = -1
            row.append(loudness)
            row.append(hdf5_getters.get_mode(h5))
            mode_conf = hdf5_getters.get_mode_confidence(h5)
            if numpy.isnan(mode_conf):
                mode_conf = -1
            row.append(mode_conf)
            row.append(hdf5_getters.get_release_7digitalid(h5))
            song_hot = hdf5_getters.get_song_hotttnesss(h5)
            if numpy.isnan(song_hot):
                song_hot = -1
            row.append(song_hot)
            row.append(hdf5_getters.get_song_id(h5))
            row.append(hdf5_getters.get_start_of_fade_out(h5))
            tempo = hdf5_getters.get_tempo(h5)
            if numpy.isnan(tempo):
                tempo = -1
            row.append(tempo)
            row.append(hdf5_getters.get_time_signature(h5))
            time_sig_c = hdf5_getters.get_time_signature_confidence(h5)
            if numpy.isnan(time_sig_c):
                time_sig_c = -1
            row.append(time_sig_c)
            row.append(hdf5_getters.get_track_id(h5))
            row.append(hdf5_getters.get_track_7digitalid(h5))
            row.append(hdf5_getters.get_year(h5))

            gral_info = row[:]  # per-song columns repeated at the start of every output row

            # -------------- bars ---------------
            bars_c = hdf5_getters.get_bars_confidence(h5)
            bars_start = hdf5_getters.get_bars_start(h5)
            group_index = 0
            row_bars_padding = padding(245)  # blanks attached at the end of the row
            for i, item in enumerate(bars_c):
                row.append(group_index)
                row.append(i)
                append_value_and_stats(row, bars_c, i)
                append_value_and_stats(row, bars_start, i)
                row.extend(row_bars_padding)
                writer.writerow(row)
                row = gral_info[:]

            # -------------- beats ---------------
            beats_c = hdf5_getters.get_beats_confidence(h5)
            beats_start = hdf5_getters.get_beats_start(h5)
            group_index = 1
            row_front = padding(14)  # blanks left in front of the row (empty spaces for bars)
            row_beats_padding = padding(231)
            row = gral_info[:]
            for i, item in enumerate(beats_c):
                row.append(group_index)
                row.append(i)
                row.extend(row_front)  # padding blanks in front of the beats
                append_value_and_stats(row, beats_c, i)
                append_value_and_stats(row, beats_start, i)
                row.extend(row_beats_padding)
                writer.writerow(row)
                row = gral_info[:]

            # -------------- sections ---------------
            sec_c = hdf5_getters.get_sections_confidence(h5)
            sec_start = hdf5_getters.get_sections_start(h5)
            group_index = 2
            row_front = padding(28)  # blanks left in front (empty spaces for bars, beats)
            row_sec_padding = padding(217)  # blanks left at the end of the row
            row = gral_info[:]
            for i, item in enumerate(sec_c):
                row.append(group_index)
                row.append(i)
                row.extend(row_front)  # padding blanks in front of the sections
                append_value_and_stats(row, sec_c, i)
                append_value_and_stats(row, sec_start, i)
                row.extend(row_sec_padding)  # blanks at the end of the row
                writer.writerow(row)
                row = gral_info[:]

            # -------------- segments ---------------
            seg_c = hdf5_getters.get_segments_confidence(h5)
            seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
            seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
            seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
            seg_start = hdf5_getters.get_segments_start(h5)
            group_index = 3
            row_front = padding(42)  # blanks left in front of segments
            row_seg_padding = padding(182)  # blanks at the end of the row
            row = gral_info[:]
            for i, item in enumerate(seg_c):
                row.append(group_index)
                row.append(i)
                row.extend(row_front)  # padding blanks in front of the segments
                append_value_and_stats(row, seg_c, i)
                append_value_and_stats(row, seg_loud_max, i)
                append_value_and_stats(row, seg_loud_max_time, i)
                append_value_and_stats(row, seg_loud_start, i)
                append_value_and_stats(row, seg_start, i)
                row.extend(row_seg_padding)  # blanks at the end of the row
                writer.writerow(row)
                row = gral_info[:]

            # ---------- segments pitch and timbre ---------------
            seg_pitch = hdf5_getters.get_segments_pitches(h5)
            seg_timbre = hdf5_getters.get_segments_timbre(h5)
            transpose_pitch = seg_pitch.transpose()  # transpose the matrix so we have 12 rows
            transpose_timbre = seg_timbre.transpose()  # transpose the timbre matrix so we have 12 rows
            group_index = 4
            row_front = padding(77)  # blanks left in front of the segment pitch and timbre columns
            row_seg2_padding = padding(14)  # blanks left at the end of the row
            row = gral_info[:]
            for i, item in enumerate(transpose_pitch[0]):
                row.append(group_index)
                row.append(i)
                row.extend(row_front)  # padding blanks in front of segments and timbre
                for chroma_row in transpose_pitch:  # 12 pitch rows, each with its aggregates
                    append_value_and_stats(row, chroma_row, i)
                for timbre_row in transpose_timbre:  # 12 timbre rows, each with its aggregates
                    append_value_and_stats(row, timbre_row, i)
                row.extend(row_seg2_padding)
                writer.writerow(row)
                row = gral_info[:]

            # -------------- tatums ---------------
            tatms_c = hdf5_getters.get_tatums_confidence(h5)
            tatms_start = hdf5_getters.get_tatums_start(h5)
            group_index = 5
            row_front = padding(245)  # blanks left in front of tatums
            row = gral_info[:]
            for i, item in enumerate(tatms_c):
                row.append(group_index)
                row.append(i)
                row.extend(row_front)  # blanks at the front of the row
                append_value_and_stats(row, tatms_c, i)
                append_value_and_stats(row, tatms_start, i)
                writer.writerow(row)
                row = gral_info[:]

            # aggregate values of the 12 pitch rows (computed but not written out)
            seg_pitch_avg = [get_avg(r) for r in transpose_pitch]
            seg_pitch_max = [get_max(r) for r in transpose_pitch]
            seg_pitch_min = [get_min(r) for r in transpose_pitch]
            seg_pitch_stddev = [get_stddev(r) for r in transpose_pitch]
            seg_pitch_count = [get_count(r) for r in transpose_pitch]
            seg_pitch_sum = [get_sum(r) for r in transpose_pitch]
            # aggregate values of the 12 timbre rows
            seg_timbre_avg = [get_avg(r) for r in transpose_timbre]
            seg_timbre_max = [get_max(r) for r in transpose_timbre]
            seg_timbre_min = [get_min(r) for r in transpose_timbre]
            seg_timbre_stddev = [get_stddev(r) for r in transpose_timbre]
            seg_timbre_count = [get_count(r) for r in transpose_timbre]
            seg_timbre_sum = [get_sum(r) for r in transpose_timbre]

            h5.close()
            count = count + 1
            print count
# sanity checks
if DIGITAL7_API_KEY is None:
    print 'You need to set a 7digital API key!'
    print 'Get one at: http://developer.7digital.net/'
    print 'Pass it as a flag: -7digitalkey KEY'
    print 'or set it under environment variable: DIGITAL7_API_KEY'
    sys.exit(0)
if not os.path.isfile(h5path):
    print 'invalid path (not a file):', h5path
    sys.exit(0)

# open h5 song, get all we know about the song
h5 = hdf5_utils.open_h5_file_read(h5path)
track_7digitalid = GETTERS.get_track_7digitalid(h5)
release_7digitalid = GETTERS.get_release_7digitalid(h5)
artist_7digitalid = GETTERS.get_artist_7digitalid(h5)
artist_name = GETTERS.get_artist_name(h5)
release_name = GETTERS.get_release(h5)
track_name = GETTERS.get_title(h5)
h5.close()

# we already have the 7digital track id? way too easy!
if track_7digitalid >= 0:
    preview = get_preview_from_trackid(track_7digitalid)
    if preview == '':
        print 'something went wrong when looking by track id'
    else:
        print preview
        sys.exit(0)
# h5 = hdf5_getters.open_h5_file_read(subset_path+'A/V/A/TRAVAAN128F9359AAE.h5')
output_dir = '/sons/rwc/Learn/hdf5/'
output = output_dir + 'rwc-g-m01_1.h5'
h5 = hdf5_getters.open_h5_file_read(output)
duration = hdf5_getters.get_duration(h5)
title = hdf5_getters.get_title(h5)
n_segments_start = hdf5_getters.get_segments_start(h5)
n_segment = n_segments_start.shape[0]
artist_name = hdf5_getters.get_artist_name(h5)
timbre = hdf5_getters.get_segments_timbre(h5)
loudness = hdf5_getters.get_segments_loudness_start(h5)
C = hdf5_getters.get_segments_pitches(h5)
beattimes = hdf5_getters.get_segments_start(h5)
digital_id = hdf5_getters.get_track_7digitalid(h5)

# guess the duration of a segment
seg_dur = n_segments_start[1:] - n_segments_start[0:-1]

# Load the learning parts
from scipy.io import loadmat
full_path = '/home/manu/workspace/audio-sketch/matlab/'
savematname = 'learnbase_allfeats_2000000_seed_78.mat'
lstruct = loadmat(full_path + savematname)
learn_feats_all = lstruct['learn_feats_all']
learn_magspecs_all = lstruct['learn_magspecs_all']
learn_files = lstruct['learn_files']
# in this context we use only the Chroma vector
learn_feats = learn_feats_all[:, 20:32]
def main():
    outputFile1 = open('SongCSV.csv', 'w')
    csvRowString = ""

    #################################################
    # if you want to prompt the user for the order of attributes in the csv,
    # leave the prompt boolean set to True;
    # else, set 'prompt' to False and set the order of attributes in the 'else'
    # clause
    prompt = False
    #################################################

    # canonical attribute names recognised by the converter; the user's
    # (lower-cased) input is matched against this list
    canonicalAttributes = [
        'AlbumID', 'AlbumName', 'ArtistID', 'ArtistLatitude', 'ArtistLocation',
        'ArtistLongitude', 'ArtistName', 'Danceability', 'Duration',
        'KeySignature', 'KeySignatureConfidence', 'SongID', 'Tempo',
        'TimeSignature', 'TimeSignatureConfidence', 'Title', 'Year',
        'track_id', 'artist_familiarity', 'artist_hotttnesss', 'artist_mbid',
        'artist_playmeid', 'artist_7digitalid', 'release', 'release_7digitalid',
        'song_hotttnesss', 'track_7digitalid', 'analysis_sample_rate',
        'audio_md5', 'end_of_fade_in', 'energy', 'key', 'key_confidence',
        'loudness', 'mode', 'mode_confidence', 'start_of_fade_out']
    canonicalByLower = dict((name.lower(), name) for name in canonicalAttributes)

    if prompt == True:
        while prompt:
            prompt = False
            csvAttributeString = raw_input(
                "\n\nIn what order would you like the columns of the CSV file?\n" +
                "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude," +
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n" +
                "...or exit by typing 'exit'.\n\n")
            csvAttributeList = re.split('\W+', csvAttributeString)
            for i, v in enumerate(csvAttributeList):
                csvAttributeList[i] = csvAttributeList[i].lower()
            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"
                if attribute == 'exit':
                    sys.exit()
                elif attribute in canonicalByLower:
                    csvRowString += canonicalByLower[attribute]
                else:
                    prompt = True
                    print "=============="
                    print "I believe there has been an error with the input."
                    print "=============="
                    break
                csvRowString += ","
        lastIndex = len(csvRowString)
        csvRowString = csvRowString[0:lastIndex - 1]
        csvRowString += "\n"
        outputFile1.write(csvRowString)
        csvRowString = ""
    # else, if you want to hard code the order of the csv file and not prompt
    # the user,
    else:
        #################################################
        # change the order of the csv file here
        # Default is to list all available attributes (in alphabetical order)
        csvRowString = (
            "SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation," +
            "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature," +
            "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence," +
            "Title,Year,track_id,artist_hotttnesss,artist_mbid,artist_playmeid,artist_7digitalid," +
            "release,release_7digitalid,song_hotttnesss,track_7digitalid,analysis_sample_rate,audio_md5," +
            "end_of_fade_in,energy,key,key_confidence,loudness,mode,mode_confidence,start_of_fade_out")
        #################################################
        csvAttributeList = re.split('\W+', csvRowString)
        for i, v in enumerate(csvAttributeList):
            csvAttributeList[i] = csvAttributeList[i].lower()
        outputFile1.write("SongNumber,")
        outputFile1.write(csvRowString + "\n")
        csvRowString = ""

    #################################################
    # Set the basedir here, the root directory from which the search
    # for files stored in a (hierarchical data structure) will originate
    basedir = "/vagrant/genrepython/MillionSongSubset"  # "." as the default means the current directory
    ext = ".h5"  # Set the extension here. H5 is the extension for HDF5 files.
    #################################################
    # FOR LOOP
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            print f
            songH5File = hdf5_getters.open_h5_file_read(f)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            testDanceability = hdf5_getters.get_danceability(songH5File)
            # print type(testDanceability)
            # print ("Here is the danceability: ") + str(testDanceability)

            song.artistID = str(hdf5_getters.get_artist_id(songH5File))
            song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.albumName = str(hdf5_getters.get_release(songH5File))
            song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
            song.artistLocation = str(hdf5_getters.get_artist_location(songH5File))
            song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
            song.artistName = str(hdf5_getters.get_artist_name(songH5File))
            song.danceability = str(hdf5_getters.get_danceability(songH5File))
            song.duration = str(hdf5_getters.get_duration(songH5File))
            # song.setGenreList()
            song.keySignature = str(hdf5_getters.get_key(songH5File))
            song.keySignatureConfidence = str(hdf5_getters.get_key_confidence(songH5File))
            # song.lyrics = None
            # song.popularity = None
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))
            song.track_id = str(hdf5_getters.get_track_id(songH5File))
            song.artist_familiarity = str(hdf5_getters.get_artist_familiarity(songH5File))
            song.artist_hotttnesss = str(hdf5_getters.get_artist_hotttnesss(songH5File))
            song.artist_mbid = str(hdf5_getters.get_artist_mbid(songH5File))
            song.artist_playmeid = str(hdf5_getters.get_artist_playmeid(songH5File))
            song.artist_7digitalid = str(hdf5_getters.get_artist_7digitalid(songH5File))
            song.release = str(hdf5_getters.get_release(songH5File))
            song.release_7digitalid = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.song_hotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
            song.track_7digitalid = str(hdf5_getters.get_track_7digitalid(songH5File))
            song.analysis_sample_rate = str(hdf5_getters.get_analysis_sample_rate(songH5File))
            song.audio_md5 = str(hdf5_getters.get_audio_md5(songH5File))
            song.end_of_fade_in = str(hdf5_getters.get_end_of_fade_in(songH5File))
            song.energy = str(hdf5_getters.get_energy(songH5File))
            song.key = str(hdf5_getters.get_key(songH5File))
            song.key_confidence = str(hdf5_getters.get_key_confidence(songH5File))
            song.loudness = str(hdf5_getters.get_loudness(songH5File))
            song.mode = str(hdf5_getters.get_mode(songH5File))
            song.mode_confidence = str(hdf5_getters.get_mode_confidence(songH5File))
            song.start_of_fade_out = str(hdf5_getters.get_start_of_fade_out(songH5File))

            # print song count
            csvRowString += str(song.songCount) + ","

            # per-attribute CSV cell values, keyed by the lower-cased attribute name;
            # quoted fields have stray commas stripped so they do not break the CSV,
            # and 'nan' coordinates are written as empty strings
            valueByAttribute = {
                'albumid': song.albumID,
                'albumname': "\"" + song.albumName.replace(',', "") + "\"",
                'artistid': "\"" + song.artistID + "\"",
                'artistlatitude': '' if song.artistLatitude == 'nan' else song.artistLatitude,
                'artistlocation': "\"" + song.artistLocation.replace(',', '') + "\"",
                'artistlongitude': '' if song.artistLongitude == 'nan' else song.artistLongitude,
                'artistname': "\"" + song.artistName + "\"",
                'danceability': song.danceability,
                'duration': song.duration,
                'keysignature': song.keySignature,
                'keysignatureconfidence': song.keySignatureConfidence,
                'songid': "\"" + song.id + "\"",
                'tempo': song.tempo,
                'timesignature': song.timeSignature,
                'timesignatureconfidence': song.timeSignatureConfidence,
                'title': "\"" + song.title + "\"",
                'year': song.year,
                'track_id': song.track_id,
                'artist_familiarity': song.artist_familiarity,
                'artist_hotttnesss': song.artist_hotttnesss,
                'artist_mbid': song.artist_mbid,
                'artist_playmeid': song.artist_playmeid,
                'artist_7digitalid': song.artist_7digitalid,
                'release': song.release,
                'release_7digitalid': song.release_7digitalid,
                'song_hotttnesss': song.song_hotttnesss,
                'track_7digitalid': song.track_7digitalid,
                'analysis_sample_rate': song.analysis_sample_rate,
                'audio_md5': song.audio_md5,
                'end_of_fade_in': song.end_of_fade_in,
                'energy': song.energy,
                'key': song.key,
                'key_confidence': song.key_confidence,
                'loudness': song.loudness,
                'mode': song.mode,
                'mode_confidence': song.mode_confidence,
                'start_of_fade_out': song.start_of_fade_out,
            }
            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"
                csvRowString += valueByAttribute.get(attribute, "Erm. This didn't work. Error. :( :(\n")
                csvRowString += ","

            # Remove the final comma from each row in the csv
            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex - 1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""

            songH5File.close()

    outputFile1.close()
def complete_hd5_to_csv(basedir):
    ext = '.h5'  # Get all files with extension .h5

    # Header line. Essentially it is the schema for all the following songs
    header = [
        'Title', 'Artist familiarity', 'Artist hotness', 'Artist ID', 'Artist mbID',
        'Artist playmeid', 'Artist 7DigitalID', 'Artist latitude', 'Artist longitude',
        'Artist location', 'Artist Name', 'Release', 'Release 7DigitalID', 'Song ID',
        'Song Hotness', 'Track 7Digital', 'Analysis sample rate', 'Audio md5',
        'Danceability', 'Duration', 'End of Fade', 'Energy', 'Key', 'Key Confidence',
        'Loudness', 'Mode', 'Mode Confidence', 'Start of fade out', 'Tempo',
        'Time signature', 'Time signature confidence', 'Track ID', 'Year'
    ]

    with open('Tester2.csv', 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=';')
        # write the header line, which contains the schema of the data
        csv_writer.writerow(header)

        # Read all files from the given directories
        for root, dirs, files in os.walk(basedir):
            files = glob.glob(os.path.join(root, '*' + ext))
            print(files)
            for f in files:
                h5 = hdf5_getters.open_h5_file_read(f)
                # Write all elements as one row.
                # NOTE: only the scalar fields are written, not the per-segment arrays.
                csv_writer.writerow([
                    hdf5_getters.get_title(h5),
                    hdf5_getters.get_artist_familiarity(h5),
                    hdf5_getters.get_artist_hotttnesss(h5),
                    hdf5_getters.get_artist_id(h5),
                    hdf5_getters.get_artist_mbid(h5),
                    hdf5_getters.get_artist_playmeid(h5),
                    hdf5_getters.get_artist_7digitalid(h5),
                    hdf5_getters.get_artist_latitude(h5),
                    hdf5_getters.get_artist_longitude(h5),
                    hdf5_getters.get_artist_location(h5),
                    hdf5_getters.get_artist_name(h5),
                    hdf5_getters.get_release(h5),
                    hdf5_getters.get_release_7digitalid(h5),
                    hdf5_getters.get_song_id(h5),
                    hdf5_getters.get_song_hotttnesss(h5),
                    hdf5_getters.get_track_7digitalid(h5),
                    hdf5_getters.get_analysis_sample_rate(h5),
                    hdf5_getters.get_audio_md5(h5),
                    hdf5_getters.get_danceability(h5),
                    hdf5_getters.get_duration(h5),
                    hdf5_getters.get_end_of_fade_in(h5),
                    hdf5_getters.get_energy(h5),
                    hdf5_getters.get_key(h5),
                    hdf5_getters.get_key_confidence(h5),
                    hdf5_getters.get_loudness(h5),
                    hdf5_getters.get_mode(h5),
                    hdf5_getters.get_mode_confidence(h5),
                    hdf5_getters.get_start_of_fade_out(h5),
                    hdf5_getters.get_tempo(h5),
                    hdf5_getters.get_time_signature(h5),
                    hdf5_getters.get_time_signature_confidence(h5),
                    hdf5_getters.get_track_id(h5),
                    hdf5_getters.get_year(h5)
                ])

                # For debugging purposes. Everything as expected
                # print()
                # print("Num of songs -- ", hdf5_getters.get_num_songs(h5))  # One song per file
                # print("Title -- ", hdf5_getters.get_title(h5))  # Print the title of a specific h5 file
                # print("Artist familiarity -- ", hdf5_getters.get_artist_familiarity(h5))
                # print("Artist hotness -- ", hdf5_getters.get_artist_hotttnesss(h5))
                # print("Artist ID -- ", hdf5_getters.get_artist_id(h5))
                # print("Artist mbID -- ", hdf5_getters.get_artist_mbid(h5))
                # print("Artist playmeid -- ", hdf5_getters.get_artist_playmeid(h5))
                # print("Artist 7DigitalID -- ", hdf5_getters.get_artist_7digitalid(h5))
                # print("Artist latitude -- ", hdf5_getters.get_artist_latitude(h5))
                # print("Artist longitude -- ", hdf5_getters.get_artist_longitude(h5))
                # print("Artist location -- ", hdf5_getters.get_artist_location(h5))
                # print("Artist Name -- ", hdf5_getters.get_artist_name(h5))
                # print("Release -- ", hdf5_getters.get_release(h5))
                # print("Release 7DigitalID -- ", hdf5_getters.get_release_7digitalid(h5))
                # print("Song ID -- ", hdf5_getters.get_song_id(h5))
                # print("Song Hotness -- ", hdf5_getters.get_song_hotttnesss(h5))
                # print("Track 7Digital -- ", hdf5_getters.get_track_7digitalid(h5))
                # print("Analysis sample rate -- ", hdf5_getters.get_analysis_sample_rate(h5))
                # print("Audio md5 -- ", hdf5_getters.get_audio_md5(h5))
                # print("Danceability -- ", hdf5_getters.get_danceability(h5))
                # print("Duration -- ", hdf5_getters.get_duration(h5))
                # print("End of Fade -- ", hdf5_getters.get_end_of_fade_in(h5))
                # print("Energy -- ", hdf5_getters.get_energy(h5))
                # print("Key -- ", hdf5_getters.get_key(h5))
                # print("Key Confidence -- ", hdf5_getters.get_key_confidence(h5))
                # print("Loudness -- ", hdf5_getters.get_loudness(h5))
                # print("Mode -- ", hdf5_getters.get_mode(h5))
                # print("Mode Confidence -- ", hdf5_getters.get_mode_confidence(h5))
                # print("Start of fade out -- ", hdf5_getters.get_start_of_fade_out(h5))
                # print("Tempo -- ", hdf5_getters.get_tempo(h5))
                # print("Time signature -- ", hdf5_getters.get_time_signature(h5))
                # print("Time signature confidence -- ", hdf5_getters.get_time_signature_confidence(h5))
                # print("Track ID -- ", hdf5_getters.get_track_id(h5))
                # # print("Artist mbtags -- ", hdf5_getters.get_artist_mbtags(h5))
                # # print("Artist mbtags count -- ", hdf5_getters.get_artist_mbtags_count(h5))
                # print("Year -- ", hdf5_getters.get_year(h5))
                h5.close()
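A small, hypothetical usage sketch (not in the original source): run the converter above on a local copy of the subset and read the ';'-delimited output back; the pandas dependency and the 'MillionSongSubset' path are assumptions for illustration only.

import pandas as pd

# assumes complete_hd5_to_csv() defined above is available in this module
complete_hd5_to_csv('MillionSongSubset')
songs = pd.read_csv('Tester2.csv', delimiter=';')
print(songs[['Title', 'Artist Name', 'Track 7Digital']].head())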
get_min(transpose_pitch[2]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[2]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[2]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[2]) row.append(seg_pitch_sum) row.append(transpose_pitch[3][i]) seg_pitch_avg = get_avg(transpose_pitch[3]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[3]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[3]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[3]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[3]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[3]) row.append(seg_pitch_sum) row.append(transpose_pitch[4][i]) seg_pitch_avg = get_avg(transpose_pitch[4]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[4]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[4]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[4]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[4]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[4]) row.append(seg_pitch_sum) row.append(transpose_pitch[5][i]) seg_pitch_avg = get_avg(transpose_pitch[5]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[5]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[5]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[5]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[5]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[5]) row.append(seg_pitch_sum) row.append(transpose_pitch[6][i]) seg_pitch_avg = get_avg(transpose_pitch[6]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[6]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[6]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[6]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[6]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[6]) row.append(seg_pitch_sum) row.append(transpose_pitch[7][i]) seg_pitch_avg = get_avg(transpose_pitch[7]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[7]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[7]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[7]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[7]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[7]) row.append(seg_pitch_sum) row.append(transpose_pitch[8][i]) seg_pitch_avg = get_avg(transpose_pitch[8]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[8]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[8]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[8]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[8]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[8]) row.append(seg_pitch_sum) row.append(transpose_pitch[9][i]) seg_pitch_avg = get_avg(transpose_pitch[9]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[9]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[9]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[9]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[9]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[9]) 
row.append(seg_pitch_sum) row.append(transpose_pitch[10][i]) seg_pitch_avg = get_avg(transpose_pitch[10]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[10]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[10]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[10]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[10]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[10]) row.append(seg_pitch_sum) row.append(transpose_pitch[11][i]) seg_pitch_avg = get_avg(transpose_pitch[11]) row.append(seg_pitch_avg) seg_pitch_max = get_max(transpose_pitch[11]) row.append(seg_pitch_max) seg_pitch_min = get_min(transpose_pitch[11]) row.append(seg_pitch_min) seg_pitch_stddev = get_stddev(transpose_pitch[11]) row.append(seg_pitch_stddev) seg_pitch_count = get_count(transpose_pitch[11]) row.append(seg_pitch_count) seg_pitch_sum = get_sum(transpose_pitch[11]) row.append(seg_pitch_sum) #timbre arrays seg_timbre = hdf5_getters.get_segments_timbre(h5) transpose_timbre = seg_pitch.transpose( ) #tranposing matrix, to have 12 rows row.append(transpose_timbre[0][i]) seg_timbre_avg = get_avg(transpose_timbre[0]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[0]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[0]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[0]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[0]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[0]) row.append(seg_timbre_sum) row.append(transpose_timbre[1][i]) seg_timbre_avg = get_avg(transpose_timbre[1]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[1]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[1]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[1]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[1]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[1]) row.append(seg_timbre_sum) row.append(transpose_timbre[2][i]) seg_timbre_avg = get_avg(transpose_timbre[2]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[2]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[2]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[2]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[2]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[2]) row.append(seg_timbre_sum) row.append(transpose_timbre[3][i]) seg_timbre_avg = get_avg(transpose_timbre[3]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[3]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[3]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[3]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[3]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[3]) row.append(seg_timbre_sum) row.append(transpose_timbre[4][i]) seg_timbre_avg = get_avg(transpose_timbre[4]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[4]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[4]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[4]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[4]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[4]) 
row.append(seg_timbre_sum) row.append(transpose_timbre[5][i]) seg_timbre_avg = get_avg(transpose_timbre[5]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[5]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[5]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[5]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[5]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[5]) row.append(seg_timbre_sum) row.append(transpose_timbre[6][i]) seg_timbre_avg = get_avg(transpose_timbre[6]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[6]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[6]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[6]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[6]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[6]) row.append(seg_timbre_sum) row.append(transpose_timbre[7][i]) seg_timbre_avg = get_avg(transpose_timbre[7]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[7]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[7]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[7]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[7]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[7]) row.append(seg_timbre_sum) row.append(transpose_timbre[8][i]) seg_timbre_avg = get_avg(transpose_timbre[8]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[8]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[8]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[8]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[8]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[8]) row.append(seg_timbre_sum) row.append(transpose_timbre[9][i]) seg_timbre_avg = get_avg(transpose_timbre[9]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[9]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[9]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[9]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[9]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[9]) row.append(seg_timbre_sum) row.append(transpose_timbre[10][i]) seg_timbre_avg = get_avg(transpose_timbre[10]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[10]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[10]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[10]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[10]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[10]) row.append(seg_timbre_sum) row.append(transpose_timbre[11][i]) seg_timbre_avg = get_avg(transpose_timbre[11]) row.append(seg_timbre_avg) seg_timbre_max = get_max(transpose_timbre[11]) row.append(seg_timbre_max) seg_timbre_min = get_min(transpose_timbre[11]) row.append(seg_timbre_min) seg_timbre_stddev = get_stddev(transpose_timbre[11]) row.append(seg_timbre_stddev) seg_timbre_count = get_count(transpose_timbre[11]) row.append(seg_timbre_count) seg_timbre_sum = get_sum(transpose_timbre[11]) row.append(seg_timbre_sum) for item in row_seg2_padding: row.append(item) writer.writerow(row) row = [] row = gral_info[:] # 
"--------tatums---------------" tatms_c = hdf5_getters.get_tatums_confidence(h5) group_index = 5 row_front = padding(245) #blank spaces left in front of tatums row = [] row = gral_info[:] for i, item in enumerate(tatms_c): row.append(group_index) row.append(i) for item in row_front: #appending blank spaces at the front of the row row.append(item) row.append(tatms_c[i]) tatms_c_avg = get_avg(tatms_c) row.append(tatms_c_avg) tatms_c_max = get_max(tatms_c) row.append(tatms_c_max) tatms_c_min = get_min(tatms_c) row.append(tatms_c_min) tatms_c_stddev = get_stddev(tatms_c) row.append(tatms_c_stddev) tatms_c_count = get_count(tatms_c) row.append(tatms_c_count) tatms_c_sum = get_sum(tatms_c) row.append(tatms_c_sum) tatms_start = hdf5_getters.get_tatums_start(h5) row.append(tatms_start[i]) tatms_start_avg = get_avg(tatms_start) row.append(tatms_start_avg) tatms_start_max = get_max(tatms_start) row.append(tatms_start_max) tatms_start_min = get_min(tatms_start) row.append(tatms_start_min) tatms_start_stddev = get_stddev(tatms_start) row.append(tatms_start_stddev) tatms_start_count = get_count(tatms_start) row.append(tatms_start_count) tatms_start_sum = get_sum(tatms_start) row.append(tatms_start_sum) writer.writerow(row) row = [] row = gral_info[:] transpose_pitch = seg_pitch.transpose( ) #this is to tranpose the matrix,so we can have 12 rows #arrays containing the aggregate values of the 12 rows seg_pitch_avg = [] seg_pitch_max = [] seg_pitch_min = [] seg_pitch_stddev = [] seg_pitch_count = [] seg_pitch_sum = [] i = 0 #Getting the aggregate values in the pitches array for row in transpose_pitch: seg_pitch_avg.append(get_avg(row)) seg_pitch_max.append(get_max(row)) seg_pitch_min.append(get_min(row)) seg_pitch_stddev.append(get_stddev(row)) seg_pitch_count.append(get_count(row)) seg_pitch_sum.append(get_sum(row)) i = i + 1 #extracting information from the timbre array transpose_timbre = seg_pitch.transpose( ) #tranposing matrix, to have 12 rows #arrays containing the aggregate values of the 12 rows seg_timbre_avg = [] seg_timbre_max = [] seg_timbre_min = [] seg_timbre_stddev = [] seg_timbre_count = [] seg_timbre_sum = [] i = 0 for row in transpose_timbre: seg_timbre_avg.append(get_avg(row)) seg_timbre_max.append(get_max(row)) seg_timbre_min.append(get_min(row)) seg_timbre_stddev.append(get_stddev(row)) seg_timbre_count.append(get_count(row)) seg_timbre_sum.append(get_sum(row)) i = i + 1 h5.close() count = count + 1 print count
msdPath = sys.argv[1]
i = 0.0
for folder in os.listdir(msdPath):
    insidePath = msdPath + '/' + folder
    if os.path.isdir(insidePath):
        for folder2 in os.listdir(insidePath):
            insidePath2 = insidePath + '/' + folder2
            if os.path.isdir(insidePath2):
                for file in os.listdir(insidePath2):
                    previewFilePath = insidePath2 + '/' + os.path.splitext(file)[0] + '.mp3'
                    print previewFilePath
                    if file.endswith('h5') and not os.path.isfile(previewFilePath):
                        h5FilePath = insidePath2 + '/' + file
                        # print 'Processing ' + h5FilePath
                        try:
                            h5 = hdf5_getters.open_h5_file_read(h5FilePath)
                            id7Digital = hdf5_getters.get_track_7digitalid(h5)
                            h5.close()
                            url = py7D.preview_url(id7Digital)
                            urlretrieve = urllib.urlretrieve(url, previewFilePath)
                        except Exception:
                            print "Error occurred"
                    if file.endswith('h5'):
                        # update_progress(int(i/7620 * 100))
                        sys.stdout.write("\r%d%%" % int(i / 7620 * 100))
                        sys.stdout.flush()
                        i += 1
def data_to_flat_file(basedir, ext='.h5'):
    """This function extracts the information from the tables and creates the flat file."""
    count = 0  # song counter
    list_to_write = []
    row_to_write = ""
    writer = csv.writer(open("metadata_wholeA.csv", "wb"))

    def nan_to_neg1(x):
        # the flat file stores -1 wherever the hdf5 field is "nan"
        return -1 if numpy.isnan(x) else x

    def agg(arr):
        # the six aggregate statistics computed for every analysis array
        return (get_avg(arr), get_max(arr), get_min(arr),
                get_stddev(arr), get_count(arr), get_sum(arr))

    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            print f  # the name of the file
            h5 = hdf5_getters.open_h5_file_read(f)

            title = hdf5_getters.get_title(h5).replace('"', '')
            if title.find(',') != -1:  # flag titles that contain commas
                print title
                time.sleep(1)
            album = hdf5_getters.get_release(h5).replace('"', '')
            if album.find(',') != -1:  # flag albums that contain commas
                print album
                time.sleep(1)
            artist_name = hdf5_getters.get_artist_name(h5)
            if artist_name.find(',') != -1:
                print artist_name
                time.sleep(1)
            artist_name = artist_name.replace('"', '')  # eliminating double quotes

            duration = hdf5_getters.get_duration(h5)
            samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
            artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
            artist_fam = nan_to_neg1(hdf5_getters.get_artist_familiarity(h5))
            artist_hotness = nan_to_neg1(hdf5_getters.get_artist_hotttnesss(h5))
            artist_id = hdf5_getters.get_artist_id(h5)
            artist_lat = nan_to_neg1(hdf5_getters.get_artist_latitude(h5))
            artist_loc = hdf5_getters.get_artist_location(h5)
            # escape commas; drop hyperlinks and over-long locations
            artist_loc = artist_loc.replace(",", "\\,")
            if artist_loc.startswith("<a") or len(artist_loc) > 100:
                artist_loc = ""
            artist_lon = nan_to_neg1(hdf5_getters.get_artist_longitude(h5))
            artist_mbid = hdf5_getters.get_artist_mbid(h5)
            artist_pmid = hdf5_getters.get_artist_playmeid(h5)
            audio_md5 = hdf5_getters.get_audio_md5(h5)
            danceability = nan_to_neg1(hdf5_getters.get_danceability(h5))
            end_fade_in = nan_to_neg1(hdf5_getters.get_end_of_fade_in(h5))
            energy = nan_to_neg1(hdf5_getters.get_energy(h5))
            song_key = hdf5_getters.get_key(h5)
            key_c = nan_to_neg1(hdf5_getters.get_key_confidence(h5))
            loudness = nan_to_neg1(hdf5_getters.get_loudness(h5))
            mode = hdf5_getters.get_mode(h5)
            mode_conf = nan_to_neg1(hdf5_getters.get_mode_confidence(h5))
            release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
            song_hot = nan_to_neg1(hdf5_getters.get_song_hotttnesss(h5))
            song_id = hdf5_getters.get_song_id(h5)
            start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
            tempo = nan_to_neg1(hdf5_getters.get_tempo(h5))
            time_sig = hdf5_getters.get_time_signature(h5)
            time_sig_c = nan_to_neg1(hdf5_getters.get_time_signature_confidence(h5))
            track_id = hdf5_getters.get_track_id(h5)
            track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
            year = hdf5_getters.get_year(h5)

            bars_c = hdf5_getters.get_bars_confidence(h5)
            bars_c_avg, bars_c_max, bars_c_min, bars_c_stddev, bars_c_count, bars_c_sum = agg(bars_c)
            bars_start = hdf5_getters.get_bars_start(h5)
            bars_start_avg, bars_start_max, bars_start_min, bars_start_stddev, bars_start_count, bars_start_sum = agg(bars_start)
            beats_c = hdf5_getters.get_beats_confidence(h5)
            beats_c_avg, beats_c_max, beats_c_min, beats_c_stddev, beats_c_count, beats_c_sum = agg(beats_c)
            beats_start = hdf5_getters.get_beats_start(h5)
            beats_start_avg, beats_start_max, beats_start_min, beats_start_stddev, beats_start_count, beats_start_sum = agg(beats_start)
            sec_c = hdf5_getters.get_sections_confidence(h5)
            sec_c_avg, sec_c_max, sec_c_min, sec_c_stddev, sec_c_count, sec_c_sum = agg(sec_c)
            sec_start = hdf5_getters.get_sections_start(h5)
            sec_start_avg, sec_start_max, sec_start_min, sec_start_stddev, sec_start_count, sec_start_sum = agg(sec_start)
            seg_c = hdf5_getters.get_segments_confidence(h5)
            seg_c_avg, seg_c_max, seg_c_min, seg_c_stddev, seg_c_count, seg_c_sum = agg(seg_c)
            seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
            seg_loud_max_avg, seg_loud_max_max, seg_loud_max_min, seg_loud_max_stddev, seg_loud_max_count, seg_loud_max_sum = agg(seg_loud_max)
            seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
            seg_loud_max_time_avg, seg_loud_max_time_max, seg_loud_max_time_min, seg_loud_max_time_stddev, seg_loud_max_time_count, seg_loud_max_time_sum = agg(seg_loud_max_time)
            seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
            seg_loud_start_avg, seg_loud_start_max, seg_loud_start_min, seg_loud_start_stddev, seg_loud_start_count, seg_loud_start_sum = agg(seg_loud_start)
            seg_pitch = hdf5_getters.get_segments_pitches(h5)
            pitch_size = len(seg_pitch)
            seg_start = hdf5_getters.get_segments_start(h5)
            seg_start_avg, seg_start_max, seg_start_min, seg_start_stddev, seg_start_count, seg_start_sum = agg(seg_start)
            seg_timbre = hdf5_getters.get_segments_timbre(h5)
            tatms_c = hdf5_getters.get_tatums_confidence(h5)
            tatms_c_avg, tatms_c_max, tatms_c_min, tatms_c_stddev, tatms_c_count, tatms_c_sum = agg(tatms_c)
            tatms_start = hdf5_getters.get_tatums_start(h5)
            tatms_start_avg, tatms_start_max, tatms_start_min, tatms_start_stddev, tatms_start_count, tatms_start_sum = agg(tatms_start)

            # Getting the genres
            genre_set = 0  # flag to see if the genre has been set or not
            art_trm = hdf5_getters.get_artist_terms(h5)
            trm_freq = hdf5_getters.get_artist_terms_freq(h5)
            trn_wght = hdf5_getters.get_artist_terms_weight(h5)
            a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
            genre_indexes = get_genre_indexes(trm_freq)  # indexes of the highest frequencies
            final_genre = []
            for i in range(len(genre_indexes)):
                genre_tmp = get_genre(art_trm, genre_indexes[i])  # term with the highest frequency
                genres_so_far = genre_dict.get_genre_in_dict(genre_tmp)  # look the term up in the genre dictionary
                if len(genres_so_far) != 0:
                    for g in genres_so_far:
                        final_genre.append(g)
                    genre_set = 1  # genre was found in the dictionary
            if genre_set == 1:
                col_num = [int(genre) for genre in final_genre]  # column numbers of the genres
                genre_array = genre_columns(col_num)
            else:
                genre_array = genre_columns(-1)  # the genre was not found in the dictionary

            # aggregate values for the 12 pitch rows and the 12 timbre rows
            transpose_pitch = seg_pitch.transpose()    # transpose the matrix so we have 12 rows
            seg_pitch_avg = [get_avg(r) for r in transpose_pitch]
            seg_pitch_max = [get_max(r) for r in transpose_pitch]
            seg_pitch_min = [get_min(r) for r in transpose_pitch]
            seg_pitch_stddev = [get_stddev(r) for r in transpose_pitch]
            seg_pitch_count = [get_count(r) for r in transpose_pitch]
            seg_pitch_sum = [get_sum(r) for r in transpose_pitch]
            transpose_timbre = seg_timbre.transpose()  # use the timbre matrix here, not the pitch matrix
            seg_timbre_avg = [get_avg(r) for r in transpose_timbre]
            seg_timbre_max = [get_max(r) for r in transpose_timbre]
            seg_timbre_min = [get_min(r) for r in transpose_timbre]
            seg_timbre_stddev = [get_stddev(r) for r in transpose_timbre]
            seg_timbre_count = [get_count(r) for r in transpose_timbre]
            seg_timbre_sum = [get_sum(r) for r in transpose_timbre]

            # Writing to the flat file
            writer.writerow([title, album, artist_name, year, duration, seg_start_count, tempo])
            h5.close()
            count = count + 1
            print count
def data_to_flat_file(basedir, ext='.h5'):
    """This function extracts the information from the tables and creates the flat file."""
    count = 0  # song counter
    list_to_write = []
    row_to_write = ""
    writer = csv.writer(open("metadata.csv", "wb"))

    def nan_to_neg1(x):
        # the flat file stores -1 wherever the hdf5 field is "nan"
        return -1 if numpy.isnan(x) else x

    def agg(arr):
        # the six aggregate statistics computed for every analysis array
        return (get_avg(arr), get_max(arr), get_min(arr),
                get_stddev(arr), get_count(arr), get_sum(arr))

    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + ext))
        for f in files:
            print f  # the name of the file
            h5 = hdf5_getters.open_h5_file_read(f)

            title = hdf5_getters.get_title(h5).replace('"', '')
            if title.find(',') != -1:  # flag titles that contain commas
                print title
                time.sleep(1)
            album = hdf5_getters.get_release(h5).replace('"', '')
            if album.find(',') != -1:  # flag albums that contain commas
                print album
                time.sleep(1)
            artist_name = hdf5_getters.get_artist_name(h5)
            if artist_name.find(',') != -1:
                print artist_name
                time.sleep(1)
            artist_name = artist_name.replace('"', '')  # eliminating double quotes

            duration = hdf5_getters.get_duration(h5)
            samp_rt = hdf5_getters.get_analysis_sample_rate(h5)
            artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5)
            artist_fam = nan_to_neg1(hdf5_getters.get_artist_familiarity(h5))
            artist_hotness = nan_to_neg1(hdf5_getters.get_artist_hotttnesss(h5))
            artist_id = hdf5_getters.get_artist_id(h5)
            artist_lat = nan_to_neg1(hdf5_getters.get_artist_latitude(h5))
            artist_loc = hdf5_getters.get_artist_location(h5)
            # escape commas; drop hyperlinks and over-long locations
            artist_loc = artist_loc.replace(",", "\\,")
            if artist_loc.startswith("<a") or len(artist_loc) > 100:
                artist_loc = ""
            artist_lon = nan_to_neg1(hdf5_getters.get_artist_longitude(h5))
            artist_mbid = hdf5_getters.get_artist_mbid(h5)
            artist_pmid = hdf5_getters.get_artist_playmeid(h5)
            audio_md5 = hdf5_getters.get_audio_md5(h5)
            danceability = nan_to_neg1(hdf5_getters.get_danceability(h5))
            end_fade_in = nan_to_neg1(hdf5_getters.get_end_of_fade_in(h5))
            energy = nan_to_neg1(hdf5_getters.get_energy(h5))
            song_key = hdf5_getters.get_key(h5)
            key_c = nan_to_neg1(hdf5_getters.get_key_confidence(h5))
            loudness = nan_to_neg1(hdf5_getters.get_loudness(h5))
            mode = hdf5_getters.get_mode(h5)
            mode_conf = nan_to_neg1(hdf5_getters.get_mode_confidence(h5))
            release_7digitalid = hdf5_getters.get_release_7digitalid(h5)
            song_hot = nan_to_neg1(hdf5_getters.get_song_hotttnesss(h5))
            song_id = hdf5_getters.get_song_id(h5)
            start_fade_out = hdf5_getters.get_start_of_fade_out(h5)
            tempo = nan_to_neg1(hdf5_getters.get_tempo(h5))
            time_sig = hdf5_getters.get_time_signature(h5)
            time_sig_c = nan_to_neg1(hdf5_getters.get_time_signature_confidence(h5))
            track_id = hdf5_getters.get_track_id(h5)
            track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
            year = hdf5_getters.get_year(h5)

            bars_c = hdf5_getters.get_bars_confidence(h5)
            bars_c_avg, bars_c_max, bars_c_min, bars_c_stddev, bars_c_count, bars_c_sum = agg(bars_c)
            bars_start = hdf5_getters.get_bars_start(h5)
            bars_start_avg, bars_start_max, bars_start_min, bars_start_stddev, bars_start_count, bars_start_sum = agg(bars_start)
            beats_c = hdf5_getters.get_beats_confidence(h5)
            beats_c_avg, beats_c_max, beats_c_min, beats_c_stddev, beats_c_count, beats_c_sum = agg(beats_c)
            beats_start = hdf5_getters.get_beats_start(h5)
            beats_start_avg, beats_start_max, beats_start_min, beats_start_stddev, beats_start_count, beats_start_sum = agg(beats_start)
            sec_c = hdf5_getters.get_sections_confidence(h5)
            sec_c_avg, sec_c_max, sec_c_min, sec_c_stddev, sec_c_count, sec_c_sum = agg(sec_c)
            sec_start = hdf5_getters.get_sections_start(h5)
            sec_start_avg, sec_start_max, sec_start_min, sec_start_stddev, sec_start_count, sec_start_sum = agg(sec_start)
            seg_c = hdf5_getters.get_segments_confidence(h5)
            seg_c_avg, seg_c_max, seg_c_min, seg_c_stddev, seg_c_count, seg_c_sum = agg(seg_c)
            seg_loud_max = hdf5_getters.get_segments_loudness_max(h5)
            seg_loud_max_avg, seg_loud_max_max, seg_loud_max_min, seg_loud_max_stddev, seg_loud_max_count, seg_loud_max_sum = agg(seg_loud_max)
            seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5)
            seg_loud_max_time_avg, seg_loud_max_time_max, seg_loud_max_time_min, seg_loud_max_time_stddev, seg_loud_max_time_count, seg_loud_max_time_sum = agg(seg_loud_max_time)
            seg_loud_start = hdf5_getters.get_segments_loudness_start(h5)
            seg_loud_start_avg, seg_loud_start_max, seg_loud_start_min, seg_loud_start_stddev, seg_loud_start_count, seg_loud_start_sum = agg(seg_loud_start)
            seg_pitch = hdf5_getters.get_segments_pitches(h5)
            pitch_size = len(seg_pitch)
            seg_start = hdf5_getters.get_segments_start(h5)
            seg_start_avg, seg_start_max, seg_start_min, seg_start_stddev, seg_start_count, seg_start_sum = agg(seg_start)
            seg_timbre = hdf5_getters.get_segments_timbre(h5)
            tatms_c = hdf5_getters.get_tatums_confidence(h5)
            tatms_c_avg, tatms_c_max, tatms_c_min, tatms_c_stddev, tatms_c_count, tatms_c_sum = agg(tatms_c)
            tatms_start = hdf5_getters.get_tatums_start(h5)
            tatms_start_avg, tatms_start_max, tatms_start_min, tatms_start_stddev, tatms_start_count, tatms_start_sum = agg(tatms_start)

            # Getting the genres
            genre_set = 0  # flag to see if the genre has been set or not
            art_trm = hdf5_getters.get_artist_terms(h5)
            trm_freq = hdf5_getters.get_artist_terms_freq(h5)
            trn_wght = hdf5_getters.get_artist_terms_weight(h5)
            a_mb_tags = hdf5_getters.get_artist_mbtags(h5)
            genre_indexes = get_genre_indexes(trm_freq)  # indexes of the highest frequencies
            final_genre = []
            for i in range(len(genre_indexes)):
                genre_tmp = get_genre(art_trm, genre_indexes[i])  # term with the highest frequency
                genres_so_far = genre_dict.get_genre_in_dict(genre_tmp)  # look the term up in the genre dictionary
                if len(genres_so_far) != 0:
                    for g in genres_so_far:
                        final_genre.append(g)
                    genre_set = 1  # genre was found in the dictionary
            if genre_set == 1:
                col_num = [int(genre) for genre in final_genre]  # column numbers of the genres
                genre_array = genre_columns(col_num)
            else:
                genre_array = genre_columns(-1)  # the genre was not found in the dictionary

            # aggregate values for the 12 pitch rows and the 12 timbre rows
            transpose_pitch = seg_pitch.transpose()    # transpose the matrix so we have 12 rows
            seg_pitch_avg = [get_avg(r) for r in transpose_pitch]
            seg_pitch_max = [get_max(r) for r in transpose_pitch]
            seg_pitch_min = [get_min(r) for r in transpose_pitch]
            seg_pitch_stddev = [get_stddev(r) for r in transpose_pitch]
            seg_pitch_count = [get_count(r) for r in transpose_pitch]
            seg_pitch_sum = [get_sum(r) for r in transpose_pitch]
            transpose_timbre = seg_timbre.transpose()  # use the timbre matrix here, not the pitch matrix
            seg_timbre_avg = [get_avg(r) for r in transpose_timbre]
            seg_timbre_max = [get_max(r) for r in transpose_timbre]
            seg_timbre_min = [get_min(r) for r in transpose_timbre]
            seg_timbre_stddev = [get_stddev(r) for r in transpose_timbre]
            seg_timbre_count = [get_count(r) for r in transpose_timbre]
            seg_timbre_sum = [get_sum(r) for r in transpose_timbre]

            # Writing to the flat file: metadata, the 133 genre flag columns, the aggregate
            # statistics of every analysis array, and the per-coefficient pitch/timbre aggregates
            flat_row = [title, album, artist_name, duration, samp_rt, artist_7digitalid,
                        artist_fam, artist_hotness, artist_id, artist_lat, artist_loc,
                        artist_lon, artist_mbid]
            flat_row += [genre_array[k] for k in range(133)]
            flat_row += [artist_pmid, audio_md5, danceability, end_fade_in, energy, song_key,
                         key_c, loudness, mode, mode_conf, release_7digitalid, song_hot,
                         song_id, start_fade_out, tempo, time_sig, time_sig_c, track_id,
                         track_7digitalid, year]
            flat_row += [bars_c_avg, bars_c_max, bars_c_min, bars_c_stddev, bars_c_count, bars_c_sum,
                         bars_start_avg, bars_start_max, bars_start_min, bars_start_stddev, bars_start_count, bars_start_sum,
                         beats_c_avg, beats_c_max, beats_c_min, beats_c_stddev, beats_c_count, beats_c_sum,
                         beats_start_avg, beats_start_max, beats_start_min, beats_start_stddev, beats_start_count, beats_start_sum,
                         sec_c_avg, sec_c_max, sec_c_min, sec_c_stddev, sec_c_count, sec_c_sum,
                         sec_start_avg, sec_start_max, sec_start_min, sec_start_stddev, sec_start_count, sec_start_sum,
                         seg_c_avg, seg_c_max, seg_c_min, seg_c_stddev, seg_c_count, seg_c_sum,
                         seg_loud_max_avg, seg_loud_max_max, seg_loud_max_min, seg_loud_max_stddev, seg_loud_max_count, seg_loud_max_sum,
                         seg_loud_max_time_avg, seg_loud_max_time_max, seg_loud_max_time_min, seg_loud_max_time_stddev, seg_loud_max_time_count, seg_loud_max_time_sum,
                         seg_loud_start_avg, seg_loud_start_max, seg_loud_start_min, seg_loud_start_stddev, seg_loud_start_count, seg_loud_start_sum]
            for k in range(12):
                flat_row += [seg_pitch_avg[k], seg_pitch_max[k], seg_pitch_min[k],
                             seg_pitch_stddev[k], seg_pitch_count[k], seg_pitch_sum[k]]
            flat_row += [seg_start_avg, seg_start_max, seg_start_min, seg_start_stddev,
                         seg_start_count, seg_start_sum]
            for k in range(12):
                flat_row += [seg_timbre_avg[k], seg_timbre_max[k], seg_timbre_min[k],
                             seg_timbre_stddev[k], seg_timbre_count[k], seg_timbre_sum[k]]
            flat_row += [tatms_c_avg, tatms_c_max, tatms_c_min, tatms_c_stddev, tatms_c_count, tatms_c_sum,
                         tatms_start_avg, tatms_start_max, tatms_start_min, tatms_start_stddev, tatms_start_count, tatms_start_sum]
            writer.writerow(flat_row)
            h5.close()
            count = count + 1
            print count
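The genre handling above also depends on helpers that are not shown in this excerpt: get_genre_indexes, get_genre, genre_columns, and a genre_dict module mapping artist terms to genre column numbers. The following is only an illustrative sketch, assuming 133 genre columns (as used in the writerow) and treating -1 as "no genre found"; the real implementations may differ.

import numpy


def get_genre_indexes(trm_freq):
    # indexes of the artist-term frequencies, sorted from highest to lowest
    return list(numpy.argsort(trm_freq)[::-1])


def get_genre(art_trm, index):
    # the artist term stored at the given index
    return art_trm[index]


def genre_columns(col_num):
    # a row of 133 genre flags; col_num is a list of column numbers to set to 1,
    # or -1 when the genre was not found in the dictionary
    flags = [0] * 133
    if col_num == -1:
        return flags
    for col in col_num:
        flags[col] = 1
    return flags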
def getInfo(files):
    data = []
    build_str = ''
    # the first command-line argument names a file listing the requested fields
    with open(sys.argv[1], 'r') as f:
        contents = f.read()
    c = contents.split()
    print("creating csv with following fields:" + contents)
    build_str = ','.join(c) + '\n'

    # every hdf5_getters accessor this script knows how to extract
    field_names = ['track_id', 'segments_pitches', 'time_signature_confidence', 'song_hotttnesss',
                   'artist_longitude', 'tatums_confidence', 'num_songs', 'duration',
                   'start_of_fade_out', 'artist_name', 'similar_artists', 'artist_mbtags',
                   'artist_terms_freq', 'release', 'song_id', 'track_7digitalid', 'title',
                   'artist_latitude', 'energy', 'key', 'release_7digitalid', 'artist_mbid',
                   'segments_confidence', 'artist_hotttnesss', 'time_signature',
                   'segments_loudness_max_time', 'mode', 'segments_loudness_start', 'tempo',
                   'key_confidence', 'analysis_sample_rate', 'bars_confidence', 'artist_playmeid',
                   'artist_terms_weight', 'segments_start', 'artist_location', 'loudness', 'year',
                   'artist_7digitalid', 'audio_md5', 'segments_timbre', 'mode_confidence',
                   'end_of_fade_in', 'danceability', 'artist_familiarity', 'artist_mbtags_count',
                   'tatums_start', 'artist_id', 'segments_loudness_max', 'bars_start',
                   'beats_start', 'artist_terms', 'sections_start', 'beats_confidence',
                   'sections_confidence']

    for fil in files:
        curFile = getters.open_h5_file_read(fil)
        # read every known field for this track, keyed by field name
        get_table = {name: getattr(getters, 'get_' + name)(curFile) for name in field_names}
        tid = fil.split('/')[-1].split('.')[0]
        d2 = {}
        for i in c:
            if i in get_table:
                value = get_table[i]
                if isinstance(value, bytes):
                    value = value.decode('utf-8')  # strip the bytes b'...' wrapper
                d2[i] = str(value).replace('\n', '')
                build_str = build_str + d2[i] + ','
            else:
                print('error: unspecified field')
                exit(0)
        build_str = build_str[:-1] + '\n'
        curFile.close()
    build_str = build_str.replace("'", '').replace('"', '')
    return build_str
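A hypothetical driver for getInfo, assuming the script is invoked as "python extract.py fields.txt" (so that sys.argv[1] names the field list) and that the .h5 files live under a local MillionSongSubset directory; the directory layout and output file name are placeholders, not part of the original script.

import glob
import os

if __name__ == '__main__':
    # collect every .h5 file below the (placeholder) dataset directory
    h5_files = glob.glob(os.path.join('MillionSongSubset', 'data', '*', '*', '*', '*.h5'))
    csv_text = getInfo(h5_files)
    with open('subset.csv', 'w') as out:   # placeholder output name
        out.write(csv_text)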
    song.audioMD5 = remove_trap_characters(str(hdf5_getters.get_audio_md5(songH5File)))
    song.endOfFadeIn = remove_trap_characters(str(hdf5_getters.get_end_of_fade_in(songH5File)))
    song.startOfFadeOut = remove_trap_characters(str(hdf5_getters.get_start_of_fade_out(songH5File)))
    song.energy = remove_trap_characters(str(hdf5_getters.get_energy(songH5File)))
    song.release = remove_trap_characters(str(hdf5_getters.get_release(songH5File)))
    song.release7digitalid = remove_trap_characters(str(hdf5_getters.get_release_7digitalid(songH5File)))
    song.songHotness = remove_trap_characters(str(hdf5_getters.get_song_hotttnesss(songH5File)))
    song.track7digitalid = remove_trap_characters(str(hdf5_getters.get_track_7digitalid(songH5File)))
    temp = hdf5_getters.get_similar_artists(songH5File)
    song.similarartists = remove_trap_characters(str(list(temp)))
    song.similarArtistsCount = get_list_length(temp)
    song.loudness = remove_trap_characters(str(hdf5_getters.get_loudness(songH5File)))
    song.mode = remove_trap_characters(str(hdf5_getters.get_mode(songH5File)))
    song.modeConfidence = remove_trap_characters(str(hdf5_getters.get_mode_confidence(songH5File)))
    song.artistName = remove_trap_characters(str(hdf5_getters.get_artist_name(songH5File)))
    song.danceability = remove_trap_characters(str(hdf5_getters.get_danceability(songH5File)))
    song.duration = remove_trap_characters(str(hdf5_getters.get_duration(songH5File)))
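The fragment above calls remove_trap_characters and get_list_length, which are not defined in this excerpt. A minimal sketch under the assumption that they exist to keep delimiter characters out of the CSV row and to count the similar-artists list; the real definitions may differ.

def remove_trap_characters(value):
    # strip characters that would break a comma-separated row
    return value.replace(',', ' ').replace('"', '').replace('\n', ' ')


def get_list_length(items):
    # number of entries in the similar-artists list
    return len(items)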
def main(): outputFile1 = open('SongCSV.csv', 'w') csvRowString = "" ################################################# #if you want to prompt the user for the order of attributes in the csv, #leave the prompt boolean set to True #else, set 'prompt' to False and set the order of attributes in the 'else' #clause prompt = False ################################################# if prompt == True: while prompt: prompt = False csvAttributeString = raw_input("\n\nIn what order would you like the colums of the CSV file?\n" + "Please delineate with commas. The options are: " + "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"+ " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," + " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" + "For example, you may write \"Title, Tempo, Duration\"...\n\n" + "...or exit by typing 'exit'.\n\n") csvAttributeList = re.split('\W+', csvAttributeString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += 'AlbumID' elif attribute == 'AlbumName'.lower(): csvRowString += 'AlbumName' elif attribute == 'ArtistID'.lower(): csvRowString += 'ArtistID' elif attribute == 'ArtistLatitude'.lower(): csvRowString += 'ArtistLatitude' elif attribute == 'ArtistLocation'.lower(): csvRowString += 'ArtistLocation' elif attribute == 'ArtistLongitude'.lower(): csvRowString += 'ArtistLongitude' elif attribute == 'ArtistName'.lower(): csvRowString += 'ArtistName' elif attribute == 'Danceability'.lower(): csvRowString += 'Danceability' elif attribute == 'Duration'.lower(): csvRowString += 'Duration' elif attribute == 'KeySignature'.lower(): csvRowString += 'KeySignature' elif attribute == 'KeySignatureConfidence'.lower(): csvRowString += 'KeySignatureConfidence' elif attribute == 'SongID'.lower(): csvRowString += "SongID" elif attribute == 'Tempo'.lower(): csvRowString += 'Tempo' elif attribute == 'TimeSignature'.lower(): csvRowString += 'TimeSignature' elif attribute == 'TimeSignatureConfidence'.lower(): csvRowString += 'TimeSignatureConfidence' elif attribute == 'Title'.lower(): csvRowString += 'Title' elif attribute == 'Year'.lower(): csvRowString += 'Year' elif attribute == 'track_id'.lower(): csvRowString += 'track_id' elif attribute == 'artist_familiarity'.lower(): csvRowString += 'artist_familiarity' elif attribute == 'artist_hotttnesss'.lower(): csvRowString += 'artist_hotttnesss' elif attribute == 'artist_mbid'.lower(): csvRowString += 'artist_mbid' elif attribute == 'artist_playmeid'.lower(): csvRowString += 'artist_playmeid' elif attribute == 'artist_7digitalid'.lower(): csvRowString += 'artist_7digitalid' elif attribute == 'release'.lower(): csvRowString += 'release' elif attribute == 'release_7digitalid'.lower(): csvRowString += 'release_7digitalid' elif attribute == 'song_hotttnesss'.lower(): csvRowString += 'song_hotttnesss' elif attribute == 'track_7digitalid'.lower(): csvRowString += 'track_7digitalid' elif attribute == 'analysis_sample_rate'.lower(): csvRowString += 'analysis_sample_rate' elif attribute == 'audio_md5'.lower(): csvRowString += 'audio_md5' elif attribute == 'end_of_fade_in'.lower(): csvRowString += 'end_of_fade_in' elif attribute == 'energy'.lower(): csvRowString += 'energy' elif attribute == 'key'.lower(): csvRowString += 'key' elif attribute == 'key_confidence'.lower(): csvRowString += 
'key_confidence' elif attribute == 'loudness'.lower(): csvRowString += 'loudness' elif attribute == 'mode'.lower(): csvRowString += 'mode' elif attribute == 'mode_confidence'.lower(): csvRowString += 'mode_confidence' elif attribute == 'start_of_fade_out'.lower(): csvRowString += 'start_of_fade_out' elif attribute == 'Exit'.lower(): sys.exit() else: prompt = True print "==============" print "I believe there has been an error with the input." print "==============" break csvRowString += "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] csvRowString += "\n" outputFile1.write(csvRowString); csvRowString = "" #else, if you want to hard code the order of the csv file and not prompt #the user, else: ################################################# #change the order of the csv file here #Default is to list all available attributes (in alphabetical order) csvRowString = ("SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,"+ "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,"+ "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"+ "Title,Year,track_id,artist_hotttnesss,artist_mbid,artist_playmeid,artist_7digitalid,"+ "release,release_7digitalid,song_hotttnesss,track_7digitalid,analysis_sample_rate,audio_md5,"+ "end_of_fade_in,energy,key,key_confidence,loudness,mode,mode_confidence,start_of_fade_out") ################################################# csvAttributeList = re.split('\W+', csvRowString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() outputFile1.write("SongNumber,"); outputFile1.write(csvRowString + "\n"); csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate basedir = "/vagrant/genrepython/MillionSongSubset" # "." As the default means the current directory ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files. 
################################################# #FOR LOOP for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root,'*'+ext)) for f in files: print f songH5File = hdf5_getters.open_h5_file_read(f) song = Song(str(hdf5_getters.get_song_id(songH5File))) testDanceability = hdf5_getters.get_danceability(songH5File) # print type(testDanceability) # print ("Here is the danceability: ") + str(testDanceability) song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File)) song.albumName = str(hdf5_getters.get_release(songH5File)) song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File)) song.artistLocation = str(hdf5_getters.get_artist_location(songH5File)) song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File)) song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) # song.setGenreList() song.keySignature = str(hdf5_getters.get_key(songH5File)) song.keySignatureConfidence = str(hdf5_getters.get_key_confidence(songH5File)) # song.lyrics = None # song.popularity = None song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str(hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) song.track_id = str(hdf5_getters.get_track_id(songH5File)) song.artist_familiarity = str(hdf5_getters.get_artist_familiarity(songH5File)) song.artist_hotttnesss = str(hdf5_getters.get_artist_hotttnesss(songH5File)) song.artist_mbid = str(hdf5_getters.get_artist_mbid(songH5File)) song.artist_playmeid = str(hdf5_getters.get_artist_playmeid(songH5File)) song.artist_7digitalid = str(hdf5_getters.get_artist_7digitalid(songH5File)) song.release = str(hdf5_getters.get_release(songH5File)) song.release_7digitalid = str(hdf5_getters.get_release_7digitalid(songH5File)) song.song_hotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File)) song.track_7digitalid = str(hdf5_getters.get_track_7digitalid(songH5File)) song.analysis_sample_rate = str(hdf5_getters.get_analysis_sample_rate(songH5File)) song.audio_md5 = str(hdf5_getters.get_audio_md5(songH5File)) song.end_of_fade_in = str(hdf5_getters.get_end_of_fade_in(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) song.key = str(hdf5_getters.get_key(songH5File)) song.key_confidence = str(hdf5_getters.get_key_confidence(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) song.mode_confidence = str(hdf5_getters.get_mode_confidence(songH5File)) song.start_of_fade_out = str(hdf5_getters.get_start_of_fade_out(songH5File)) #print song count csvRowString += str(song.songCount) + "," for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += song.albumID elif attribute == 'AlbumName'.lower(): albumName = song.albumName albumName = albumName.replace(',',"") csvRowString += "\"" + albumName + "\"" elif attribute == 'ArtistID'.lower(): csvRowString += "\"" + song.artistID + "\"" elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLocation'.lower(): location = 
song.artistLocation location = location.replace(',','') csvRowString += "\"" + location + "\"" elif attribute == 'ArtistLongitude'.lower(): longitude = song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistName'.lower(): csvRowString += "\"" + song.artistName + "\"" elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'KeySignature'.lower(): csvRowString += song.keySignature elif attribute == 'KeySignatureConfidence'.lower(): # print "key sig conf: " + song.timeSignatureConfidence csvRowString += song.keySignatureConfidence elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'Title'.lower(): csvRowString += "\"" + song.title + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year elif attribute == 'track_id'.lower(): csvRowString += song.track_id elif attribute == 'artist_familiarity'.lower(): csvRowString += song.artist_familiarity elif attribute == 'artist_hotttnesss'.lower(): csvRowString += song.artist_hotttnesss elif attribute == 'artist_mbid'.lower(): csvRowString += song.artist_mbid elif attribute == 'artist_playmeid'.lower(): csvRowString += song.artist_playmeid elif attribute == 'artist_7digitalid'.lower(): csvRowString += song.artist_7digitalid elif attribute == 'release'.lower(): csvRowString += song.release elif attribute == 'release_7digitalid'.lower(): csvRowString += song.release_7digitalid elif attribute == 'song_hotttnesss'.lower(): csvRowString += song.song_hotttnesss elif attribute == 'track_7digitalid'.lower(): csvRowString += song.track_7digitalid elif attribute == 'analysis_sample_rate'.lower(): csvRowString += song.analysis_sample_rate elif attribute == 'audio_md5'.lower(): csvRowString += song.audio_md5 elif attribute == 'end_of_fade_in'.lower(): csvRowString += song.end_of_fade_in elif attribute == 'energy'.lower(): csvRowString += song.energy elif attribute == 'key'.lower(): csvRowString += song.key elif attribute == 'key_confidence'.lower(): csvRowString += song.key_confidence elif attribute == 'loudness'.lower(): csvRowString += song.loudness elif attribute == 'mode'.lower(): csvRowString += song.mode elif attribute == 'mode_confidence'.lower(): csvRowString += song.mode_confidence elif attribute == 'start_of_fade_out'.lower(): csvRowString += song.start_of_fade_out else: csvRowString += "Erm. This didn't work. Error. :( :(\n" csvRowString += "," #Remove the final comma from each row in the csv lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex-1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" songH5File.close() outputFile1.close()
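# --- A hypothetical alternative to the script above (not part of the original): the long
# if/elif dispatch can be replaced with a dict mapping each CSV column to its hdf5_getters
# accessor, and Python's csv module can handle quoting and the trailing comma. The column
# set, the output filename and write_rows() are illustrative assumptions; only a few
# columns are shown.
import csv
import hdf5_getters

GETTERS_BY_COLUMN = {
    'SongID': hdf5_getters.get_song_id,
    'Title': hdf5_getters.get_title,
    'ArtistName': hdf5_getters.get_artist_name,
    'AlbumName': hdf5_getters.get_release,
    'Tempo': hdf5_getters.get_tempo,
    'Duration': hdf5_getters.get_duration,
    'Year': hdf5_getters.get_year,
}

def write_rows(h5_paths, out_path='SongCSV.csv', columns=tuple(GETTERS_BY_COLUMN)):
    """Write one CSV row per .h5 file, letting csv.writer handle quoting."""
    with open(out_path, 'w', newline='') as out:
        writer = csv.writer(out)
        writer.writerow(columns)
        for path in h5_paths:
            h5 = hdf5_getters.open_h5_file_read(path)
            try:
                row = []
                for col in columns:
                    value = GETTERS_BY_COLUMN[col](h5)
                    if isinstance(value, bytes):  # string getters return bytes
                        value = value.decode('utf-8')
                    row.append(value)
                writer.writerow(row)
            finally:
                h5.close()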
import cPickle as pickle import hdf5_getters as GETTERS DATAFILE = "../msd_dense_subset/dense/songs_tracks.pkl" #mood tracks for subset IDFILE = "ids.txt" with open(IDFILE, 'w') as f1, open(DATAFILE, 'r') as f2: data = pickle.load(f2) for track in data.values(): path = "../msd_dense_subset/dense/"+track[2]+"/"+track[3]+"/"+track[4]+"/"+track+".h5" h5 = GETTERS.open_h5_file_read(path) digitalid = str(GETTERS.get_track_7digitalid(h5)) h5.close() f1.write(digitalid+'\n') print "done"
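# Hypothetical helper (not in the original snippet): the path concatenation above relies on
# the MSD directory convention, where a track lives under
# <root>/<3rd char>/<4th char>/<5th char>/<track_id>.h5. os.path.join makes the same
# construction explicit and portable.
import os

def msd_track_path(root, track_id):
    """Return the expected .h5 path for a Million Song Dataset track id."""
    return os.path.join(root, track_id[2], track_id[3], track_id[4], track_id + '.h5')

# e.g. msd_track_path('../msd_dense_subset/dense', 'TRAMGDX12903CEF79F')
# -> '../msd_dense_subset/dense/A/M/G/TRAMGDX12903CEF79F.h5'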
def main(): outputFile = open('songs.csv', 'w') writer = csv.writer(outputFile) csvRowString = "song_number,artist_familiarity,artist_hotttnesss,artist_id,artist_mbid,artist_playmeid,artist_7digitalid,artist_latitude,artist_longitude,artist_location,artist_name,release,release_7digitalid,song_id,song_hotttnesss,title,track_7digitalid,analysis_sample_rate,audio_md5,danceability,duration,end_of_fade_in,energy,key,key_confidence,loudness,mode,mode_confidence,start_of_fade_out,tempo,time_signature,time_signature_confidence,track_id,year" outputFile.write(csvRowString + "\n") csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate basedir = "." # "." As the default means the current directory ext = ".H5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP songCount = 0 for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: print(f) songH5File = hdf5_getters.open_h5_file_read(f) values = [ songCount, hdf5_getters.get_artist_familiarity(songH5File), hdf5_getters.get_artist_hotttnesss(songH5File), hdf5_getters.get_artist_id(songH5File), hdf5_getters.get_artist_mbid(songH5File), hdf5_getters.get_artist_playmeid(songH5File), hdf5_getters.get_artist_7digitalid(songH5File), hdf5_getters.get_artist_latitude(songH5File), hdf5_getters.get_artist_longitude(songH5File), hdf5_getters.get_artist_location(songH5File), hdf5_getters.get_artist_name(songH5File), hdf5_getters.get_release(songH5File), hdf5_getters.get_release_7digitalid(songH5File), hdf5_getters.get_song_id(songH5File), hdf5_getters.get_song_hotttnesss(songH5File), hdf5_getters.get_title(songH5File), hdf5_getters.get_track_7digitalid(songH5File), hdf5_getters.get_analysis_sample_rate(songH5File), hdf5_getters.get_audio_md5(songH5File), hdf5_getters.get_danceability(songH5File), hdf5_getters.get_duration(songH5File), hdf5_getters.get_end_of_fade_in(songH5File), hdf5_getters.get_energy(songH5File), hdf5_getters.get_key(songH5File), hdf5_getters.get_key_confidence(songH5File), hdf5_getters.get_loudness(songH5File), hdf5_getters.get_mode(songH5File), hdf5_getters.get_mode_confidence(songH5File), hdf5_getters.get_start_of_fade_out(songH5File), hdf5_getters.get_tempo(songH5File), hdf5_getters.get_time_signature(songH5File), hdf5_getters.get_time_signature_confidence(songH5File), hdf5_getters.get_track_id(songH5File), hdf5_getters.get_year(songH5File) ] songH5File.close() songCount = songCount + 1 writer.writerow(values) outputFile.close()
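# Hedged side note (normalize() is an assumed helper, not part of the script above): on
# Python 3 the string getters return bytes and several float fields can be NaN, so writing
# the raw `values` list produces cells like b'Some Title' and 'nan'. A small normalisation
# pass before writer.writerow keeps the CSV clean.
import math

def normalize(value):
    """Decode byte strings and blank out NaN floats for CSV output."""
    if isinstance(value, bytes):
        return value.decode('utf-8')
    if isinstance(value, float) and math.isnan(value):
        return ''
    return value

# usage inside the loop above: writer.writerow([normalize(v) for v in values])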
insidePath = msdPath + '/' + folder if (os.path.isdir(insidePath)): for folder2 in os.listdir(insidePath): insidePath2 = insidePath + '/' + folder2 if (os.path.isdir(insidePath2)): for file in os.listdir(insidePath2): previewFilePath = insidePath2 + '/' + os.path.splitext( file)[0] + '.mp3' print previewFilePath if file.endswith( 'h5') and not os.path.isfile(previewFilePath): h5FilePath = insidePath2 + '/' + file # print 'Processing ' + h5FilePath try: h5 = hdf5_getters.open_h5_file_read(h5FilePath) id7Digital = hdf5_getters.get_track_7digitalid( h5) h5.close() url = py7D.preview_url(id7Digital) urlretrieve = urllib.urlretrieve( url, previewFilePath) except Exception as e: print "Error occurred: " + str(e) if file.endswith('h5'): # update_progress(int(i/7620 * 100)) sys.stdout.write("\r%d%%" % int(i / 7620.0 * 100)) sys.stdout.flush() i += 1
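# Hypothetical restructuring of the crawler above (a sketch in Python 3 style, not the
# original code): os.walk flattens the three nested listdir loops, and the "skip if the
# .mp3 already exists" check becomes a simple continue. It assumes the same py7D and
# hdf5_getters modules that the snippet above already uses.
import os
from urllib.request import urlretrieve

import hdf5_getters
import py7D

def fetch_previews(msd_path):
    """Download a 7digital preview next to every .h5 file that lacks one."""
    for root, _dirs, files in os.walk(msd_path):
        for name in files:
            if not name.endswith('.h5'):
                continue
            h5_path = os.path.join(root, name)
            mp3_path = os.path.splitext(h5_path)[0] + '.mp3'
            if os.path.isfile(mp3_path):
                continue  # preview already downloaded
            try:
                h5 = hdf5_getters.open_h5_file_read(h5_path)
                track_7digitalid = hdf5_getters.get_track_7digitalid(h5)
                h5.close()
                urlretrieve(py7D.preview_url(track_7digitalid), mp3_path)
            except Exception as e:
                print('Error occurred: ' + str(e))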
def hd5_single_random_file_parser(): # Open an h5 file in read mode h5 = hdf5_getters.open_h5_file_read( '/home/skalogerakis/Documents/MillionSong/MillionSongSubset/A/M/G/TRAMGDX12903CEF79F.h5' ) function_tracker = filter( lambda x: x.startswith('get'), hdf5_getters.__dict__.keys()) # Detects all the getter functions for f in function_tracker: # Print everything in function tracker print(f) # First effort to check what each field contains. print() # 55 available fields (exluding number of songs fields) print("Num of songs -- ", hdf5_getters.get_num_songs(h5)) # One song per file print("Title -- ", hdf5_getters.get_title(h5)) # Print the title of a specific h5 file print("Artist familiarity -- ", hdf5_getters.get_artist_familiarity(h5)) print("Artist hotness -- ", hdf5_getters.get_artist_hotttnesss(h5)) print("Artist ID -- ", hdf5_getters.get_artist_id(h5)) print("Artist mbID -- ", hdf5_getters.get_artist_mbid(h5)) print("Artist playmeid -- ", hdf5_getters.get_artist_playmeid(h5)) print("Artist 7DigitalID -- ", hdf5_getters.get_artist_7digitalid(h5)) print("Artist latitude -- ", hdf5_getters.get_artist_latitude(h5)) print("Artist longitude -- ", hdf5_getters.get_artist_longitude(h5)) print("Artist location -- ", hdf5_getters.get_artist_location(h5)) print("Artist Name -- ", hdf5_getters.get_artist_name(h5)) print("Release -- ", hdf5_getters.get_release(h5)) print("Release 7DigitalID -- ", hdf5_getters.get_release_7digitalid(h5)) print("Song ID -- ", hdf5_getters.get_song_id(h5)) print("Song Hotness -- ", hdf5_getters.get_song_hotttnesss(h5)) print("Track 7Digital -- ", hdf5_getters.get_track_7digitalid(h5)) print("Similar artists -- ", hdf5_getters.get_similar_artists(h5)) print("Artist terms -- ", hdf5_getters.get_artist_terms(h5)) print("Artist terms freq -- ", hdf5_getters.get_artist_terms_freq(h5)) print("Artist terms weight -- ", hdf5_getters.get_artist_terms_weight(h5)) print("Analysis sample rate -- ", hdf5_getters.get_analysis_sample_rate(h5)) print("Audio md5 -- ", hdf5_getters.get_audio_md5(h5)) print("Danceability -- ", hdf5_getters.get_danceability(h5)) print("Duration -- ", hdf5_getters.get_duration(h5)) print("End of Fade -- ", hdf5_getters.get_end_of_fade_in(h5)) print("Energy -- ", hdf5_getters.get_energy(h5)) print("Key -- ", hdf5_getters.get_key(h5)) print("Key Confidence -- ", hdf5_getters.get_key_confidence(h5)) print("Loudness -- ", hdf5_getters.get_loudness(h5)) print("Mode -- ", hdf5_getters.get_mode(h5)) print("Mode Confidence -- ", hdf5_getters.get_mode_confidence(h5)) print("Start of fade out -- ", hdf5_getters.get_start_of_fade_out(h5)) print("Tempo -- ", hdf5_getters.get_tempo(h5)) print("Time signature -- ", hdf5_getters.get_time_signature(h5)) print("Time signature confidence -- ", hdf5_getters.get_time_signature_confidence(h5)) print("Track ID -- ", hdf5_getters.get_track_id(h5)) print("Segments Start -- ", hdf5_getters.get_segments_start(h5)) print("Segments Confidence -- ", hdf5_getters.get_segments_confidence(h5)) print("Segments Pitches -- ", hdf5_getters.get_segments_pitches(h5)) print("Segments Timbre -- ", hdf5_getters.get_segments_timbre(h5)) print("Segments Loudness max -- ", hdf5_getters.get_segments_loudness_max(h5)) print("Segments Loudness max time-- ", hdf5_getters.get_segments_loudness_max_time(h5)) print("Segments Loudness start -- ", hdf5_getters.get_segments_loudness_start(h5)) print("Sections start -- ", hdf5_getters.get_sections_start(h5)) print("Sections Confidence -- ", hdf5_getters.get_sections_confidence(h5)) print("Beats start -- ", 
hdf5_getters.get_beats_start(h5)) print("Beats confidence -- ", hdf5_getters.get_beats_confidence(h5)) print("Bars start -- ", hdf5_getters.get_bars_start(h5)) print("Bars confidence -- ", hdf5_getters.get_bars_confidence(h5)) print("Tatums start -- ", hdf5_getters.get_tatums_start(h5)) print("Tatums confidence -- ", hdf5_getters.get_tatums_confidence(h5)) print("Artist mbtags -- ", hdf5_getters.get_artist_mbtags(h5)) print("Artist mbtags count -- ", hdf5_getters.get_artist_mbtags_count(h5)) print("Year -- ", hdf5_getters.get_year(h5)) fields = ['Title', 'Artist ID'] with open('Tester2.csv', 'w', newline='') as csvfile: csv_writer = csv.writer(csvfile, delimiter=';') # writing the fields csv_writer.writerow(fields) # writing the data rows csv_writer.writerow( [hdf5_getters.get_title(h5), hdf5_getters.get_artist_id(h5)]) h5.close() # close h5 when completed in the end
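# A minimal sketch (assumed, not part of the parser above) that dumps every field with
# getattr instead of listing all ~55 getters by hand; get_num_songs is skipped because it
# describes the file rather than the song.
import hdf5_getters

def dump_all_fields(h5_path):
    """Print every hdf5_getters field of a single-song .h5 file."""
    h5 = hdf5_getters.open_h5_file_read(h5_path)
    try:
        for name in sorted(dir(hdf5_getters)):
            if name.startswith('get_') and name != 'get_num_songs':
                print(name[4:], '--', getattr(hdf5_getters, name)(h5))
    finally:
        h5.close()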
def fill_attributes(song, songH5File): #----------------------------non array attributes------------------------------- song.analysisSampleRate = str(hdf5_getters.get_analysis_sample_rate(songH5File)) song.artistDigitalID = str(hdf5_getters.get_artist_7digitalid(songH5File)) song.artistFamiliarity = str(hdf5_getters.get_artist_familiarity(songH5File)) song.artistHotness = str(hdf5_getters.get_artist_hotttnesss(songH5File)) song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File)) song.artistLocation = str(hdf5_getters.get_artist_location(songH5File)) song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File)) song.artistmbID = str(hdf5_getters.get_artist_mbid(songH5File)) song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.artistPlayMeID = str(hdf5_getters.get_artist_playmeid(songH5File)) song.audioMD5 = str(hdf5_getters.get_audio_md5(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) song.key = str(hdf5_getters.get_key(songH5File)) song.keyConfidence = str(hdf5_getters.get_key_confidence(songH5File)) song.segmentsConfidence = str(hdf5_getters.get_segments_confidence(songH5File)) song.sectionsConfidence = str(hdf5_getters.get_sections_confidence(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) song.modeConfidence = str(hdf5_getters.get_mode_confidence(songH5File)) song.release = str(hdf5_getters.get_release(songH5File)) song.releaseDigitalID = str(hdf5_getters.get_release_7digitalid(songH5File)) song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File)) song.startOfFadeOut = str(hdf5_getters.get_start_of_fade_out(songH5File)) song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str(hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.trackID = str(hdf5_getters.get_track_id(songH5File)) song.trackDigitalID = str(hdf5_getters.get_track_7digitalid(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) #-------------------------------array attributes-------------------------------------- #array float song.beatsStart_mean, song.beatsStart_var = convert_array_to_meanvar(hdf5_getters.get_beats_start(songH5File)) #array float song.artistTermsFreq_mean, song.artistTermsFreq_var = convert_array_to_meanvar(hdf5_getters.get_artist_terms_freq(songH5File)) #array float song.artistTermsWeight_mean, song.artistTermsWeight_var = convert_array_to_meanvar(hdf5_getters.get_artist_terms_weight(songH5File)) #array int song.artistmbTagsCount_mean, song.artistmbTagsCount_var = convert_array_to_meanvar(hdf5_getters.get_artist_mbtags_count(songH5File)) #array float song.barsConfidence_mean, song.barsConfidence_var = convert_array_to_meanvar(hdf5_getters.get_bars_confidence(songH5File)) #array float song.barsStart_mean, song.barsStart_var = convert_array_to_meanvar(hdf5_getters.get_bars_start(songH5File)) #array float song.beatsConfidence_mean, song.beatsConfidence_var = convert_array_to_meanvar(hdf5_getters.get_beats_confidence(songH5File)) #array float song.sectionsConfidence_mean, song.sectionsConfidence_var = convert_array_to_meanvar(
hdf5_getters.get_sections_confidence(songH5File)) #array float song.sectionsStart_mean, song.sectionsStart_var = convert_array_to_meanvar( hdf5_getters.get_sections_start(songH5File)) #array float song.segmentsConfidence_mean, song.segmentsConfidence_var = convert_array_to_meanvar( hdf5_getters.get_segments_confidence(songH5File)) #array float song.segmentsLoudness_mean, song.segmentsLoudness_var = convert_array_to_meanvar( hdf5_getters.get_segments_loudness_max(songH5File)) #array float song.segmentsLoudnessMaxTime_mean, song.segmentsLoudnessMaxTime_var = convert_array_to_meanvar( hdf5_getters.get_segments_loudness_max_time(songH5File)) #array float song.segmentsLoudnessMaxStart_mean, song.segmentsLoudnessMaxStart_var = convert_array_to_meanvar( hdf5_getters.get_segments_loudness_start(songH5File)) #array float song.segmentsStart_mean, song.segmentsStart_var = convert_array_to_meanvar( hdf5_getters.get_segments_start(songH5File)) #array float song.tatumsConfidence_mean, song.tatumsConfidence_var = convert_array_to_meanvar( hdf5_getters.get_tatums_confidence(songH5File)) #array float song.tatumsStart_mean, song.tatumsStart_var = convert_array_to_meanvar( hdf5_getters.get_tatums_start(songH5File)) #array2d float song.segmentsTimbre_mean, song.segmentsTimbre_var = covert_2darray_to_meanvar( hdf5_getters.get_segments_timbre(songH5File)) #array2d float song.segmentsPitches_mean, song.segmentsPitches_var = covert_2darray_to_meanvar( hdf5_getters.get_segments_pitches(songH5File)) #------------------------array string attributes------------------------ song.similarArtists = convert_array_to_string( hdf5_getters.get_similar_artists(songH5File)) #array string song.artistTerms = convert_array_to_string( hdf5_getters.get_artist_terms(songH5File)) #array string song.artistmbTags = convert_array_to_string( hdf5_getters.get_artist_mbtags(songH5File)) #array string return song
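# Assumed implementations of the helpers fill_attributes() relies on (the real ones are
# defined elsewhere in this project): mean/variance of a 1-D array, column-wise
# mean/variance of an (n_segments x 12) timbre or chroma matrix, and a join for the string
# arrays. The misspelled covert_2darray_to_meanvar name is kept to match the call sites
# above.
import numpy as np

def convert_array_to_meanvar(arr):
    """Mean and variance of a 1-D array; zeros if the array is empty."""
    arr = np.asarray(arr, dtype=float)
    if arr.size == 0:
        return 0.0, 0.0
    return float(arr.mean()), float(arr.var())

def covert_2darray_to_meanvar(arr2d):
    """Per-column mean and variance of an (n_segments x 12) array."""
    arr2d = np.asarray(arr2d, dtype=float)
    if arr2d.size == 0:
        return [0.0] * 12, [0.0] * 12
    return arr2d.mean(axis=0).tolist(), arr2d.var(axis=0).tolist()

def convert_array_to_string(arr):
    """Join an array of byte/str values into one comma-separated string."""
    return ','.join(x.decode('utf-8') if isinstance(x, bytes) else str(x) for x in arr)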
#segments_start = ','.join(str(e) for e in GETTERS.get_segments_start(h5, i)) # array #segments_timbre = ','.join(str(e) for e in GETTERS.get_segments_timbre(h5, i)) # array similar_artists = ','.join( str(e) for e in GETTERS.get_similar_artists(h5, i)) # array song_hotttnesss = GETTERS.get_song_hotttnesss(h5, i) song_id = GETTERS.get_song_id(h5, i) start_of_fade_out = GETTERS.get_start_of_fade_out(h5, i) #tatums_confidence = ','.join(str(e) for e in GETTERS.get_tatums_confidence(h5, i)) # array #tatums_start = ','.join(str(e) for e in GETTERS.get_tatums_start(h5, i)) # array tempo = GETTERS.get_tempo(h5, i) time_signature = GETTERS.get_time_signature(h5, i) time_signature_confidence = GETTERS.get_time_signature_confidence( h5, i) title = GETTERS.get_title(h5, i) track_7digitalid = GETTERS.get_track_7digitalid(h5, i) track_id = GETTERS.get_track_id(h5, i) year = GETTERS.get_year(h5, i) loops += 1 #row = {'analysis_sample_rate':analysis_sample_rate,'artist_7digitalid':artist_7digitalid,'artist_familiarity':artist_familiarity,'artist_hotttnesss':artist_hotttnesss,'artist_id':artist_id,'artist_latitude':artist_latitude,'artist_location':artist_location,'artist_longitude':artist_longitude,'artist_mbid':artist_mbid,'artist_mbtags_count':artist_mbtags_count,'artist_mbtags':artist_mbtags,'artist_name':artist_name,'artist_terms_freq':artist_terms_freq,'artist_terms_weight':artist_terms_weight,'artist_terms':artist_terms,'audio_md5':audio_md5,'bars_confidence':bars_confidence,'bars_start':bars_start,'beats_confidence':beats_confidence,'beats_start':beats_start,'danceability':danceability,'duration':duration,'end_of_fade_in':end_of_fade_in,'energy':energy,'key_confidence':key_confidence,'key':key,'loudness':loudness,'mode_confidence':mode_confidence,'mode':mode,'release_7digitalid':release_7digitalid,'release':release,'sections_confidence':sections_confidence,'sections_start':sections_start,'segments_confidence':segments_confidence,'segments_loudness_max_time':segments_loudness_max_time,'segments_loudness_max':segments_loudness_max,'segments_loudness_start':segments_loudness_start,'segments_pitches':segments_pitches,'segments_start':segments_start,'segments_timbre':segments_timbre,'similar_artists':similar_artists,'song_hotttnesss':song_hotttnesss,'song_id':song_id,'start_of_fade_out':start_of_fade_out,'tatums_confidence':tatums_confidence,'tatums_start':tatums_start,'tempo':tempo,'time_signature_confidence':time_signature_confidence,'time_signature':time_signature,'title':title,'track_7digitalid':track_7digitalid,'track_id':track_id,'year':year,} row = { 'analysis_sample_rate': analysis_sample_rate, 'artist_7digitalid': artist_7digitalid, 'artist_familiarity': artist_familiarity, 'artist_hotttnesss': artist_hotttnesss, 'artist_id': artist_id, 'artist_latitude': artist_latitude, 'artist_location': artist_location, 'artist_longitude': artist_longitude, 'artist_mbid': artist_mbid,
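# Context sketch for the fragment above (assumed scaffolding, not original code): unlike
# the one-song-per-file subset, an aggregate/summary .h5 holds many songs, so every getter
# takes a song index. get_num_songs gives the loop bound; the filename below is
# illustrative.
import hdf5_getters as GETTERS

h5 = GETTERS.open_h5_file_read('msd_summary_file.h5')  # assumed filename
for i in range(GETTERS.get_num_songs(h5)):
    title = GETTERS.get_title(h5, i)
    track_id = GETTERS.get_track_id(h5, i)
    year = GETTERS.get_year(h5, i)
    # ... remaining fields exactly as in the fragment above ...
h5.close()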
def data_to_flat_file(basedir, ext='.h5'): """This function extract the information from the tables and creates the flat file.""" count = 0 #song counter list_to_write = [] row_to_write = "" writer = csv.writer(open("metadata_wholeA.csv", "wb")) for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: print f #the name of the file h5 = hdf5_getters.open_h5_file_read(f) title = hdf5_getters.get_title(h5) title = title.replace('"', '') comma = title.find(',') #eliminating commas in the title if comma != -1: print title time.sleep(1) album = hdf5_getters.get_release(h5) album = album.replace('"', '') #eliminating commas in the album comma = album.find(',') if comma != -1: print album time.sleep(1) artist_name = hdf5_getters.get_artist_name(h5) comma = artist_name.find(',') if comma != -1: print artist_name time.sleep(1) artist_name = artist_name.replace('"', '') #eliminating double quotes duration = hdf5_getters.get_duration(h5) samp_rt = hdf5_getters.get_analysis_sample_rate(h5) artist_7digitalid = hdf5_getters.get_artist_7digitalid(h5) artist_fam = hdf5_getters.get_artist_familiarity(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_fam) == True: artist_fam = -1 artist_hotness = hdf5_getters.get_artist_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_hotness) == True: artist_hotness = -1 artist_id = hdf5_getters.get_artist_id(h5) artist_lat = hdf5_getters.get_artist_latitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lat) == True: artist_lat = -1 artist_loc = hdf5_getters.get_artist_location(h5) #checks artist_loc to see if it is a hyperlink if it is set as empty string artist_loc = artist_loc.replace(",", "\,") if artist_loc.startswith("<a"): artist_loc = "" if len(artist_loc) > 100: artist_loc = "" artist_lon = hdf5_getters.get_artist_longitude(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(artist_lon) == True: artist_lon = -1 artist_mbid = hdf5_getters.get_artist_mbid(h5) artist_pmid = hdf5_getters.get_artist_playmeid(h5) audio_md5 = hdf5_getters.get_audio_md5(h5) danceability = hdf5_getters.get_danceability(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(danceability) == True: danceability = -1 end_fade_in = hdf5_getters.get_end_of_fade_in(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(end_fade_in) == True: end_fade_in = -1 energy = hdf5_getters.get_energy(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(energy) == True: energy = -1 song_key = hdf5_getters.get_key(h5) key_c = hdf5_getters.get_key_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(key_c) == True: key_c = -1 loudness = hdf5_getters.get_loudness(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(loudness) == True: loudness = -1 mode = hdf5_getters.get_mode(h5) mode_conf = hdf5_getters.get_mode_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(mode_conf) == True: mode_conf = -1 release_7digitalid = hdf5_getters.get_release_7digitalid(h5) song_hot = hdf5_getters.get_song_hotttnesss(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(song_hot) == True: song_hot = -1 song_id = hdf5_getters.get_song_id(h5) start_fade_out = hdf5_getters.get_start_of_fade_out(h5) tempo = hdf5_getters.get_tempo(h5) #checking if we get a "nan" if we do we change 
it to -1 if numpy.isnan(tempo) == True: tempo = -1 time_sig = hdf5_getters.get_time_signature(h5) time_sig_c = hdf5_getters.get_time_signature_confidence(h5) #checking if we get a "nan" if we do we change it to -1 if numpy.isnan(time_sig_c) == True: time_sig_c = -1 track_id = hdf5_getters.get_track_id(h5) track_7digitalid = hdf5_getters.get_track_7digitalid(h5) year = hdf5_getters.get_year(h5) bars_c = hdf5_getters.get_bars_confidence(h5) bars_c_avg = get_avg(bars_c) bars_c_max = get_max(bars_c) bars_c_min = get_min(bars_c) bars_c_stddev = get_stddev(bars_c) bars_c_count = get_count(bars_c) bars_c_sum = get_sum(bars_c) bars_start = hdf5_getters.get_bars_start(h5) bars_start_avg = get_avg(bars_start) bars_start_max = get_max(bars_start) bars_start_min = get_min(bars_start) bars_start_stddev = get_stddev(bars_start) bars_start_count = get_count(bars_start) bars_start_sum = get_sum(bars_start) beats_c = hdf5_getters.get_beats_confidence(h5) beats_c_avg = get_avg(beats_c) beats_c_max = get_max(beats_c) beats_c_min = get_min(beats_c) beats_c_stddev = get_stddev(beats_c) beats_c_count = get_count(beats_c) beats_c_sum = get_sum(beats_c) beats_start = hdf5_getters.get_beats_start(h5) beats_start_avg = get_avg(beats_start) beats_start_max = get_max(beats_start) beats_start_min = get_min(beats_start) beats_start_stddev = get_stddev(beats_start) beats_start_count = get_count(beats_start) beats_start_sum = get_sum(beats_start) sec_c = hdf5_getters.get_sections_confidence(h5) sec_c_avg = get_avg(sec_c) sec_c_max = get_max(sec_c) sec_c_min = get_min(sec_c) sec_c_stddev = get_stddev(sec_c) sec_c_count = get_count(sec_c) sec_c_sum = get_sum(sec_c) sec_start = hdf5_getters.get_sections_start(h5) sec_start_avg = get_avg(sec_start) sec_start_max = get_max(sec_start) sec_start_min = get_min(sec_start) sec_start_stddev = get_stddev(sec_start) sec_start_count = get_count(sec_start) sec_start_sum = get_sum(sec_start) seg_c = hdf5_getters.get_segments_confidence(h5) seg_c_avg = get_avg(seg_c) seg_c_max = get_max(seg_c) seg_c_min = get_min(seg_c) seg_c_stddev = get_stddev(seg_c) seg_c_count = get_count(seg_c) seg_c_sum = get_sum(seg_c) seg_loud_max = hdf5_getters.get_segments_loudness_max(h5) seg_loud_max_avg = get_avg(seg_loud_max) seg_loud_max_max = get_max(seg_loud_max) seg_loud_max_min = get_min(seg_loud_max) seg_loud_max_stddev = get_stddev(seg_loud_max) seg_loud_max_count = get_count(seg_loud_max) seg_loud_max_sum = get_sum(seg_loud_max) seg_loud_max_time = hdf5_getters.get_segments_loudness_max_time(h5) seg_loud_max_time_avg = get_avg(seg_loud_max_time) seg_loud_max_time_max = get_max(seg_loud_max_time) seg_loud_max_time_min = get_min(seg_loud_max_time) seg_loud_max_time_stddev = get_stddev(seg_loud_max_time) seg_loud_max_time_count = get_count(seg_loud_max_time) seg_loud_max_time_sum = get_sum(seg_loud_max_time) seg_loud_start = hdf5_getters.get_segments_loudness_start(h5) seg_loud_start_avg = get_avg(seg_loud_start) seg_loud_start_max = get_max(seg_loud_start) seg_loud_start_min = get_min(seg_loud_start) seg_loud_start_stddev = get_stddev(seg_loud_start) seg_loud_start_count = get_count(seg_loud_start) seg_loud_start_sum = get_sum(seg_loud_start) seg_pitch = hdf5_getters.get_segments_pitches(h5) pitch_size = len(seg_pitch) seg_start = hdf5_getters.get_segments_start(h5) seg_start_avg = get_avg(seg_start) seg_start_max = get_max(seg_start) seg_start_min = get_min(seg_start) seg_start_stddev = get_stddev(seg_start) seg_start_count = get_count(seg_start) seg_start_sum = get_sum(seg_start) seg_timbre = 
hdf5_getters.get_segments_timbre(h5) tatms_c = hdf5_getters.get_tatums_confidence(h5) tatms_c_avg = get_avg(tatms_c) tatms_c_max = get_max(tatms_c) tatms_c_min = get_min(tatms_c) tatms_c_stddev = get_stddev(tatms_c) tatms_c_count = get_count(tatms_c) tatms_c_sum = get_sum(tatms_c) tatms_start = hdf5_getters.get_tatums_start(h5) tatms_start_avg = get_avg(tatms_start) tatms_start_max = get_max(tatms_start) tatms_start_min = get_min(tatms_start) tatms_start_stddev = get_stddev(tatms_start) tatms_start_count = get_count(tatms_start) tatms_start_sum = get_sum(tatms_start) #Getting the genres genre_set = 0 #flag to see if the genre has been set or not art_trm = hdf5_getters.get_artist_terms(h5) trm_freq = hdf5_getters.get_artist_terms_freq(h5) trn_wght = hdf5_getters.get_artist_terms_weight(h5) a_mb_tags = hdf5_getters.get_artist_mbtags(h5) genre_indexes = get_genre_indexes( trm_freq) #index of the highest freq final_genre = [] genres_so_far = [] for i in range(len(genre_indexes)): genre_tmp = get_genre( art_trm, genre_indexes[i] ) #genre that corresponds to the highest freq genres_so_far = genre_dict.get_genre_in_dict( genre_tmp) #getting the genre from the dictionary if len(genres_so_far) != 0: for i in genres_so_far: final_genre.append(i) genre_set = 1 #genre was found in dictionary if genre_set == 1: col_num = [] for genre in final_genre: column = int( genre) #getting the column number of the genre col_num.append(column) genre_array = genre_columns(col_num) #genre array else: genre_array = genre_columns( -1) #the genre was not found in the dictionary transpose_pitch = seg_pitch.transpose( ) #this is to tranpose the matrix,so we can have 12 rows #arrays containing the aggregate values of the 12 rows seg_pitch_avg = [] seg_pitch_max = [] seg_pitch_min = [] seg_pitch_stddev = [] seg_pitch_count = [] seg_pitch_sum = [] i = 0 #Getting the aggregate values in the pitches array for row in transpose_pitch: seg_pitch_avg.append(get_avg(row)) seg_pitch_max.append(get_max(row)) seg_pitch_min.append(get_min(row)) seg_pitch_stddev.append(get_stddev(row)) seg_pitch_count.append(get_count(row)) seg_pitch_sum.append(get_sum(row)) i = i + 1 #extracting information from the timbre array transpose_timbre = seg_pitch.transpose( ) #tranposing matrix, to have 12 rows #arrays containing the aggregate values of the 12 rows seg_timbre_avg = [] seg_timbre_max = [] seg_timbre_min = [] seg_timbre_stddev = [] seg_timbre_count = [] seg_timbre_sum = [] i = 0 for row in transpose_timbre: seg_timbre_avg.append(get_avg(row)) seg_timbre_max.append(get_max(row)) seg_timbre_min.append(get_min(row)) seg_timbre_stddev.append(get_stddev(row)) seg_timbre_count.append(get_count(row)) seg_timbre_sum.append(get_sum(row)) i = i + 1 #Writing to the flat file writer.writerow([ title, album, artist_name, year, duration, seg_start_count, tempo ]) h5.close() count = count + 1 print count
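# Assumed implementations of the numeric aggregate helpers used throughout
# data_to_flat_file() above (the real ones, like the genre helpers, are defined elsewhere
# in that script); each guards against empty arrays so a song with no detected
# bars/beats/segments still yields a row.
import numpy as np

def get_avg(arr):
    return float(np.mean(arr)) if len(arr) else 0.0

def get_max(arr):
    return float(np.max(arr)) if len(arr) else 0.0

def get_min(arr):
    return float(np.min(arr)) if len(arr) else 0.0

def get_stddev(arr):
    return float(np.std(arr)) if len(arr) else 0.0

def get_count(arr):
    return len(arr)

def get_sum(arr):
    return float(np.sum(arr)) if len(arr) else 0.0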
def main(): outputFile1 = open('SongCSV.csv', 'w') csvRowString = "" ################################################# #if you want to prompt the user for the order of attributes in the csv, #leave the prompt boolean set to True #else, set 'prompt' to False and set the order of attributes in the 'else' #clause prompt = False ################################################# if prompt == True: while prompt: prompt = False csvAttributeString = raw_input( "\n\nIn what order would you like the colums of the CSV file?\n" + "Please delineate with commas. The options are: " + "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude," + " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," + " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" + "For example, you may write \"Title, Tempo, Duration\"...\n\n" + "...or exit by typing 'exit'.\n\n") csvAttributeList = re.split('\W+', csvAttributeString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += 'AlbumID' elif attribute == 'AlbumName'.lower(): csvRowString += 'AlbumName' elif attribute == 'ArtistID'.lower(): csvRowString += 'ArtistID' elif attribute == 'ArtistLatitude'.lower(): csvRowString += 'ArtistLatitude' elif attribute == 'ArtistLocation'.lower(): csvRowString += 'ArtistLocation' elif attribute == 'ArtistLongitude'.lower(): csvRowString += 'ArtistLongitude' elif attribute == 'ArtistName'.lower(): csvRowString += 'ArtistName' elif attribute == 'Danceability'.lower(): csvRowString += 'Danceability' elif attribute == 'Duration'.lower(): csvRowString += 'Duration' elif attribute == 'KeySignature'.lower(): csvRowString += 'KeySignature' elif attribute == 'KeySignatureConfidence'.lower(): csvRowString += 'KeySignatureConfidence' elif attribute == 'SongID'.lower(): csvRowString += "SongID" elif attribute == 'Tempo'.lower(): csvRowString += 'Tempo' elif attribute == 'TimeSignature'.lower(): csvRowString += 'TimeSignature' elif attribute == 'TimeSignatureConfidence'.lower(): csvRowString += 'TimeSignatureConfidence' elif attribute == 'Title'.lower(): csvRowString += 'Title' elif attribute == 'Year'.lower(): csvRowString += 'Year' elif attribute == 'Familiarity'.lower(): ####Added by us! 
csvRowString += song.familiarity elif attribute == 'artist_mbid'.lower(): csvRowString += song.artist_mbid elif attribute == 'artist_playmeid'.lower(): csvRowString += song.artist_playmeid elif attribute == 'artist_7digid'.lower(): csvRowString += song.artist_7digid elif attribute == 'hottness'.lower(): csvRowString += song.hottness elif attribute == 'song_hottness'.lower(): csvRowString += song.song_hottness elif attribute == 'digitalid7'.lower(): csvRowString += song.digitalid7 elif attribute == 'similar_artists'.lower(): csvRowString += song.similar_artists elif attribute == 'artist_terms'.lower(): csvRowString += song.artist_terms elif attribute == 'art_terms_freq'.lower(): csvRowString += song.art_terms_freq elif attribute == 'art_terms_weight'.lower(): csvRowString += song.art_terms_weight elif attribute == 'a_sample_rate'.lower(): csvRowString += song.a_sample_rate elif attribute == 'audio_md5'.lower(): csvRowString += song.audio_md5 elif attribute == 'end_of_fade_in'.lower(): csvRowString += song.end_of_fade_in elif attribute == 'energy'.lower(): csvRowString += song.energy elif attribute == 'loudness'.lower(): csvRowString += song.loudness elif attribute == 'mode'.lower(): csvRowString += song.mode elif attribute == 'mode_conf'.lower(): csvRowString += song.mode_conf elif attribute == 'start_of_fade_out'.lower(): csvRowString += song.start_of_fade_out elif attribute == 'trackid'.lower(): csvRowString += song.trackid elif attribute == 'segm_start'.lower(): csvRowString += song.segm_start elif attribute == 'segm_conf'.lower(): csvRowString += song.segm_conf elif attribute == 'segm_pitch'.lower(): csvRowString += song.segm_pitch elif attribute == 'segm_timbre'.lower(): csvRowString += song.segm_timbre elif attribute == 'segm_max_loud'.lower(): csvRowString += song.segm_max_loud elif attribute == 'segm_max_loud_time'.lower(): csvRowString += song.segm_max_loud_time elif attribute == 'segm_loud_start'.lower(): csvRowString += song.segm_loud_start elif attribute == 'sect_start'.lower(): csvRowString += song.sect_start elif attribute == 'sect_conf'.lower(): csvRowString += song.sect_conf elif attribute == 'beats_start'.lower(): csvRowString += song.beats_start elif attribute == 'beats_conf'.lower(): csvRowString += song.beats_conf elif attribute == 'bars_start'.lower(): csvRowString += song.bars_start elif attribute == 'bars_conf'.lower(): csvRowString += song.bars_conf elif attribute == 'tatums_start'.lower(): csvRowString += song.tatums_start elif attribute == 'tatums_conf'.lower(): csvRowString += song.tatums_conf elif attribute == 'artist_mbtags'.lower(): csvRowString += song.artist_mbtags elif attribute == 'artist_mbtags_count'.lower(): csvRowString += song.artist_mbtags_count elif attribute == 'Exit'.lower(): sys.exit() else: prompt = True print("==============") print("I believe there has been an error with the input.") print("==============") break csvRowString += "," lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" #else, if you want to hard code the order of the csv file and not prompt #the user, else: ################################################# #change the order of the csv file here #Default is to list all available attributes (in alphabetical order) csvRowString = 
"SongID,AlbumID,AlbumName,ArtistID,ArtistLatitude,ArtistLocation,ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,Title,Year,Familiarity,Artist_Mbid,Artist_PlaymeId,Artist_7didId,Hottness,Song_Hottness,7digitalid,A_Sample_Rate,Audio_Md5,End_Of_Fade_In,Energy,Loudness,Mode,Mode_Conf,Start_Of_Fade_Out,TrackId" ################################################# csvAttributeList = re.split(',', csvRowString) for i, v in enumerate(csvAttributeList): csvAttributeList[i] = csvAttributeList[i].lower() csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" ################################################# #Set the basedir here, the root directory from which the search #for files stored in a (hierarchical data structure) will originate basedir = "/home/bigdata/smalltest/" # "." As the default means the current directory ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files. ################################################# #FOR LOOP for root, dirs, files in os.walk(basedir): files = glob.glob(os.path.join(root, '*' + ext)) for f in files: print(f) songH5File = hdf5_getters.open_h5_file_read(f) song = Song(str(hdf5_getters.get_song_id(songH5File))) # testDanceability = hdf5_getters.get_danceability(songH5File) # print type(testDanceability) # print ("Here is the danceability: ") + str(testDanceability) song.artistID = str(hdf5_getters.get_artist_id(songH5File)) song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File)) song.albumName = str(hdf5_getters.get_release(songH5File)) song.artistLatitude = str( hdf5_getters.get_artist_latitude(songH5File)) song.artistLocation = str( hdf5_getters.get_artist_location(songH5File)) song.artistLongitude = str( hdf5_getters.get_artist_longitude(songH5File)) song.artistName = str(hdf5_getters.get_artist_name(songH5File)) song.danceability = str(hdf5_getters.get_danceability(songH5File)) song.duration = str(hdf5_getters.get_duration(songH5File)) # song.setGenreList() song.keySignature = str(hdf5_getters.get_key(songH5File)) song.keySignatureConfidence = str( hdf5_getters.get_key_confidence(songH5File)) # song.lyrics = None # song.popularity = None song.tempo = str(hdf5_getters.get_tempo(songH5File)) song.timeSignature = str( hdf5_getters.get_time_signature(songH5File)) song.timeSignatureConfidence = str( hdf5_getters.get_time_signature_confidence(songH5File)) song.title = str(hdf5_getters.get_title(songH5File)) song.year = str(hdf5_getters.get_year(songH5File)) #########Added by us! 
song.familiarity = str( hdf5_getters.get_artist_familiarity(songH5File)) song.artist_mbid = str(hdf5_getters.get_artist_mbid(songH5File)) song.artist_playmeid = str( hdf5_getters.get_artist_playmeid(songH5File)) song.artist_7digid = str( hdf5_getters.get_artist_7digitalid(songH5File)) song.hottness = str(hdf5_getters.get_artist_hotttnesss(songH5File)) song.song_hottness = str( hdf5_getters.get_song_hotttnesss(songH5File)) song.digitalid7 = str( hdf5_getters.get_track_7digitalid(songH5File)) #song.similar_artists = str(hdf5_getters.get_similar_artists(songH5File)) #song.artist_terms = str(hdf5_getters.get_artist_terms(songH5File)) #song.art_terms_freq = str(hdf5_getters.get_artist_terms_freq(songH5File)) #song.art_terms_weight = str(hdf5_getters.get_artist_terms_weight(songH5File)) song.a_sample_rate = str( hdf5_getters.get_analysis_sample_rate(songH5File)) song.audio_md5 = str(hdf5_getters.get_audio_md5(songH5File)) song.end_of_fade_in = str( hdf5_getters.get_end_of_fade_in(songH5File)) song.energy = str(hdf5_getters.get_energy(songH5File)) song.loudness = str(hdf5_getters.get_loudness(songH5File)) song.mode = str(hdf5_getters.get_mode(songH5File)) song.mode_conf = str(hdf5_getters.get_mode_confidence(songH5File)) song.start_of_fade_out = str( hdf5_getters.get_start_of_fade_out(songH5File)) song.trackid = str(hdf5_getters.get_track_id(songH5File)) #song.segm_start = str(hdf5_getters.get_segments_start(songH5File)) #song.segm_conf = str(hdf5_getters.get_segments_confidence(songH5File)) #song.segm_pitch = str(hdf5_getters.get_segments_pitches(songH5File)) #song.segm_timbre = str(hdf5_getters.get_segments_timbre(songH5File)) #song.segm_max_loud = str(hdf5_getters.get_segments_loudness_max(songH5File)) #song.segm_max_loud_time = str(hdf5_getters.get_segments_loudness_max_time(songH5File)) #song.segm_loud_start = str(hdf5_getters.get_segments_loudness_start(songH5File)) #song.sect_start = str(hdf5_getters.get_sections_start(songH5File)) #song.sect_conf = str(hdf5_getters.get_sections_confidence(songH5File)) #song.beats_start = str(hdf5_getters.get_beats_start(songH5File)) #song.beats_conf = str(hdf5_getters.get_beats_confidence(songH5File)) #song.bars_start = str(hdf5_getters.get_bars_start(songH5File)) #song.bars_conf = str(hdf5_getters.get_bars_confidence(songH5File)) #song.tatums_start = str(hdf5_getters.get_tatums_start(songH5File)) #song.tatums_conf = str(hdf5_getters.get_tatums_confidence(songH5File)) #song.artist_mbtags = str(hdf5_getters.get_artist_mbtags(songH5File)) #song.artist_mbtags_count = str(hdf5_getters.get_artist_mbtags_count(songH5File)) #print song count #csvRowString += str(song.songCount) + "," for attribute in csvAttributeList: # print "Here is the attribute: " + attribute + " \n" if attribute == 'AlbumID'.lower(): csvRowString += song.albumID elif attribute == 'AlbumName'.lower(): albumName = song.albumName albumName = albumName.replace("b\"", "") albumName = albumName.replace("\"", "") albumName = albumName.replace(',', "") csvRowString += "\"" + albumName + "\"" elif attribute == 'ArtistID'.lower(): csvRowString += "\"" + song.artistID + "\"" elif attribute == 'ArtistLatitude'.lower(): latitude = song.artistLatitude if latitude == 'nan': latitude = '' csvRowString += latitude elif attribute == 'ArtistLocation'.lower(): location = song.artistLocation location = location.replace(',', '') location = location.replace("b\"", "") location = location.replace("\"", "") csvRowString += "\"" + location + "\"" elif attribute == 'ArtistLongitude'.lower(): longitude = 
song.artistLongitude if longitude == 'nan': longitude = '' csvRowString += longitude elif attribute == 'ArtistName'.lower(): artistName = song.artistName artistName = artistName.replace("b\"", "") artistName = artistName.replace("\"", "") csvRowString += "\"" + artistName + "\"" elif attribute == 'Danceability'.lower(): csvRowString += song.danceability elif attribute == 'Duration'.lower(): csvRowString += song.duration elif attribute == 'KeySignature'.lower(): csvRowString += song.keySignature elif attribute == 'KeySignatureConfidence'.lower(): # print "key sig conf: " + song.timeSignatureConfidence csvRowString += song.keySignatureConfidence elif attribute == 'SongID'.lower(): csvRowString += "\"" + song.id + "\"" elif attribute == 'Tempo'.lower(): # print "Tempo: " + song.tempo csvRowString += song.tempo elif attribute == 'TimeSignature'.lower(): csvRowString += song.timeSignature elif attribute == 'TimeSignatureConfidence'.lower(): # print "time sig conf: " + song.timeSignatureConfidence csvRowString += song.timeSignatureConfidence elif attribute == 'Title'.lower(): t = song.title t = t.replace("b\"", "") t = t.replace("\"", "") csvRowString += "\"" + t + "\"" elif attribute == 'Year'.lower(): csvRowString += song.year elif attribute == 'Familiarity'.lower(): ####Added by us! csvRowString += song.familiarity elif attribute == 'artist_mbid'.lower(): csvRowString += "\"" + song.artist_mbid + "\"" elif attribute == 'artist_playmeid'.lower(): csvRowString += song.artist_playmeid elif attribute == 'artist_7digid'.lower(): csvRowString += song.artist_7digid elif attribute == 'hottness'.lower(): csvRowString += song.hottness elif attribute == 'song_hottness'.lower(): csvRowString += song.song_hottness elif attribute == 'digitalid7'.lower(): csvRowString += song.digitalid7 elif attribute == 'similar_artists'.lower(): csvRowString += song.similar_artists elif attribute == 'artist_terms'.lower(): csvRowString += song.artist_terms elif attribute == 'art_terms_freq'.lower(): csvRowString += song.art_terms_freq elif attribute == 'art_terms_weight'.lower(): csvRowString += song.art_terms_weight elif attribute == 'a_sample_rate'.lower(): csvRowString += song.a_sample_rate elif attribute == 'audio_md5'.lower(): csvRowString += "\"" + song.audio_md5 + "\"" elif attribute == 'end_of_fade_in'.lower(): csvRowString += song.end_of_fade_in elif attribute == 'energy'.lower(): csvRowString += song.energy elif attribute == 'loudness'.lower(): csvRowString += song.loudness elif attribute == 'mode'.lower(): csvRowString += song.mode elif attribute == 'mode_conf'.lower(): csvRowString += song.mode_conf elif attribute == 'start_of_fade_out'.lower(): csvRowString += song.start_of_fade_out elif attribute == 'trackid'.lower(): csvRowString += "\"" + song.trackid + "\"" elif attribute == 'segm_start'.lower(): csvRowString += song.segm_start elif attribute == 'segm_conf'.lower(): csvRowString += song.segm_conf elif attribute == 'segm_pitch'.lower(): csvRowString += song.segm_pitch elif attribute == 'segm_timbre'.lower(): csvRowString += song.segm_timbre elif attribute == 'segm_max_loud'.lower(): csvRowString += song.segm_max_loud elif attribute == 'segm_max_loud_time'.lower(): csvRowString += song.segm_max_loud_time elif attribute == 'segm_loud_start'.lower(): csvRowString += song.segm_loud_start elif attribute == 'sect_start'.lower(): csvRowString += song.sect_start elif attribute == 'sect_conf'.lower(): csvRowString += song.sect_conf elif attribute == 'beats_start'.lower(): csvRowString += song.beats_start elif 
attribute == 'beats_conf'.lower(): csvRowString += song.beats_conf elif attribute == 'bars_start'.lower(): csvRowString += song.bars_start elif attribute == 'bars_conf'.lower(): csvRowString += song.bars_conf elif attribute == 'tatums_start'.lower(): csvRowString += song.tatums_start elif attribute == 'tatums_conf'.lower(): csvRowString += song.tatums_conf elif attribute == 'artist_mbtags'.lower(): csvRowString += song.artist_mbtags elif attribute == 'artist_mbtags_count'.lower(): csvRowString += song.artist_mbtags_count else: csvRowString += "\"ERR\"" csvRowString += "," #Remove the final comma from each row in the csv lastIndex = len(csvRowString) csvRowString = csvRowString[0:lastIndex - 1] csvRowString += "\n" outputFile1.write(csvRowString) csvRowString = "" songH5File.close() outputFile1.close()
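# A hedged alternative to the manual clean-up above (a sketch, not the original approach):
# csv.writer quotes fields that contain commas or quotes on its own, so the
# replace(',', '') / replace('"', '') calls and the trailing-comma trimming become
# unnecessary. `columns` is assumed to hold Song attribute names.
import csv

def write_song_row(writer, song, columns):
    """Write one row per Song, decoding bytes and letting csv handle quoting."""
    row = []
    for col in columns:
        value = getattr(song, col, '')
        if isinstance(value, bytes):
            value = value.decode('utf-8')
        row.append(value)
    writer.writerow(row)

# with open('SongCSV.csv', 'w', newline='') as out:
#     writer = csv.writer(out)  # QUOTE_MINIMAL by default
#     writer.writerow(columns)
#     for song in songs:
#         write_song_row(writer, song, columns)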