def add_data_to_artists_rel_table(con, data, song_index, artist_id):
    """Insert one ArtistsRel row per similar artist of a song and commit.

    Args:
        con: database connection providing ``cursor()`` and ``commit()``.
        data: handle passed through to ``h5.get_similar_artists``.
        song_index: song index passed through to ``h5.get_similar_artists``.
        artist_id: value stored in ``artist1_id`` for every inserted row.
    """
    insert_sql = "INSERT INTO ArtistsRel (artist1_id, artist2_id) VALUES (?, ?);"
    related_ids = h5.get_similar_artists(data, song_index)
    cur = con.cursor()

    # Pair the source artist with each similar artist: (artist1_id, artist2_id).
    rows = []
    for related_id in related_ids:
        rows.append((artist_id, related_id))

    cur.executemany(insert_sql, rows)
    con.commit()
def _covariance_features(cov_matrix):
    """Flatten a square covariance matrix into the per-track feature list.

    Row ``i`` contributes its first ``n - i`` columns, so a 12x12 matrix
    yields 78 values.
    """
    # NOTE(review): this keeps the original index pattern (columns 0..n-i-1 of
    # row i).  That is not the upper triangle (j >= i) of the symmetric
    # matrix, so some off-diagonal values appear twice and the later diagonal
    # entries are missing.  Confirm with downstream consumers before changing.
    size = len(cov_matrix)
    return [cov_matrix[i][j] for i in range(size) for j in range(size - i)]


def _has_value(value):
    """Return True when a field holds usable data (not NaN, not empty)."""
    if isinstance(value, float) and math.isnan(value):
        return False
    if isinstance(value, np.ndarray):
        # Arrays always count as present; their truth value is ambiguous.
        return True
    return bool(value)


def get_fields(files):
    """Read the selected fields of every HDF5 song file in *files*.

    Args:
        files: sequence of paths, each opened with
            ``hdf5_getters.open_h5_file_read``.

    Returns:
        A ``(tracks, counts, field_counts)`` tuple where ``tracks`` holds one
        dict of field values per file, ``counts`` maps each field name to the
        number of tracks in which it holds a usable value, and
        ``field_counts`` holds, per track, the number of usable fields.

    A text progress bar is written to stdout when the module-level
    ``num_of_tracks`` is at least 100.
    """
    tracks = []
    counts = {}
    field_counts = []
    for position, path in enumerate(files, start=1):
        h5 = hdf5_getters.open_h5_file_read(path)
        try:
            t = {}
            t['artist_familiarity'] = hdf5_getters.get_artist_familiarity(h5)  # estimation
            t['artist_hotttnesss'] = hdf5_getters.get_artist_hotttnesss(h5)  # estimation
            t['artist_name'] = hdf5_getters.get_artist_name(h5)  # artist name
            t['release'] = hdf5_getters.get_release(h5)  # album name
            t['title'] = hdf5_getters.get_title(h5)  # title
            t['len_similar_artists'] = len(
                hdf5_getters.get_similar_artists(h5))  # number of similar artists
            t['analysis_sample_rate'] = hdf5_getters.get_analysis_sample_rate(
                h5)  # sample rate of the audio used
            t['duration'] = hdf5_getters.get_duration(h5)  # seconds
            t['key'] = hdf5_getters.get_key(h5)  # key the song is in
            t['key_confidence'] = hdf5_getters.get_key_confidence(h5)  # confidence measure
            t['loudness'] = hdf5_getters.get_loudness(h5)  # overall loudness in dB
            t['mode_confidence'] = hdf5_getters.get_mode_confidence(h5)  # confidence measure
            t['start_of_fade_out'] = hdf5_getters.get_start_of_fade_out(h5)  # time in sec
            t['tempo'] = hdf5_getters.get_tempo(h5)  # estimated tempo in BPM
            t['time_signature'] = hdf5_getters.get_time_signature(
                h5)  # estimate of number of beats per bar, e.g. 4
            t['year'] = hdf5_getters.get_year(
                h5)  # song release year from MusicBrainz or 0

            timbre = hdf5_getters.get_segments_timbre(
                h5)  # 2D float array, texture features (MFCC+PCA-like)
            t['segments_timbre'] = timbre
            t['timbre_avg'] = timbre.mean(axis=0)  # list of 12 averages
            t['timbre_cov'] = _covariance_features(
                np.cov(timbre, rowvar=False))  # list of 78 covariances

            pitch = hdf5_getters.get_segments_pitches(
                h5)  # 2D float array, chroma feature, one value per note
            t['segments_pitch'] = pitch
            t['pitch_avg'] = pitch.mean(axis=0)  # list of 12 averages
            # Fixed: this list used to be filled from the timbre covariance
            # matrix, so 'pitch_cov' silently duplicated 'timbre_cov'.
            t['pitch_cov'] = _covariance_features(
                np.cov(pitch, rowvar=False))  # list of 78 covariances

            # Fields deliberately not loaded:
            #   artist_latitude, artist_longitude, artist_location,
            #   song_hotttnesss, danceability, end_of_fade_in, energy, mode,
            #   time_signature_confidence, artist_mbtags_count
            # Skipped as bad types or non-arithmetic values:
            #   audio_md5, artist_terms_weight, artist_terms_freq,
            #   artist_terms, artist_id, artist_mbid, artist_playmeid,
            #   artist_7digitalid, release_7digitalid, song_id,
            #   track_7digitalid, similar_artists, track_id, segments_start,
            #   segments_confidence, segments_loudness_max,
            #   segments_loudness_max_time, segments_loudness_start,
            #   sections_start, sections_confidence, beats_start,
            #   beats_confidence, bars_start, bars_confidence, tatums_start,
            #   tatums_confidence, artist_mbtags
        finally:
            # Release the handle even when a getter raises.
            h5.close()

        # Tally usable fields per field name and per track.  NaN values are
        # treated as missing in both tallies; previously the per-field tally
        # counted them because its NaN guard was a no-op.
        populated = 0
        for key, value in t.items():
            present = _has_value(value)
            counts[key] = counts.get(key, 0) + (1 if present else 0)
            if present:
                populated += 1
        field_counts.append(populated)

        # progress bar
        if num_of_tracks >= 100:
            scale = num_of_tracks / 100
            # Redraw roughly every 5% of the files.
            if position % math.ceil(len(files) * .05) == 0:
                sys.stdout.write('\r')
                sys.stdout.write("Loading dataframe: [%-100s] %d%%" %
                                 ('=' * int(position // scale),
                                  1 / scale * position))
                sys.stdout.flush()
                time.sleep(.01)
        tracks.append(t)
    print()
    return tracks, counts, field_counts
import os

import hdf5_getters

# Walk one leaf directory of the dataset and print the similar-artist ids
# stored in every .h5 file found below it.
rootdir = '../MillionSongSubset/data/A/A/A'

for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        if not file.endswith(".h5"):
            continue
        h5 = hdf5_getters.open_h5_file_read(os.path.join(subdir, file))
        try:
            similar = hdf5_getters.get_similar_artists(h5)
            # Print at most the first 99 entries.  Slicing (instead of
            # indexing 0..98) avoids an IndexError on shorter lists.
            for artist in similar[:99]:
                print(artist)
        finally:
            # Close the handle even if reading or printing fails.
            h5.close()
def writeSingleHDF5FileToTxtFile(songHDF5FileName):
    """Export one song's HDF5 file to the tab-separated table files.

    Opens ``songHDF5FileName`` through ``GETTERS`` and then:
      - writes one row to ``songsTableFile`` and one to ``artistsTableFile``;
      - writes artist/tag rows to ``artistsTagsTableFile`` and new tags to
        ``tagsTableFile`` (tracked in ``tagsSet``);
      - records similar-artist pairs in ``similarArtistsPairsList`` and the
        artist id/name in ``artistsIDsSet`` / ``artistsNamesSet``;
      - updates the module-level ``maximum*Len`` counters.

    The HDF5 file is closed before returning, also when an error occurs.
    """
    global maximumArtistNameLen
    global maximumArtistTagLen
    global maximumSongNameLen
    global maximumAlbumNameLen

    songHDF5File = GETTERS.open_h5_file_read(songHDF5FileName)
    try:
        songID = GETTERS.get_song_id(songHDF5File)
        songName = GETTERS.get_title(songHDF5File)
        artistID = GETTERS.get_artist_id(songHDF5File)
        songAlbum = GETTERS.get_release(songHDF5File)
        songYear = GETTERS.get_year(songHDF5File)
        songTempo = GETTERS.get_tempo(songHDF5File)
        songDanceability = GETTERS.get_danceability(songHDF5File)
        songDuration = GETTERS.get_duration(songHDF5File)
        songEnergy = GETTERS.get_energy(songHDF5File)
        songKey = GETTERS.get_key(songHDF5File)
        songLoudness = GETTERS.get_loudness(songHDF5File)
        songMode = GETTERS.get_mode(songHDF5File)
        songTimeSignature = GETTERS.get_time_signature(songHDF5File)

        songsTableFile.write(
            songID + "\t" + songName + "\t" + artistID + "\t" + songAlbum +
            "\t" + str(songYear) + "\t" + str(songTempo) + "\t" +
            str(songDanceability) + "\t" + str(songDuration) + "\t" +
            str(songEnergy) + "\t" + str(songKey) + "\t" +
            str(songLoudness) + "\t" + str(songMode) + "\t" +
            str(songTimeSignature) + "\t\n")

        artistName = GETTERS.get_artist_name(songHDF5File)
        artistFamiliarity = GETTERS.get_artist_familiarity(songHDF5File)
        artistTagsArray = GETTERS.get_artist_mbtags(songHDF5File)

        artistsTableFile.write(artistID + "\t" + artistName + "\t" +
                               str(artistFamiliarity) + "\t\n")

        # Track the longest values seen so far.
        if len(songName) > maximumSongNameLen:
            maximumSongNameLen = len(songName)
        if len(songAlbum) > maximumAlbumNameLen:
            maximumAlbumNameLen = len(songAlbum)
        if len(artistName) > maximumArtistNameLen:
            maximumArtistNameLen = len(artistName)

        # NOTE(review): tag bookkeeping is applied only to tags found in
        # allowedTagsSet - confirm that disallowed tags are meant to be
        # left out of the tags table as well.
        for artistTag in artistTagsArray:
            if artistTag in allowedTagsSet:
                artistsTagsTableFile.write(artistID + "\t" + artistTag +
                                           "\t\n")
                if artistTag not in tagsSet:
                    # First occurrence of this tag: emit it once.
                    tagsTableFile.write(artistTag + "\t\n")
                    tagsSet.add(artistTag)
                    if len(artistTag) > maximumArtistTagLen:
                        maximumArtistTagLen = len(artistTag)

        similarArtists = GETTERS.get_similar_artists(songHDF5File)
        for similarArtist in similarArtists:
            similarArtistsPairsList.add((artistID, similarArtist))

        artistsIDsSet.add(artistID)
        artistsNamesSet.add(artistName)
    finally:
        # Previously the handle leaked whenever a getter or a write raised.
        songHDF5File.close()
# Per-song field reads: every getter is called with the open handle `h5` and
# the index `i` (presumably the song index within the file - confirm against
# the enclosing loop, which is not visible here).
mode = GETTERS.get_mode(h5, i)
mode_confidence = GETTERS.get_mode_confidence(h5, i)
release = GETTERS.get_release(h5, i)
release_7digitalid = GETTERS.get_release_7digitalid(h5, i)
# Array-valued fields below are currently disabled.
#sections_confidence = ','.join(str(e) for e in GETTERS.get_sections_confidence(h5, i)) # array
#sections_start = ','.join(str(e) for e in GETTERS.get_sections_start(h5, i)) # array
#segments_confidence = ','.join(str(e) for e in GETTERS.get_segments_confidence(h5, i)) # array
#segments_loudness_max = ','.join(str(e) for e in GETTERS.get_segments_loudness_max(h5, i)) # array
#segments_loudness_max_time = ','.join(str(e) for e in GETTERS.get_segments_loudness_max_time(h5, i)) # array
#segments_loudness_start = ','.join(str(e) for e in GETTERS.get_segments_loudness_start(h5, i)) # array
#segments_pitches = ','.join(str(e) for e in GETTERS.get_segments_pitches(h5, i)) # array
#segments_start = ','.join(str(e) for e in GETTERS.get_segments_start(h5, i)) # array
#segments_timbre = ','.join(str(e) for e in GETTERS.get_segments_timbre(h5, i)) # array
# The similar-artist array is flattened into one comma-separated string.
similar_artists = ','.join(
    str(e) for e in GETTERS.get_similar_artists(h5, i))  # array
song_hotttnesss = GETTERS.get_song_hotttnesss(h5, i)
song_id = GETTERS.get_song_id(h5, i)
start_of_fade_out = GETTERS.get_start_of_fade_out(h5, i)
#tatums_confidence = ','.join(str(e) for e in GETTERS.get_tatums_confidence(h5, i)) # array
#tatums_start = ','.join(str(e) for e in GETTERS.get_tatums_start(h5, i)) # array
tempo = GETTERS.get_tempo(h5, i)
time_signature = GETTERS.get_time_signature(h5, i)
time_signature_confidence = GETTERS.get_time_signature_confidence(
    h5, i)
title = GETTERS.get_title(h5, i)
track_7digitalid = GETTERS.get_track_7digitalid(h5, i)
track_id = GETTERS.get_track_id(h5, i)
year = GETTERS.get_year(h5, i)
# Running counter maintained by the surrounding code (not visible here).
loops += 1
def fill_attributes(song, songH5File):
    """Copy the fields of an open HDF5 song file onto *song*.

    Scalar fields are stored as ``str`` values.  Numeric array fields are
    reduced to ``<name>_mean`` / ``<name>_var`` pairs by
    ``convert_array_to_meanvar`` (``covert_2darray_to_meanvar`` for the 2D
    timbre and pitch arrays).  String arrays are converted by
    ``convert_array_to_string``.

    Args:
        song: object that receives the attributes.
        songH5File: open HDF5 handle passed to every ``hdf5_getters`` call.

    Returns:
        The same *song* object, for convenience.
    """
    #----------------------------non array attributes-------------------------------
    song.analysisSampleRate = str(
        hdf5_getters.get_analysis_sample_rate(songH5File))
    song.artistDigitalID = str(hdf5_getters.get_artist_7digitalid(songH5File))
    song.artistFamiliarity = str(
        hdf5_getters.get_artist_familiarity(songH5File))
    # Fixed: the getter is spelled get_artist_hotttnesss; the previous
    # get_artist_hottness does not match the name used elsewhere in this file.
    song.artistHotness = str(hdf5_getters.get_artist_hotttnesss(songH5File))
    song.artistID = str(hdf5_getters.get_artist_id(songH5File))
    song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
    song.artistLocation = str(hdf5_getters.get_artist_location(songH5File))
    song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
    song.artistmbID = str(hdf5_getters.get_artist_mbid(songH5File))
    song.artistName = str(hdf5_getters.get_artist_name(songH5File))
    song.artistPlayMeID = str(hdf5_getters.get_artist_playmeid(songH5File))
    song.audioMD5 = str(hdf5_getters.get_audio_md5(songH5File))
    song.danceability = str(hdf5_getters.get_danceability(songH5File))
    song.duration = str(hdf5_getters.get_duration(songH5File))
    song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File))
    song.energy = str(hdf5_getters.get_energy(songH5File))
    song.key = str(hdf5_getters.get_key(songH5File))
    song.keyConfidence = str(hdf5_getters.get_key_confidence(songH5File))
    # NOTE(review): the same (misspelled) attribute is assigned twice, so the
    # segments value is immediately overwritten by the sections value.  Kept
    # as-is because readers of `segementsConfidence` are outside this block;
    # confirm the intended attribute names before changing.
    song.segementsConfidence = str(
        hdf5_getters.get_segments_confidence(songH5File))
    song.segementsConfidence = str(
        hdf5_getters.get_sections_confidence(songH5File))
    song.loudness = str(hdf5_getters.get_loudness(songH5File))
    song.mode = str(hdf5_getters.get_mode(songH5File))
    song.modeConfidence = str(hdf5_getters.get_mode_confidence(songH5File))
    song.release = str(hdf5_getters.get_release(songH5File))
    song.releaseDigitalID = str(
        hdf5_getters.get_release_7digitalid(songH5File))
    song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
    song.startOfFadeOut = str(hdf5_getters.get_start_of_fade_out(songH5File))
    song.tempo = str(hdf5_getters.get_tempo(songH5File))
    song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
    song.timeSignatureConfidence = str(
        hdf5_getters.get_time_signature_confidence(songH5File))
    song.title = str(hdf5_getters.get_title(songH5File))
    song.trackID = str(hdf5_getters.get_track_id(songH5File))
    song.trackDigitalID = str(hdf5_getters.get_track_7digitalid(songH5File))
    song.year = str(hdf5_getters.get_year(songH5File))

    #-------------------------------array attributes--------------------------------------
    # array float
    song.beatsStart_mean, song.beatsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_beats_start(songH5File))
    # array float
    song.artistTermsFreq_mean, song.artistTermsFreq_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_terms_freq(songH5File))
    # array float
    song.artistTermsWeight_mean, song.artistTermsWeight_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_terms_weight(songH5File))
    # array int
    song.artistmbTagsCount_mean, song.artistmbTagsCount_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_mbtags_count(songH5File))
    # array float
    song.barsConfidence_mean, song.barsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_bars_confidence(songH5File))
    # array float
    song.barsStart_mean, song.barsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_bars_start(songH5File))
    # array float
    song.beatsConfidence_mean, song.beatsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_beats_confidence(songH5File))
    # array float
    song.sectionsConfidence_mean, song.sectionsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_sections_confidence(songH5File))
    # array float
    song.sectionsStart_mean, song.sectionsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_sections_start(songH5File))
    # array float
    song.segmentsConfidence_mean, song.segmentsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_confidence(songH5File))
    # array float
    song.segmentsLoudness_mean, song.segmentsLoudness_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_max(songH5File))
    # array float
    song.segmentsLoudnessMaxTime_mean, song.segmentsLoudnessMaxTime_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_max_time(songH5File))
    # array float
    song.segmentsLoudnessMaxStart_mean, song.segmentsLoudnessMaxStart_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_start(songH5File))
    # array float
    song.segmentsStart_mean, song.segmentsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_start(songH5File))
    # array float
    song.tatumsConfidence_mean, song.tatumsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_tatums_confidence(songH5File))
    # array float
    song.tatumsStart_mean, song.tatumsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_tatums_start(songH5File))
    # array2d float
    song.segmentsTimbre_mean, song.segmentsTimbre_var = covert_2darray_to_meanvar(
        hdf5_getters.get_segments_timbre(songH5File))
    # array2d float
    song.segmentsPitches_mean, song.segmentsPitches_var = covert_2darray_to_meanvar(
        hdf5_getters.get_segments_pitches(songH5File))

    #------------------------array string attributes------------------------
    # array string
    song.similarArtists = convert_array_to_string(
        hdf5_getters.get_similar_artists(songH5File))
    # array string
    song.artistTerms = convert_array_to_string(
        hdf5_getters.get_artist_terms(songH5File))
    # array string
    song.artistmbTags = convert_array_to_string(
        hdf5_getters.get_artist_mbtags(songH5File))

    return song
# Column names accepted at the interactive prompt, in canonical spelling.
_PROMPT_COLUMNS = (
    'AlbumID', 'AlbumName', 'ArtistID', 'ArtistLatitude', 'ArtistLocation',
    'ArtistLongitude', 'ArtistName', 'Danceability', 'Duration',
    'ArtistHotttnesss', 'KeySignature', 'KeySignatureConfidence', 'SongID',
    'SongHotttnesss', 'Tempo', 'Loudness', 'Energy', 'TimeSignature',
    'TimeSignatureConfidence', 'Title', 'Year',
)

# Lower-cased column name -> Song attribute written verbatim.
_PLAIN_FIELDS = {
    'albumid': 'albumID',
    'danceability': 'danceability',
    'duration': 'duration',
    'artisthotttnesss': 'artistHotttnesss',
    'artistfamiliarity': 'artistFmiliarity',
    'keysignature': 'keySignature',
    'keysignatureconfidence': 'keySignatureConfidence',
    'songhotttnesss': 'songHotttnesss',
    'tempo': 'tempo',
    'loudness': 'songLoudness',
    'energy': 'songEnergy',
    'timesignature': 'timeSignature',
    'timesignatureconfidence': 'timeSignatureConfidence',
    'year': 'year',
    'audio': 'audio',
}
# Written wrapped in double quotes.
_QUOTED_FIELDS = {
    'artistid': 'artistID',
    'artistname': 'artistName',
    'songid': 'id',
    'title': 'title',
}
# Commas removed, then wrapped in double quotes.
_COMMA_STRIPPED_FIELDS = {
    'albumname': 'albumName',
    'artistlocation': 'artistLocation',
}
# The string 'nan' is written as an empty cell.
_NAN_BLANKED_FIELDS = {
    'artistlatitude': 'artistLatitude',
    'artistlongitude': 'artistLongitude',
}
# Sequences written as one quoted cell of doubly-quoted items.
_LIST_FIELDS = {
    'similarartists': 'similarArtists',
    'genre': 'genreList',
}


def _quoted_list(items):
    """Return *items* as one quoted CSV cell: "\"\"a\"\",\"\"b\"\""."""
    return "\"" + ",".join("\"\"" + str(item) + "\"\"" for item in items) + "\""


def _csv_field(song, attribute):
    """Return the CSV text of one lower-cased column name for *song*."""
    if attribute in _PLAIN_FIELDS:
        return getattr(song, _PLAIN_FIELDS[attribute])
    if attribute in _QUOTED_FIELDS:
        return "\"" + getattr(song, _QUOTED_FIELDS[attribute]) + "\""
    if attribute in _COMMA_STRIPPED_FIELDS:
        text = getattr(song, _COMMA_STRIPPED_FIELDS[attribute])
        return "\"" + text.replace(',', '') + "\""
    if attribute in _NAN_BLANKED_FIELDS:
        text = getattr(song, _NAN_BLANKED_FIELDS[attribute])
        return '' if text == 'nan' else text
    if attribute in _LIST_FIELDS:
        return _quoted_list(getattr(song, _LIST_FIELDS[attribute]))
    # Unknown column: keep the historical marker text in the output.
    return "Erm. This didn't work. Error. :( :(\n"


def _prompt_for_columns():
    """Ask for the column order until the answer is valid.

    Returns:
        ``(attributes, header_names)``: the lower-cased column names and
        their canonical spellings.  Typing ``exit`` terminates the program.
    """
    canonical = dict((name.lower(), name) for name in _PROMPT_COLUMNS)
    while True:
        csvAttributeString = raw_input("\n\nIn what order would you like the colums of the CSV file?\n" +
            "Please delineate with commas. The options are: " +
            "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"+
            " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," +
            " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" +
            "For example, you may write \"Title, Tempo, Duration\"...\n\n" +
            "...or exit by typing 'exit'.\n\n")
        attributes = [name.lower()
                      for name in re.split(r'\W+', csvAttributeString)]
        header_names = []
        for attribute in attributes:
            if attribute == 'exit':
                sys.exit()
            if attribute not in canonical:
                print("==============")
                print("I believe there has been an error with the input.")
                print("==============")
                break
            header_names.append(canonical[attribute])
        else:
            # Every name was recognised.
            return attributes, header_names


def main():
    """Write one CSV row per ``.h5`` song file found under ``basedir``.

    The header and all rows go to ``SongCSV.csv`` in the current directory.
    Each row starts with the song counter (``song.songCount``) followed by
    the configured columns.  Set ``prompt`` to True to ask the user for the
    column order, otherwise edit the hard-coded column list below.
    """
    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #leave the prompt boolean set to True
    #else, set 'prompt' to False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################

    # `with` closes the CSV even on errors or sys.exit() from the prompt.
    with open('SongCSV.csv', 'w') as outputFile1:
        if prompt:
            # Fixed: the header is written only after a fully valid answer
            # (a partial header line used to be written for every rejected
            # answer).
            csvAttributeList, headerNames = _prompt_for_columns()
            csvRowString = ",".join(headerNames)
        else:
            #################################################
            #change the order of the csv file here
            #Default is to list all available attributes (in alphabetical order)
            csvRowString = ("SongID,AlbumID,AlbumName,ArtistID,ArtistHotttnesss,ArtistFamiliarity,ArtistLatitude,ArtistLocation,"+
                "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,"+
                "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"+
                "Title,SongHotttnesss,Loudness,Energy,Year,SimilarArtists,Genre,Audio")
            #################################################
            csvAttributeList = [name.lower()
                                for name in re.split(r'\W+', csvRowString)]

        # Every data row starts with the song counter, so both branches need
        # the leading "SongNumber" column (the prompt branch used to omit it).
        outputFile1.write("SongNumber,")
        outputFile1.write(csvRowString + "\n")

        #################################################
        #TODO Enter base folder here
        #Set the basedir here, the root directory from which the search
        #for files stored in a (hierarchical data structure) will originate
        # Raw string: the backslashes of the Windows path must stay literal.
        basedir = r"C:\Users\NadavSpitzer\Documents\מדמח\שנה ג'\סמסטר א'\סדנה במסדי נתונים\DB\MillionSongSubset\data" # "." As the default means the current directory
        ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files.
        #################################################

        for root, dirs, files in os.walk(basedir):
            for f in glob.glob(os.path.join(root, '*' + ext)):
                print(f)

                songH5File = hdf5_getters.open_h5_file_read(f)
                try:
                    song = Song(str(hdf5_getters.get_song_id(songH5File)))

                    song.artistID = str(hdf5_getters.get_artist_id(songH5File))
                    song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
                    song.albumName = str(hdf5_getters.get_release(songH5File))
                    song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
                    song.artistLocation = str(hdf5_getters.get_artist_location(songH5File))
                    song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
                    song.artistName = str(hdf5_getters.get_artist_name(songH5File))
                    song.danceability = str(hdf5_getters.get_danceability(songH5File))
                    song.duration = str(hdf5_getters.get_duration(songH5File))
                    song.artistHotttnesss = str(hdf5_getters.get_artist_hotttnesss(songH5File))
                    song.keySignature = str(hdf5_getters.get_key(songH5File))
                    song.keySignatureConfidence = str(hdf5_getters.get_key_confidence(songH5File))
                    song.lyrics = None
                    song.popularity = None
                    song.tempo = str(hdf5_getters.get_tempo(songH5File))
                    song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
                    song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File))
                    song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
                    song.title = str(hdf5_getters.get_title(songH5File))
                    song.year = str(hdf5_getters.get_year(songH5File))
                    song.setGenreList(songH5File)
                    song.songLoudness = str(hdf5_getters.get_loudness(songH5File))
                    song.songEnergy = str(hdf5_getters.get_energy(songH5File))
                    song.artistFmiliarity = str(hdf5_getters.get_artist_familiarity(songH5File))
                    song.similarArtists = hdf5_getters.get_similar_artists(songH5File)
                    song.audio = hdf5_getters.get_audio_md5(songH5File)

                    # Song counter first, then one cell per configured column.
                    fields = [str(song.songCount)]
                    fields.extend(_csv_field(song, attribute)
                                  for attribute in csvAttributeList)
                    outputFile1.write(",".join(fields) + "\n")
                finally:
                    # Close the song file even if a getter or write fails.
                    songH5File.close()
song.endOfFadeIn = remove_trap_characters( str(hdf5_getters.get_end_of_fade_in(songH5File))) song.startOfFadeOut = remove_trap_characters( str(hdf5_getters.get_start_of_fade_out(songH5File))) song.energy = remove_trap_characters( str(hdf5_getters.get_energy(songH5File))) song.release = remove_trap_characters( str(hdf5_getters.get_release(songH5File))) song.release7digitalid = remove_trap_characters( str(hdf5_getters.get_release_7digitalid(songH5File))) song.songHotness = remove_trap_characters( str(hdf5_getters.get_song_hotttnesss(songH5File))) song.track7digitalid = remove_trap_characters( str(hdf5_getters.get_track_7digitalid(songH5File))) temp = hdf5_getters.get_similar_artists(songH5File) song.similarartists = remove_trap_characters(str(list(list(temp)))) song.similarArtistsCount = get_list_length(temp) song.loudness = remove_trap_characters( str(hdf5_getters.get_loudness(songH5File))) song.mode = remove_trap_characters( str(hdf5_getters.get_mode(songH5File))) song.modeConfidence = remove_trap_characters( str(hdf5_getters.get_mode_confidence(songH5File))) song.artistName = remove_trap_characters( str(hdf5_getters.get_artist_name(songH5File))) song.danceability = remove_trap_characters( str(hdf5_getters.get_danceability(songH5File))) song.duration = remove_trap_characters( str(hdf5_getters.get_duration(songH5File))) song.keySignature = remove_trap_characters(
# Append the remaining columns of the current track to `row`, close the
# track's handle `ds`, and write the finished row.  Here `h5` is the getter
# module/namespace and `ds` the open handle passed to each getter.
row += [h5.get_artist_terms_freq(ds)]
row += [h5.get_artist_terms_weight(ds)]
row += [h5.get_danceability(ds)]
row += [h5.get_energy(ds)]
row += [h5.get_key(ds)]
row += [h5.get_mode(ds)]
row += [h5.get_loudness(ds)]
# Folder path of the file, built from the three directory name parts.
row += [
    parent_folder + '/' + sub_folder + '/' + child_folder + '/'
]
row += [file]
row += [h5.get_duration(ds)]
row += [h5.get_artist_familiarity(ds)]
row += [h5.get_similar_artists(ds)]
row += [h5.get_artist_id(ds)]
row += [h5.get_title(ds)]
row += [h5.get_song_hotttnesss(ds)]
row += [h5.get_year(ds)]
row += [h5.get_artist_latitude(ds)]
row += [h5.get_artist_longitude(ds)]
# file[:-3] drops the last three characters of the file name (presumably the
# ".h5" extension - confirm) before the lookup in matched_scores.
row += [
    get_midi_name_from_matched(
        file[:-3], matched_scores)
]
ds.close()
csv_writer.writerow(row)
# Per-song field reads: every getter is called with the open handle `h5` and
# the index `i` (presumably the song index within the file - confirm against
# the enclosing loop, which is not visible here).
key_confidence = GETTERS.get_key_confidence(h5, i)
loudness = GETTERS.get_loudness(h5, i)
mode = GETTERS.get_mode(h5, i)
mode_confidence = GETTERS.get_mode_confidence(h5, i)
release = GETTERS.get_release(h5, i)
release_7digitalid = GETTERS.get_release_7digitalid(h5, i)
# Array-valued fields below are currently disabled.
#sections_confidence = ','.join(str(e) for e in GETTERS.get_sections_confidence(h5, i)) # array
#sections_start = ','.join(str(e) for e in GETTERS.get_sections_start(h5, i)) # array
#segments_confidence = ','.join(str(e) for e in GETTERS.get_segments_confidence(h5, i)) # array
#segments_loudness_max = ','.join(str(e) for e in GETTERS.get_segments_loudness_max(h5, i)) # array
#segments_loudness_max_time = ','.join(str(e) for e in GETTERS.get_segments_loudness_max_time(h5, i)) # array
#segments_loudness_start = ','.join(str(e) for e in GETTERS.get_segments_loudness_start(h5, i)) # array
#segments_pitches = ','.join(str(e) for e in GETTERS.get_segments_pitches(h5, i)) # array
#segments_start = ','.join(str(e) for e in GETTERS.get_segments_start(h5, i)) # array
#segments_timbre = ','.join(str(e) for e in GETTERS.get_segments_timbre(h5, i)) # array
# The similar-artist array is flattened into one comma-separated string.
similar_artists = ','.join(str(e) for e in GETTERS.get_similar_artists(h5, i))  # array
song_hotttnesss = GETTERS.get_song_hotttnesss(h5, i)
song_id = GETTERS.get_song_id(h5, i)
start_of_fade_out = GETTERS.get_start_of_fade_out(h5, i)
#tatums_confidence = ','.join(str(e) for e in GETTERS.get_tatums_confidence(h5, i)) # array
#tatums_start = ','.join(str(e) for e in GETTERS.get_tatums_start(h5, i)) # array
tempo = GETTERS.get_tempo(h5, i)
time_signature = GETTERS.get_time_signature(h5, i)
time_signature_confidence = GETTERS.get_time_signature_confidence(h5, i)
title = GETTERS.get_title(h5, i)
track_7digitalid = GETTERS.get_track_7digitalid(h5, i)
track_id = GETTERS.get_track_id(h5, i)
year = GETTERS.get_year(h5, i)
# Running counter maintained by the surrounding code (not visible here).
loops += 1
def hd5_single_random_file_parser(
        h5_path='/home/skalogerakis/Documents/MillionSong/MillionSongSubset/A/M/G/TRAMGDX12903CEF79F.h5',
        csv_path='Tester2.csv'):
    """Dump every field of one song file to stdout and write a tiny CSV.

    Exploratory helper: lists the names in ``hdf5_getters`` that start with
    ``get``, prints each field of the song file with a label, then writes a
    two-column (``Title``, ``Artist ID``) semicolon-delimited CSV.

    Args:
        h5_path: Path of the HDF5 song file to inspect. Defaults to the
            sample file this helper was originally written against.
        csv_path: Path of the CSV file to (over)write. Defaults to
            ``'Tester2.csv'`` in the current working directory.

    Returns:
        None. All output goes to stdout and to ``csv_path``.
    """
    # (label, getter) pairs in the order they are printed.
    # 55 available fields (excluding number of songs fields).
    labelled_getters = (
        ("Num of songs -- ", hdf5_getters.get_num_songs),  # One song per file
        ("Title -- ", hdf5_getters.get_title),
        ("Artist familiarity -- ", hdf5_getters.get_artist_familiarity),
        ("Artist hotness -- ", hdf5_getters.get_artist_hotttnesss),
        ("Artist ID -- ", hdf5_getters.get_artist_id),
        ("Artist mbID -- ", hdf5_getters.get_artist_mbid),
        ("Artist playmeid -- ", hdf5_getters.get_artist_playmeid),
        ("Artist 7DigitalID -- ", hdf5_getters.get_artist_7digitalid),
        ("Artist latitude -- ", hdf5_getters.get_artist_latitude),
        ("Artist longitude -- ", hdf5_getters.get_artist_longitude),
        ("Artist location -- ", hdf5_getters.get_artist_location),
        ("Artist Name -- ", hdf5_getters.get_artist_name),
        ("Release -- ", hdf5_getters.get_release),
        ("Release 7DigitalID -- ", hdf5_getters.get_release_7digitalid),
        ("Song ID -- ", hdf5_getters.get_song_id),
        ("Song Hotness -- ", hdf5_getters.get_song_hotttnesss),
        ("Track 7Digital -- ", hdf5_getters.get_track_7digitalid),
        ("Similar artists -- ", hdf5_getters.get_similar_artists),
        ("Artist terms -- ", hdf5_getters.get_artist_terms),
        ("Artist terms freq -- ", hdf5_getters.get_artist_terms_freq),
        ("Artist terms weight -- ", hdf5_getters.get_artist_terms_weight),
        ("Analysis sample rate -- ", hdf5_getters.get_analysis_sample_rate),
        ("Audio md5 -- ", hdf5_getters.get_audio_md5),
        ("Danceability -- ", hdf5_getters.get_danceability),
        ("Duration -- ", hdf5_getters.get_duration),
        ("End of Fade -- ", hdf5_getters.get_end_of_fade_in),
        ("Energy -- ", hdf5_getters.get_energy),
        ("Key -- ", hdf5_getters.get_key),
        ("Key Confidence -- ", hdf5_getters.get_key_confidence),
        ("Loudness -- ", hdf5_getters.get_loudness),
        ("Mode -- ", hdf5_getters.get_mode),
        ("Mode Confidence -- ", hdf5_getters.get_mode_confidence),
        ("Start of fade out -- ", hdf5_getters.get_start_of_fade_out),
        ("Tempo -- ", hdf5_getters.get_tempo),
        ("Time signature -- ", hdf5_getters.get_time_signature),
        ("Time signature confidence -- ",
         hdf5_getters.get_time_signature_confidence),
        ("Track ID -- ", hdf5_getters.get_track_id),
        ("Segments Start -- ", hdf5_getters.get_segments_start),
        ("Segments Confidence -- ", hdf5_getters.get_segments_confidence),
        ("Segments Pitches -- ", hdf5_getters.get_segments_pitches),
        ("Segments Timbre -- ", hdf5_getters.get_segments_timbre),
        ("Segments Loudness max -- ", hdf5_getters.get_segments_loudness_max),
        ("Segments Loudness max time-- ",
         hdf5_getters.get_segments_loudness_max_time),
        ("Segments Loudness start -- ",
         hdf5_getters.get_segments_loudness_start),
        ("Sections start -- ", hdf5_getters.get_sections_start),
        ("Sections Confidence -- ", hdf5_getters.get_sections_confidence),
        ("Beats start -- ", hdf5_getters.get_beats_start),
        ("Beats confidence -- ", hdf5_getters.get_beats_confidence),
        ("Bars start -- ", hdf5_getters.get_bars_start),
        ("Bars confidence -- ", hdf5_getters.get_bars_confidence),
        ("Tatums start -- ", hdf5_getters.get_tatums_start),
        ("Tatums confidence -- ", hdf5_getters.get_tatums_confidence),
        ("Artist mbtags -- ", hdf5_getters.get_artist_mbtags),
        ("Artist mbtags count -- ", hdf5_getters.get_artist_mbtags_count),
        ("Year -- ", hdf5_getters.get_year),
    )

    # Open an h5 file in read mode
    h5 = hdf5_getters.open_h5_file_read(h5_path)
    try:
        # Detect and print all the getter functions the module exposes.
        for name in hdf5_getters.__dict__.keys():
            if name.startswith('get'):
                print(name)

        # First effort to check what each field contains.
        print()
        for label, getter in labelled_getters:
            print(label, getter(h5))

        with open(csv_path, 'w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=';')
            # writing the fields
            csv_writer.writerow(['Title', 'Artist ID'])
            # writing the data rows
            csv_writer.writerow(
                [hdf5_getters.get_title(h5), hdf5_getters.get_artist_id(h5)])
    finally:
        # Close the h5 handle even when a getter or the CSV write fails.
        h5.close()
# Field names accepted in the field-list file. Each NAME is read from a song
# file through ``getters.get_NAME``.
_INFO_FIELDS = frozenset((
    'track_id', 'segments_pitches', 'time_signature_confidence',
    'song_hotttnesss', 'artist_longitude', 'tatums_confidence', 'num_songs',
    'duration', 'start_of_fade_out', 'artist_name', 'similar_artists',
    'artist_mbtags', 'artist_terms_freq', 'release', 'song_id',
    'track_7digitalid', 'title', 'artist_latitude', 'energy', 'key',
    'release_7digitalid', 'artist_mbid', 'segments_confidence',
    'artist_hotttnesss', 'time_signature', 'segments_loudness_max_time',
    'mode', 'segments_loudness_start', 'tempo', 'key_confidence',
    'analysis_sample_rate', 'bars_confidence', 'artist_playmeid',
    'artist_terms_weight', 'segments_start', 'artist_location', 'loudness',
    'year', 'artist_7digitalid', 'audio_md5', 'segments_timbre',
    'mode_confidence', 'end_of_fade_in', 'danceability',
    'artist_familiarity', 'artist_mbtags_count', 'tatums_start', 'artist_id',
    'segments_loudness_max', 'bars_start', 'beats_start', 'artist_terms',
    'sections_start', 'beats_confidence', 'sections_confidence',
))


def _clean_field(value):
    """Render one getter result as a single-line, quote-free CSV cell."""
    import re  # local import: the file's import block is not visible here

    text = str(value).replace('\n', '')
    if isinstance(value, bytes) and text.startswith('b'):
        # str() of a bytes object is its literal form (b'...'); drop exactly
        # that one-character prefix and nothing else.
        text = text[1:]
    else:
        # Containers of bytes render each element as b'...'. Remove a "b"
        # only where it directly prefixes a quote and is not the last letter
        # of a word, so ordinary "b" characters in the data survive.
        text = re.sub(r"(?<![0-9A-Za-z_])b(?=['\"])", '', text)
    return text.replace("'", '').replace('"', '')


def getInfo(files, fields_path=None):
    """Build CSV text holding the requested fields of every song file.

    The field names are read, whitespace-separated, from a text file. The
    result has one header line with those names followed by one line per
    entry of ``files``; cells are comma-separated and are not CSV-quoted.

    Args:
        files: Iterable of song-file paths, each opened with
            ``getters.open_h5_file_read``.
        fields_path: Path of the field-list file. Defaults to
            ``sys.argv[1]`` when omitted.

    Returns:
        The CSV content as one string, terminated by a newline.

    Raises:
        SystemExit: With status 1, after printing an error, when a requested
            field is unknown (checked while processing a song file).
    """
    if fields_path is None:
        fields_path = sys.argv[1]
    with open(fields_path, 'r') as f:
        contents = f.read()
    fields = contents.split()
    print("creating csv with following fields:" + contents)

    rows = [','.join(fields)]
    if not fields:
        # Nothing was requested: the result is just the (empty) header line,
        # however many files were passed.
        return rows[0] + '\n'

    for path in files:
        cur_file = getters.open_h5_file_read(path)
        try:
            cells = []
            for name in fields:
                if name not in _INFO_FIELDS:
                    print('error: unspecified field')
                    # Non-zero status so callers can tell this is a failure.
                    sys.exit(1)
                # Only the requested getters are called; the large array
                # fields are not read unless asked for.
                getter = getattr(getters, 'get_' + name)
                cells.append(_clean_field(getter(cur_file)))
        finally:
            # Runs on SystemExit and getter errors too, so the handle is
            # never leaked.
            cur_file.close()
        rows.append(','.join(cells))

    return '\n'.join(rows) + '\n'