def centroidsToClusterList(path_centroids):
    """Load k-means centroids from a pickle and persist them as Cluster objects.

    Each centroid is assumed to be an indexable 5-vector of audio features in
    the order (acousticness, danceability, energy, instrumentalness, valence)
    -- TODO confirm against the clustering code that produced the pickle.

    :param path_centroids: path to the pickled list of centroid vectors.

    Side effect: pickles the resulting Cluster list to the module-level
    ``path_clusters`` path (NOTE(review): relies on a global defined elsewhere
    in this file rather than a parameter).
    """
    centroids = pickle_tools_Luke.returnObjectFromPickle(path_centroids)
    cluster_list = []
    # enumerate() replaces the original hand-maintained cluster_id counter.
    for cluster_id, centroid in enumerate(centroids):
        cluster_dict = {
            "cluster_id": cluster_id,
            # NOTE(review): "accousticness" is misspelled, but it is a runtime
            # dict key that downstream code looks up -- left unchanged.
            "accousticness": centroid[0],
            "danceability": centroid[1],
            "energy": centroid[2],
            "instrumentalness": centroid[3],
            "valence": centroid[4],
        }
        cluster_list.append(Cluster.Cluster(cluster_dict))
    pickle_tools_Luke.writeToPickle(cluster_list, path_clusters)
# msd_songs = pickle_tools_Luke.returnObjectFromPickle(out_path) # spotify_songs = pickle_tools_Luke.returnObjectFromPickle(os.path.join(dir_path,'msd_data/all_song_data.txt')) # title_list = [] # for song in spotify_songs: # title = song[0].lower().replace("'","").replace(" ","_") # title_list.append(title) # for song in msd_songs: # if song.attributes['title'].lower().replace("'","").replace(" ","_") in title_list: # print song.attributes['title'] all_songs = [] for subdir, dirs, files in os.walk(song_lists_with_recommended): for file in files: filepath = subdir + os.sep + file if '.DS_Store' in filepath: continue songs_with_rec = pickle_tools_Luke.returnObjectFromPickle(filepath) for song in songs_with_rec: all_songs.append(song) print "added %d songs to all_songs[]" % len(songs_with_rec) print "dumping %d songs to: %s" % (len(all_songs), "all_songs_with_rec.txt") pickle_tools_Luke.writeToPickle( all_songs, os.path.join(song_lists_with_recommended, "all_songs_with_rec.txt")) # with Song_DB() as dbase: # song_list = hdf5_access_Luke.getSongs_MSD(msd_original_data_folder) # out_path = '/Users/lucasjakober/Documents/Semester 9/Combined Course Project/Code/playlist_recommender/training_data/pickle/songs_msd/song_list_mined_from_hdf5.txt' # pickle_tools_Luke.writeToPickle(song_list, out_path) # print songs
import Song import pickle_tools_Luke import os import deltaVector dir_path = os.path.dirname(os.path.realpath(__file__)) for subdir, dirs, files in os.walk( os.path.join(dir_path, 'training_data/pickle/normalized')): for file in files: filepath = subdir + os.sep + file if '.DS_Store' in filepath: continue song_list = pickle_tools_Luke.returnObjectFromPickle(filepath) agg_vect = deltaVector.findAverage(song_list) print agg_vect outpath = os.path.join( os.path.join(dir_path, 'training_data/pickle/agg_normalized'), "agg_" + os.path.basename(filepath)) pickle_tools_Luke.writeToPickle(agg_vect, outpath) # dir_path = os.path.dirname(os.path.realpath(__file__)) # for subdir, dirs, files in os.walk(os.path.join(dir_path,'training_data/pickle/normalized')): # for file in files: # filepath = subdir + os.sep + file # if '.DS_Store' in filepath: # continue # song_list = pickle_tools_Luke.returnObjectFromPickle(filepath) # new_song_list = [] # for song in song_list:
# NOTE(review): pairwise-distance script -- for each pair of normalized
# playlist pickles (outer file filtered to 'Fitness' lists), computes average
# feature vectors via deltaVector.findAverage and their distance via
# deltaVector.findDistance, collecting the results in distance_dictionaries.
# Left byte-identical because the trailing ``for dictionary in
# distance_dictionaries:`` loop is truncated in this chunk -- its body is not
# visible here, so a reformat cannot be completed safely.
# Known issues to fix once the full file is in view:
#   * ``deltaVector`` is used (findAverage / findDistance) but never imported
#     in this chunk's import list -- presumably imported elsewhere; verify.
#   * the inner ``for file in files`` loop shadows the outer loop variable
#     ``file`` -- rename one of them.
#   * ``song_list1`` is only assigned when a 'Fitness' file is seen; if the
#     inner comparison loop is not nested under that ``if``, it would raise
#     NameError when no 'Fitness' file has been read yet -- TODO confirm the
#     intended nesting.
#   * ``sorted_lists`` is initialized but never used in the visible code.
import pickle_tools_Luke import os dir_path = os.path.dirname(os.path.realpath(__file__)) distance_dictionaries = [] sorted_lists = [] for subdir, dirs, files in os.walk( os.path.join(dir_path, 'training_data/pickle/normalized')): for file in files: filepath1 = subdir + os.sep + file if '.DS_Store' in filepath1: continue if 'Fitness' in filepath1: song_list1 = pickle_tools_Luke.returnObjectFromPickle(filepath1) for file in files: filepath2 = subdir + os.sep + file if '.DS_Store' in filepath2: continue song_list2 = pickle_tools_Luke.returnObjectFromPickle( filepath2) agg_vect1 = deltaVector.findAverage(song_list1) agg_vect2 = deltaVector.findAverage(song_list2) name1 = os.path.basename(filepath1) name2 = os.path.basename(filepath2) distance_dict = deltaVector.findDistance( agg_vect1, name1, agg_vect2, name2) distance_dictionaries.append(distance_dict) for dictionary in distance_dictionaries:
# NOTE(review): this chunk begins mid-definition.  The leading
# ``for centroid in centroids: ... writeToPickle(cluster_list, path_clusters)``
# fragment is the tail of centroidsToClusterList() (its ``def`` line falls
# outside this chunk; the chunking appears to overlap), so it is left
# byte-identical: without the enclosing def, re-indenting it at module level
# would change meaning (``centroids``/``cluster_id``/``cluster_list`` are
# undefined there).
# The ``with Song_DB() as dbase:`` section is the clustering driver: it loads
# the unclustered song pickle, runs k_means_clustering_dataset.cluster_data,
# attaches cluster ids back onto the songs via
# dataset_processing.combine_cluster_song_data, pickles the clustered songs
# and the centroids, creates/populates the Song table, builds the Cluster list
# from the centroids, and prints each cluster's attributes.
# The path_* names and Song_DB / k_means_clustering_dataset /
# dataset_processing are presumably defined or imported elsewhere in this
# file -- verify once the full file is in view.
for centroid in centroids: cluster_dict = { "cluster_id" : cluster_id, "accousticness" : centroid[0], "danceability" : centroid[1], "energy" : centroid[2], "instrumentalness" : centroid[3], "valence" : centroid[4] } cluster = Cluster.Cluster(cluster_dict) cluster_list.append(cluster) cluster_id += 1 pickle_tools_Luke.writeToPickle(cluster_list,path_clusters) with Song_DB() as dbase: not_clustered = pickle_tools_Luke.returnObjectFromPickle(path_all_songs_no_cluster_id) print "clustering %d songs"%len(not_clustered) cluster_IDs_by_song_list_index, centroids = k_means_clustering_dataset.cluster_data(not_clustered) clustered = dataset_processing.combine_cluster_song_data(not_clustered, cluster_IDs_by_song_list_index) pickle_tools_Luke.writeToPickle(clustered,path_all_songs_clustered) pickle_tools_Luke.writeToPickle(centroids,path_centroids) all_songs = pickle_tools_Luke.returnObjectFromPickle(path_all_songs_clustered) dbase.createTable_Song() dbase.insert_Songs(all_songs) centroidsToClusterList(path_centroids) cluster_list = pickle_tools_Luke.returnObjectFromPickle(path_clusters) for cluster in cluster_list: print cluster.attributes