def centroidsToClusterList(path_centroids):
	"""Load k-means centroids from a pickle and persist them as Cluster objects.

	Reads the centroid list pickled at *path_centroids*, wraps each 5-feature
	centroid (acousticness, danceability, energy, instrumentalness, valence)
	in a Cluster with a sequential cluster_id, and pickles the resulting list
	to the module-level path ``path_clusters``.

	NOTE(review): depends on the module globals ``path_clusters``,
	``pickle_tools_Luke`` and ``Cluster`` being defined by the enclosing file.
	"""
	centroids = pickle_tools_Luke.returnObjectFromPickle(path_centroids)
	cluster_list = []
	# enumerate() replaces the hand-maintained cluster_id counter.
	for cluster_id, centroid in enumerate(centroids):
		cluster_dict = {
			"cluster_id" : cluster_id,
			# Key spelling "accousticness" kept as-is: downstream code reads this key.
			"accousticness" : centroid[0],
			"danceability" : centroid[1],
			"energy" : centroid[2],
			"instrumentalness" : centroid[3],
			"valence" : centroid[4]
		}
		cluster_list.append(Cluster.Cluster(cluster_dict))
	pickle_tools_Luke.writeToPickle(cluster_list, path_clusters)
# --- Example #2 ---
# msd_songs = pickle_tools_Luke.returnObjectFromPickle(out_path)
# spotify_songs = pickle_tools_Luke.returnObjectFromPickle(os.path.join(dir_path,'msd_data/all_song_data.txt'))
# title_list = []
# for song in spotify_songs:
# 	title = song[0].lower().replace("'","").replace(" ","_")
# 	title_list.append(title)
# for song in msd_songs:
# 	if song.attributes['title'].lower().replace("'","").replace(" ","_") in title_list:
# 		print song.attributes['title']
all_songs = []
for subdir, dirs, files in os.walk(song_lists_with_recommended):
    for file in files:
        filepath = subdir + os.sep + file
        if '.DS_Store' in filepath:
            continue
        songs_with_rec = pickle_tools_Luke.returnObjectFromPickle(filepath)
        for song in songs_with_rec:
            all_songs.append(song)
        print "added %d songs to all_songs[]" % len(songs_with_rec)

print "dumping %d songs to: %s" % (len(all_songs), "all_songs_with_rec.txt")
pickle_tools_Luke.writeToPickle(
    all_songs,
    os.path.join(song_lists_with_recommended, "all_songs_with_rec.txt"))

# with Song_DB() as dbase:

# song_list = hdf5_access_Luke.getSongs_MSD(msd_original_data_folder)
# out_path = '/Users/lucasjakober/Documents/Semester 9/Combined Course Project/Code/playlist_recommender/training_data/pickle/songs_msd/song_list_mined_from_hdf5.txt'
# pickle_tools_Luke.writeToPickle(song_list, out_path)
# print songs
# --- Example #3 ---
import Song
import pickle_tools_Luke
import os
import deltaVector

dir_path = os.path.dirname(os.path.realpath(__file__))

for subdir, dirs, files in os.walk(
        os.path.join(dir_path, 'training_data/pickle/normalized')):
    for file in files:
        filepath = subdir + os.sep + file
        if '.DS_Store' in filepath:
            continue
        song_list = pickle_tools_Luke.returnObjectFromPickle(filepath)
        agg_vect = deltaVector.findAverage(song_list)
        print agg_vect
        outpath = os.path.join(
            os.path.join(dir_path, 'training_data/pickle/agg_normalized'),
            "agg_" + os.path.basename(filepath))
        pickle_tools_Luke.writeToPickle(agg_vect, outpath)

# dir_path = os.path.dirname(os.path.realpath(__file__))

# for subdir, dirs, files in os.walk(os.path.join(dir_path,'training_data/pickle/normalized')):
# 	for file in files:
# 		filepath = subdir + os.sep + file
# 		if '.DS_Store' in filepath:
# 			continue
# 		song_list = pickle_tools_Luke.returnObjectFromPickle(filepath)
# 		new_song_list = []
# 		for song in song_list:
# --- Example #4 ---
import pickle_tools_Luke
import os

dir_path = os.path.dirname(os.path.realpath(__file__))

# For each "Fitness" playlist pickle under training_data/pickle/normalized,
# compute the distance between its average feature vector and the average
# vector of every playlist in the same directory, collecting one distance
# dict per pair.
# NOTE(review): requires ``deltaVector`` to be imported at module top and a
# ``dir_path`` global — confirm against the enclosing file.
distance_dictionaries = []
sorted_lists = []

for subdir, dirs, files in os.walk(
        os.path.join(dir_path, 'training_data/pickle/normalized')):
    for file1 in files:
        filepath1 = os.path.join(subdir, file1)
        # Guard clauses: skip Finder metadata and non-"Fitness" playlists.
        if '.DS_Store' in filepath1:
            continue
        if 'Fitness' not in filepath1:
            continue
        song_list1 = pickle_tools_Luke.returnObjectFromPickle(filepath1)
        # Hoisted out of the inner loop: the reference playlist's average
        # vector and display name are invariant per comparison (assumes
        # findAverage has no side effects — TODO confirm).
        agg_vect1 = deltaVector.findAverage(song_list1)
        name1 = os.path.basename(filepath1)
        # file2 no longer shadows the outer loop variable (was "file").
        for file2 in files:
            filepath2 = os.path.join(subdir, file2)
            if '.DS_Store' in filepath2:
                continue
            song_list2 = pickle_tools_Luke.returnObjectFromPickle(filepath2)
            agg_vect2 = deltaVector.findAverage(song_list2)
            name2 = os.path.basename(filepath2)
            distance_dict = deltaVector.findDistance(
                agg_vect1, name1, agg_vect2, name2)
            distance_dictionaries.append(distance_dict)

# NOTE(review): this loop body appears to be a mis-paste of the centroid-to-
# Cluster conversion from centroidsToClusterList above.  The loop variable
# ``dictionary`` is never used, and ``centroids``, ``cluster_id``,
# ``cluster_list`` and ``path_clusters`` are not defined in this scope, so
# this block would raise NameError if executed as-is — confirm intent before
# relying on it.
for dictionary in distance_dictionaries:
	for centroid in centroids:
		cluster_dict = {
			"cluster_id" : cluster_id,
			# NOTE(review): "accousticness" spelling matches the key used elsewhere in this file.
			"accousticness" : centroid[0],
			"danceability" : centroid[1],
			"energy" : centroid[2],
			"instrumentalness" : centroid[3],
			"valence" : centroid[4]
		}
		cluster = Cluster.Cluster(cluster_dict)
		cluster_list.append(cluster)
		cluster_id += 1
	pickle_tools_Luke.writeToPickle(cluster_list,path_clusters)

with Song_DB() as dbase:
	not_clustered = pickle_tools_Luke.returnObjectFromPickle(path_all_songs_no_cluster_id)
	print "clustering %d songs"%len(not_clustered)
	cluster_IDs_by_song_list_index, centroids = k_means_clustering_dataset.cluster_data(not_clustered)
	clustered = dataset_processing.combine_cluster_song_data(not_clustered, cluster_IDs_by_song_list_index)
	pickle_tools_Luke.writeToPickle(clustered,path_all_songs_clustered)
	pickle_tools_Luke.writeToPickle(centroids,path_centroids)
	all_songs = pickle_tools_Luke.returnObjectFromPickle(path_all_songs_clustered)
	
	dbase.createTable_Song()
	dbase.insert_Songs(all_songs)

	centroidsToClusterList(path_centroids)
	cluster_list = pickle_tools_Luke.returnObjectFromPickle(path_clusters)
	for cluster in cluster_list:
		print cluster.attributes