def control_quality_rf():
    '''
    Controls the quality of the RF recommendations.

    Keeps a random selection of at most five of the users returned by
    get_listened_songs. For each kept user, one to three random songs from
    the user's 'good' category are entered into recommend_w_rf. The points
    that the recommendation assigns to the user's remaining 'bad', 'medium'
    and 'good' songs are collected, and the mean per category is printed at
    the end.
    '''
    listened_songs = get_listened_songs(limits=LIMIT_LIST, needs_good=True)[0]

    # random.sample() requires a sequence: a dict key view raises TypeError on
    # Python 3.11+. max() keeps the sample size valid when there are fewer
    # than five users.
    users_to_remove = random.sample(
        list(listened_songs), max(0, len(listened_songs) - 5))
    for user in users_to_remove:
        listened_songs.pop(user)

    song_dict = read_song_dict_w_labels()
    points_by_category = {'bad': [], 'medium': [], 'good': []}

    for position, user in enumerate(listened_songs, start=1):
        user_songs = listened_songs[user]
        # NOTE(review): randint raises ValueError for a user without any good
        # song; needs_good=True presumably rules that out - confirm.
        ent_size = random.randint(1, min(3, len(user_songs['good'])))
        entered_ids = random.sample(user_songs['good'], ent_size)

        # the second element of the recommendation result maps song ids to
        # their points
        point_dict = recommend_w_rf(song_dict, entered_ids, 1)[1]

        for like, point_list in points_by_category.items():
            for song_id in user_songs[like]:
                # entered songs are the input, not part of the evaluation
                if song_id not in entered_ids:
                    point_list.append(point_dict[song_id])

        print(position, '/', len(listened_songs))

        # reset the points so the next user starts from a clean song_dict
        for song in song_dict:
            song_dict[song]['points'] = 0

    print('average points for bad songs', np.mean(points_by_category['bad']))
    print('average points for medium songs',
          np.mean(points_by_category['medium']))
    print('average points for good songs', np.mean(points_by_category['good']))
def changerfe():
    '''
    Recomputes the 'rf_enterable' flag of every song and rewrites the
    labeled data part files.

    A song is flagged as rf_enterable when at least one user returned by
    get_listened_songs has it in the 'good' category. The updated song
    dictionary is then split with chunks() and each part is dumped as JSON to
    MSD_DATA_LABELED_PART_FILE_PATH, formatted with the part number
    (starting at 1).
    '''
    listened_songs = get_listened_songs(limits=LIMIT_LIST)[0]
    song_dict = read_song_dict_w_labels()

    # a set keeps the membership test below O(1) per song
    rf_songs = set()
    for user_songs in listened_songs.values():
        rf_songs.update(user_songs['good'])

    for song in song_dict:
        song_dict[song]['rf_enterable'] = song in rf_songs

    for part_number, item in enumerate(chunks(song_dict), start=1):
        # the with-statement closes the file, no explicit close() needed
        with open(MSD_DATA_LABELED_PART_FILE_PATH.format(part_number), 'w') as f:
            json.dump(item, f)
def get_rnd_good_songs(song_dict, amount, limits=LIMIT_LIST):
    '''
    Get an amount of random song ids that at least one user has in his good
    category.

    song_dict: Dictionary containing all song information
    amount: how many song ids to draw
    limits: passed on to get_listened_songs

    Returns a list of `amount` distinct song ids that are keys of song_dict.
    random.sample raises ValueError when fewer than `amount` songs qualify.
    '''
    listened_songs = get_listened_songs(limits=limits)[0]

    # find all songs that at least one user has in his good category
    good_songs = set()
    for user_songs in listened_songs.values():
        good_songs.update(user_songs['good'])

    # keep only the good songs, in song_dict order; a list is required
    # because random.sample() rejects dict key views on Python 3.11+
    candidates = [song_id for song_id in song_dict if song_id in good_songs]
    return random.sample(candidates, amount)
def _label_songs_for_rf_test(listened_songs, entered_ids):
    '''
    Determine the Random Forest target category of every training song.

    Only users that have at least one of the entered songs in their 'good'
    category contribute. Each of their songs is rated 1 (bad), 2 (medium) or
    3 (good); ratings of the same song by several users are averaged and the
    average is truncated to an integer before it is mapped back to a
    category.

    Returns a dict mapping song id -> 'bad' / 'medium' / 'good', ordered by
    the first occurrence of each song.
    '''
    rating_sums = {}
    rating_counts = {}
    for user_songs in listened_songs.values():
        good_songs = user_songs['good']
        if not any(song_id in good_songs for song_id in entered_ids):
            continue
        for like, like_value in (('bad', 1), ('medium', 2), ('good', 3)):
            for song_id in user_songs[like]:
                rating_sums[song_id] = rating_sums.get(song_id, 0) + like_value
                rating_counts[song_id] = rating_counts.get(song_id, 0) + 1

    category_by_rating = {1: 'bad', 2: 'medium'}
    labels = {}
    for song_id, rating_sum in rating_sums.items():
        # floor division == truncated true average (all ratings are positive)
        average_rating = rating_sum // rating_counts[song_id]
        labels[song_id] = category_by_rating.get(average_rating, 'good')
    return labels


def test_rf():
    '''
    Tests different Random Forest configurations.

    For 20 random limits (with 1-5 random entered songs each) and every
    combination of at least three parameters from PARS, a Random Forest is
    trained three times on a random 70/30 split of the training songs. The
    average score of every configuration is written to RF_TEST_FILE_PATH,
    followed by the average score per limits, per amount of entered songs
    and per parameter combination.
    '''
    song_dict = read_song_dict_w_labels()
    results = {}

    # all combinations of PARS containing 3 up to len(PARS) parameters; they
    # do not depend on the limits, so they are built only once
    par_combinations = [
        list(combination)
        for size in range(3, len(PARS) + 1)
        for combination in itertools.combinations(PARS, size)]

    with open(RF_TEST_FILE_PATH, 'w') as outfile:
        for _ in range(20):  # test 20 different random limits
            ent_amount = random.randint(1, 5)  # 1-5 songs as entered songs
            limits = get_rnd_limits()
            entered_ids = get_rnd_good_songs(
                song_dict, ent_amount, limits=limits)

            # all users and the songs they've listened to
            listened_songs = get_listened_songs(limits=limits)[0]

            # the targets are independent of the parameter combination
            labels = _label_songs_for_rf_test(listened_songs, entered_ids)
            rf_ids = list(labels)
            rf_targets = list(labels.values())
            sample_count = len(rf_targets)

            # test for each combination
            for par_combination in par_combinations:
                rf_pars = [
                    [song_dict[song_id][par] for par in par_combination]
                    for song_id in rf_ids]

                scores = []
                # build the RF 3 times and take the average score
                for _ in range(3):
                    # split into training and test data 70/30
                    training_indices = random.sample(
                        range(sample_count), int(0.7 * sample_count))
                    training_set = set(training_indices)
                    # the test data must not overlap with the training data
                    test_indices = [
                        index for index in range(sample_count)
                        if index not in training_set]

                    rf_targets_train = [
                        rf_targets[index] for index in training_indices]
                    rf_targets_test = [
                        rf_targets[index] for index in test_indices]
                    rf_pars_train = [
                        rf_pars[index] for index in training_indices]
                    rf_pars_test = [rf_pars[index] for index in test_indices]

                    # build the Random Forest
                    rf = RandomForestClassifier()
                    rf.fit(rf_pars_train, rf_targets_train)
                    scores.append(rf.score(rf_pars_test, rf_targets_test))

                avg_score = np.mean(scores)
                outfile.write(
                    'Limits: [' + str(limits[0]) + ', ' + str(limits[1])
                    + '] ')
                outfile.write('Songs entered: ' + str(ent_amount) + ' ')
                outfile.write(
                    'Par-combination: [' + ', '.join(par_combination) + '] ')
                outfile.write('Average score: ' + str(avg_score) + '\n')

                # group the score under each setting of this configuration
                for arg in [limits, ent_amount, par_combination]:
                    results.setdefault(str(arg), []).append(avg_score)

        for key, key_scores in results.items():
            outfile.write('Average score for ' + key + ': '
                          + str(np.mean(key_scores)) + '\n')
def recommend_w_rf(song_dict, entered_ids, amount):
    '''
    Recommend an amount of songs for specific entered songs. Only songs that
    a Random Forest predicts as 'good' are taken into account.

    The Random Forest is trained on the songs of all users that have at
    least one of the entered songs in their 'good' category, using the
    parameters in CHOSEN_PARS_RF. Songs rated by several of those users get
    the truncated average of their ratings as target category.

    song_dict: Dictionary containing all song information
    entered_ids: the song ids entered by the user
    amount: how many songs to recommend

    Returns the result of print_recommendation(song_dict, entered_ids,
    amount).
    '''
    # all users and the songs they've listened to
    listened_songs = get_listened_songs(limits=LIMIT_LIST)[0]

    # sum and number of the ratings per song; dicts keep the songs in order
    # of first occurrence and avoid scanning a list for every song
    rating_sums = {}
    rating_counts = {}
    for user_songs in listened_songs.values():
        # only users are taken into account to build the random forest that
        # have at least one of the entered songs in their 'good' category
        good_songs = user_songs['good']
        if not any(song_id in good_songs for song_id in entered_ids):
            continue
        for like, like_value in (('bad', 1), ('medium', 2), ('good', 3)):
            for song_id in user_songs[like]:
                rating_sums[song_id] = rating_sums.get(song_id, 0) + like_value
                rating_counts[song_id] = rating_counts.get(song_id, 0) + 1

    # lists for building the Random Forest
    category_by_rating = {1: 'bad', 2: 'medium'}
    rf_pars = []
    rf_targets = []
    for song_id, rating_sum in rating_sums.items():
        rf_pars.append([song_dict[song_id][par] for par in CHOSEN_PARS_RF])
        # convert the truncated average rating back to a category string;
        # floor division == truncation because all ratings are positive
        average_rating = rating_sum // rating_counts[song_id]
        rf_targets.append(category_by_rating.get(average_rating, 'good'))

    # build the Random Forest
    rf = RandomForestClassifier()
    rf.fit(rf_pars, rf_targets)

    # predict bad, medium, or good category for each song in the data
    ids = list(song_dict)
    songs_to_predict = [
        [song_dict[song_id][par] for par in CHOSEN_PARS_RF]
        for song_id in ids]
    predictions = rf.predict(songs_to_predict)

    # points are given only to songs that are predicted in the good category
    # and that were not entered by the user
    ids_to_give_points = [
        song_id for song_id, prediction in zip(ids, predictions)
        if prediction == 'good' and song_id not in entered_ids]

    # for each entered song, distribute points to all good songs
    for song_id in entered_ids:
        song_dict = distribute_points(ids_to_give_points, song_dict, song_id,
                                      CHOSEN_PARS_RF)
    return print_recommendation(song_dict, entered_ids, amount)