예제 #1
0
def control_quality_rf():
    '''
    Controls the quality of the RF recommendations.

    Keeps a random selection of at most five users, and for each of them
    enters one to three random songs from the user's 'good' list into
    recommend_w_rf. The points that the returned point dictionary holds for
    the user's remaining 'bad', 'medium' and 'good' songs are collected, and
    the average per category is printed at the end.
    '''
    listened_songs = get_listened_songs(limits=LIMIT_LIST, needs_good=True)[0]
    # random.sample needs a real sequence (dict views are rejected since
    # Python 3.11), and the sample size must not become negative when fewer
    # than five users are available.
    users_to_remove = random.sample(
        list(listened_songs), max(0, len(listened_songs) - 5))
    for user in users_to_remove:
        listened_songs.pop(user)
    song_dict = read_song_dict_w_labels()
    points_by_like = {'bad': [], 'medium': [], 'good': []}
    for i, user in enumerate(listened_songs, start=1):
        # NOTE(review): presumably needs_good=True guarantees a non-empty
        # 'good' list; an empty one would make randint(1, 0) raise.
        good_songs = list(listened_songs[user]['good'])
        ent_size = random.randint(1, min(3, len(good_songs)))
        entered_ids = random.sample(good_songs, ent_size)
        point_dict = recommend_w_rf(song_dict, entered_ids, 1)[1]
        for like in ('bad', 'medium', 'good'):
            # the entered songs themselves are not part of the evaluation
            points_by_like[like].extend(
                point_dict[song_id]
                for song_id in listened_songs[user][like]
                if song_id not in entered_ids)
        print(i, '/', len(listened_songs))
        # reset the points so the next user starts from a clean state
        for song in song_dict:
            song_dict[song]['points'] = 0
    print('average points for bad songs', np.mean(points_by_like['bad']))
    print('average points for medium songs',
          np.mean(points_by_like['medium']))
    print('average points for good songs', np.mean(points_by_like['good']))
예제 #2
0
def changerfe():
    '''
    Recompute the 'rf_enterable' flag of every song and write the data back.

    A song is flagged as enterable when at least one user returned by
    get_listened_songs has it in his 'good' category. The updated song
    dictionary is then written chunk by chunk (as produced by chunks) to the
    numbered files given by MSD_DATA_LABELED_PART_FILE_PATH, starting at 1.
    '''
    listened_songs = get_listened_songs(limits=LIMIT_LIST)[0]
    song_dict = read_song_dict_w_labels()
    # a set keeps the membership test below O(1) per song
    rf_songs = set()
    for user in listened_songs:
        rf_songs.update(listened_songs[user]['good'])
    for song in song_dict:
        song_dict[song]['rf_enterable'] = song in rf_songs
    for i, item in enumerate(chunks(song_dict), start=1):
        # the with-statement closes the file; no explicit close() is needed
        with open(MSD_DATA_LABELED_PART_FILE_PATH.format(i), 'w') as f:
            json.dump(item, f)
예제 #3
0
def get_rnd_good_songs(song_dict, amount, limits=LIMIT_LIST):
    '''
    Get an amount of random song ids that at least one user has in his
    good category.

    song_dict: Dictionary containing all song information; only its keys
    (the song ids) are used here

    amount: how many song ids to draw

    limits: passed on to get_listened_songs

    Returns a list of song ids drawn without replacement. random.sample
    raises a ValueError if fewer than amount candidates exist.
    '''
    listened_songs = get_listened_songs(limits=limits)[0]
    # find all songs that at least one user has in his good category
    good_songs = set()
    for user in listened_songs:
        good_songs.update(listened_songs[user]['good'])
    # keep only the known songs that are good, in song_dict order; a list is
    # required because random.sample rejects dict views since Python 3.11
    candidates = [song_id for song_id in song_dict if song_id in good_songs]
    return random.sample(candidates, amount)
예제 #4
0
def test_rf():
    '''
    Tests different Random Forest configurations.

    For 20 rounds, random limits and 1-5 random entered songs are drawn. For
    every combination of at least three parameters from PARS, a Random
    Forest is trained three times on a random 70/30 split of the songs of
    all users that have at least one entered song in their 'good' category.
    The average score of each configuration is written to RF_TEST_FILE_PATH,
    followed by the average score per limits, per number of entered songs
    and per parameter combination.
    '''
    song_dict = read_song_dict_w_labels()
    results = {}
    # all combinations of PARS containing at least 3 parameters; they do not
    # depend on the limits, so they are built only once
    par_combinations = []
    for size in range(3, len(PARS) + 1):
        par_combinations.extend(
            list(combo) for combo in itertools.combinations(PARS, size))
    like_values = (('bad', 1), ('medium', 2), ('good', 3))
    with open(RF_TEST_FILE_PATH, 'w') as outfile:
        for _ in range(20):  # test 20 different random limits
            ent_amount = random.randint(1, 5)  # 1-5 songs as entered songs
            limits = get_rnd_limits()
            entered_ids = get_rnd_good_songs(
                song_dict, ent_amount, limits=limits)
            # all users and the songs they've listened to
            listened_songs = get_listened_songs(limits=limits)[0]
            # sum of the ratings and number of ratings for each song
            target_sums = {}
            target_counts = {}
            for user in listened_songs:
                # only users are taken into account to build the random
                # forest that have at least one of the entered songs in
                # their 'good' category
                user_good = listened_songs[user]['good']
                if not any(song_id in user_good for song_id in entered_ids):
                    continue
                for like, like_value in like_values:
                    for song_id in listened_songs[user][like]:
                        target_sums[song_id] = \
                            target_sums.get(song_id, 0) + like_value
                        target_counts[song_id] = \
                            target_counts.get(song_id, 0) + 1
            rf_ids = list(target_sums)
            # if several values exist for a song, take the average of all
            # values and convert its integer part back to the bad, medium
            # and good category strings
            rf_targets = []
            for song_id in rf_ids:
                average = target_sums[song_id] / target_counts[song_id]
                if int(average) == 1:
                    rf_targets.append('bad')
                elif int(average) == 2:
                    rf_targets.append('medium')
                else:
                    rf_targets.append('good')
            sample_count = len(rf_targets)
            # test for each combination
            for par_combination in par_combinations:
                rf_pars = [[song_dict[song_id][par]
                            for par in par_combination]
                           for song_id in rf_ids]
                scores = []
                # build the RF 3 times and take the average score
                for _ in range(3):
                    # split into training and test data 70/30
                    training_indices = random.sample(
                        range(sample_count), int(0.7 * sample_count))
                    training_set = set(training_indices)
                    # the test data is everything that is not trained on
                    test_indices = [index for index in range(sample_count)
                                    if index not in training_set]
                    rf_targets_train = [rf_targets[index]
                                        for index in training_indices]
                    rf_targets_test = [rf_targets[index]
                                       for index in test_indices]
                    rf_pars_train = [rf_pars[index]
                                     for index in training_indices]
                    rf_pars_test = [rf_pars[index] for index in test_indices]
                    # build the Random Forest
                    rf = RandomForestClassifier()
                    rf.fit(rf_pars_train, rf_targets_train)
                    scores.append(rf.score(rf_pars_test, rf_targets_test))
                outfile.write(
                    'Limits: [' + str(limits[0]) + ', ' + str(limits[1])
                    + ']   ')
                outfile.write('Songs entered: ' + str(ent_amount) + '   ')
                outfile.write(
                    'Par-combination: [' + ', '.join(par_combination)
                    + ']   ')
                avg_score = np.mean(scores)
                outfile.write('Average score: ' + str(avg_score) + '\n')
                # collect the score under each of its three settings
                for arg in [limits, ent_amount, par_combination]:
                    results.setdefault(str(arg), []).append(avg_score)
        for key in results:
            outfile.write('Average score for ' + key + ': '
                          + str(np.mean(results[key])) + '\n')
예제 #5
0
def recommend_w_rf(song_dict, entered_ids, amount):
    '''
    Recommend an amount of songs for specific entered songs. Only songs
    that a Random Forest predicts as 'good' are taken into account.

    song_dict: Dictionary containing all song information

    entered_ids: the song ids entered by the user

    amount: how many songs to recommend

    Returns whatever print_recommendation returns for the updated song
    dictionary. Raises a ValueError if no user has any of the entered songs
    in his 'good' category, because the Random Forest can't be trained then.
    '''
    # all users and the songs they've listened to
    listened_songs = get_listened_songs(limits=LIMIT_LIST)[0]
    # sum of the ratings and number of ratings for each song
    target_sums = {}
    target_counts = {}
    for user in listened_songs:
        # only users are taken into account to build the random forest that
        # have at least one of the entered songs in their 'good' category
        user_good = listened_songs[user]['good']
        if not any(song_id in user_good for song_id in entered_ids):
            continue
        for like, like_value in zip(['bad', 'medium', 'good'], [1, 2, 3]):
            for song_id in listened_songs[user][like]:
                target_sums[song_id] = \
                    target_sums.get(song_id, 0) + like_value
                target_counts[song_id] = target_counts.get(song_id, 0) + 1
    if not target_sums:
        raise ValueError(
            'no user has any of the entered songs in the good category')
    # lists for building the Random Forest
    rf_pars = []
    rf_targets = []
    for song_id in target_sums:
        rf_pars.append([song_dict[song_id][par] for par in CHOSEN_PARS_RF])
        # if several values exist for a song, take the average of all values
        # and convert its integer part back to the bad, medium and good
        # category strings
        average = target_sums[song_id] / target_counts[song_id]
        if int(average) == 1:
            rf_targets.append('bad')
        elif int(average) == 2:
            rf_targets.append('medium')
        else:
            rf_targets.append('good')
    # build the Random Forest
    rf = RandomForestClassifier()
    rf.fit(rf_pars, rf_targets)
    # predict bad, medium, or good category for each song in the data
    ids = list(song_dict)
    songs_to_predict = [
        [song_dict[song_id][par] for par in CHOSEN_PARS_RF]
        for song_id in ids
    ]
    predictions = rf.predict(songs_to_predict)
    # points are given only to songs that are predicted in the good category
    # and that were not entered by the user
    ids_to_give_points = [
        song_id for song_id, prediction in zip(ids, predictions)
        if prediction == 'good' and song_id not in entered_ids
    ]
    # for each entered song, distribute points to all good songs
    for song_id in entered_ids:
        song_dict = distribute_points(ids_to_give_points, song_dict, song_id,
                                      CHOSEN_PARS_RF)
    return print_recommendation(song_dict, entered_ids, amount)