Esempio n. 1
0
def predict(rating_dic):

    df_clean = pd.read_csv("dataset_clean.csv")
    #######################
    # Fit surprise model
    #######################

    final_model = KNNBaseline(k=60, min_k=2, sim_options={'name': 'pearson_baseline', 'user_based': True})

    new_user_id = max(df_clean["userID"]) + 1
    ratings = np.array(list(rating_dic.values()))
    rated_mask = ratings != None
    ratings = ratings[rated_mask]
    items = np.array(list(rating_dic.keys()))[rated_mask]
    user = np.ones(len(items), dtype="int") * new_user_id
    new_user_df = pd.DataFrame({"userID": user, "itemID": items, "rating": ratings})

    total_df = df_clean.append(new_user_df)

    # A reader is still needed but only the rating_scale param is requiered.
    reader = Reader(rating_scale=(0, 10))

    # The columns must correspond to user id, item id and ratings (in that order).
    new_trainset = Dataset.load_from_df(total_df, reader).build_full_trainset()

    ## Fit the best model

    final_model.fit(new_trainset)

    predicted_ratings = []
    for nootropic in nootropics_list:
        predicted_ratings.append(final_model.predict(new_user_id, nootropic).est)

    item_baselines = final_model.default_prediction() + final_model.compute_baselines()[
        1]  # mean rating + item baseline ?

    result_df = pd.DataFrame(
        {"nootropic": nootropics_list, "predicted_rating": predicted_ratings, "baseline_rating": item_baselines})

    nootropics_without_ratings = [nootropic for nootropic in nootropics_list if (nootropic not in rating_dic.keys())]
    new_result_df = result_df[result_df["nootropic"].isin(nootropics_without_ratings)]
    return new_result_df.sort_values("predicted_rating", ascending=False, ignore_index=True)
Esempio n. 2
0
def get_item_baseline():
    df_clean = pd.read_csv("dataset_clean.csv")
    #######################
    # Fit surprise model
    #######################

    final_model = KNNBaseline(k=60, min_k=2, sim_options={'name': 'pearson_baseline', 'user_based': True})

    total_df = df_clean

    # A reader is still needed but only the rating_scale param is requiered.
    reader = Reader(rating_scale=(0, 10))

    # The columns must correspond to user id, item id and ratings (in that order).
    new_trainset = Dataset.load_from_df(total_df, reader).build_full_trainset()

    ## Fit the best model

    final_model.fit(new_trainset)

    item_baselines = final_model.default_prediction() + final_model.compute_baselines()[
        1]  # mean rating + item baseline ?

    return pd.DataFrame({"nootropic": nootropics_list, "item_baselines":item_baselines})