Example #1
class RecommenderSVDpp(Recommender):
    def __init__(self, recommendation_dataset: RecommendationDataSet):
        super(RecommenderSVDpp, self).__init__(recommendation_dataset.movies)
        self.algorithm = SVDpp()
        self.recommendation_dataset = recommendation_dataset

    def fit(self, dataset):
        return self.algorithm.fit(dataset)

    def test(self, test_set):
        return self.algorithm.test(test_set)

    def get_recommendation(self, watched, k=20):
        # get dataset 
        new_user_id, full_dataset = self.recommendation_dataset.get_dataset_with_extended_user(watched)
        inner_user_id = full_dataset.to_inner_uid(new_user_id)

        # the extended dataset contains the new user, so the model must be
        # retrained on the full dataset
        self.algorithm.fit(full_dataset)

        # watched movies
        watched = {full_dataset.to_inner_iid(key): value for key,value in watched.items()}

        # Predict a rating for every item the new user has not watched
        test_items = [
            self.algorithm.predict(new_user_id, full_dataset.to_raw_iid(i))
            for i in range(0, full_dataset.n_items)
            if i not in watched
        ]

        topn_items = [i[0] for i in get_top_n(test_items, n=k, minimum_rating=1.0)[new_user_id]]
        return self.movies.get_movie_by_movie_ids(topn_items)
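The helper get_dataset_with_extended_user is referenced above but not shown. A minimal sketch of what it might do, assuming the original ratings live in a DataFrame with userId, movieId and rating columns (all names here are assumptions, not part of the example):

import pandas as pd
from surprise import Dataset, Reader

def get_dataset_with_extended_user(ratings_df, watched, rating_scale=(1, 5)):
    # pick a raw user id that cannot collide with the existing users
    new_user_id = int(ratings_df['userId'].max()) + 1
    new_rows = pd.DataFrame(
        [(new_user_id, movie_id, rating) for movie_id, rating in watched.items()],
        columns=['userId', 'movieId', 'rating'])
    extended = pd.concat([ratings_df, new_rows], ignore_index=True)
    reader = Reader(rating_scale=rating_scale)
    data = Dataset.load_from_df(extended[['userId', 'movieId', 'rating']], reader)
    # the caller uses to_inner_uid / to_raw_iid and refits on the result,
    # so a full trainset is returned rather than the raw Dataset
    return new_user_id, data.build_full_trainset()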
Example #2
def SVDPP(PointFrame, RecommendNum=10, TypeNum=5):
    OutUserList = []
    OutFundList = []
    PointFrameList = []
    UserType = 0
    # Split the rating matrix by user type:
    for Type in range(TypeNum):
        PointFrameList.append(PointFrame.loc[PointFrame.Type == Type])
    # Score each user type separately:
    for Frame in PointFrameList:
        Frame = Frame.loc[:, 'User':'******']
        UserList = Frame.User.unique()
        FundList = Frame.FundCode.unique()
        UserType = UserType + 1
        reader = Reader(rating_scale=(0, 2))
        data = Dataset.load_from_df(Frame, reader=reader).build_full_trainset()
        if UserType == 4:
            model = SVDpp(n_factors=5)
        else:
            model = SVDpp()
        model.fit(data)
        for User in UserList:
            UserPointList = []
            for Fund in FundList:
                UserPointList.append(model.predict(User, Fund).est)
            RecommendList = np.argsort(UserPointList)[::-1][0:RecommendNum]
            for FundIndex in RecommendList:
                OutUserList.append(User)
                OutFundList.append(FundList[FundIndex])
    OutFrame = pd.DataFrame({
        "User": OutUserList,
        "RecommendFundCode": OutFundList
    })
    return OutFrame
Example #3
        def svd(user_id, area):
            algo = SVDpp(n_factors=100, n_epochs=15)
            # 3. save / load the trained model
            file_name = os.path.expanduser('./dump')
            #dump.dump(file_name, algo=algo)  # train once, save, then comment this line out
            _, algo = dump.load(file_name)

            Area = pd.read_csv('./area.csv')  ## { product ID (training data), area, product ID }

            #nowarea="C"
            #user=str("A2CX7LUOHB2NDG")  # user ID passed in
            neww = Area[Area['area'] == area]['productID'].tolist()  # product IDs for the requested area
            predictions = [
                algo.predict(str(user_id), str(productID))
                for productID in neww
            ]  # predictions

            ######
            def sortkey_est(pred):
                return pred.est

            predictions.sort(key=sortkey_est, reverse=True)
            #print(predictions)
            top_product_id = [int(pred.iid) for pred in predictions]
            top_product_id = top_product_id[:5]
            return top_product_id
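The commented-out dump line above implies a one-time training-and-save step. A sketch of that step, assuming the training data is a user/product/rating DataFrame (the tiny ratings_df below is a hypothetical stand-in, not part of the original):

import pandas as pd
from surprise import Dataset, Reader, SVDpp, dump

# hypothetical stand-in for the real training data
ratings_df = pd.DataFrame({
    'userID': ['u1', 'u1', 'u2', 'u2', 'u3'],
    'productID': ['p1', 'p2', 'p1', 'p3', 'p2'],
    'rating': [4, 3, 5, 2, 4],
})
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_df[['userID', 'productID', 'rating']], reader)

algo = SVDpp(n_factors=100, n_epochs=15)
algo.fit(data.build_full_trainset())
dump.dump('./dump', algo=algo)  # later runs only need dump.load('./dump')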
Example #4
class SvdPP(RecommenderBase):

    """
        SVDpp algorithm.
        Actually woring bad, just a draft
    """

    def __init__(self, URM):
        # the training set is built in fit(); nothing to do here yet
        # double check if training set is built fine for sgd
        # for u, i, r in self.trainset.all_ratings():
        #     a = 1
        pass

    def fit(self, urm, n_factors=20, n_epochs=20, lr_all=0.007, reg_all=0.02, init_mean=0,
            init_std_dev=0.1, verbose=True):
        # create the training set
        r, c = urm.nonzero()
        ones = np.ones(len(r), dtype=np.int32)
        d = np.vstack((r, c, ones)).transpose()
        df = pd.DataFrame(d)
        df.columns = ['userID', 'itemID', 'rating']
        reader = Reader()
        data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)
        self.trainset = data.build_full_trainset()

        # fit
        self.algo = SVDpp(n_factors=n_factors, n_epochs=n_epochs, lr_all=lr_all, reg_all=reg_all,
                          init_mean=init_mean, init_std_dev=init_std_dev, verbose=verbose)
        self.algo.fit(self.trainset)

    def recommend(self, userid, N=10, urm=None, filter_already_liked=True, with_scores=True, items_to_exclude=[]):
        if len(items_to_exclude) > 1:
            raise NotImplementedError('Items to exclude functionality is not implemented yet')

        r = np.empty(d.N_TRACKS)
        for i in range(d.N_TRACKS):
            r[i] = self.algo.predict(userid, i).est

        if filter_already_liked:
            if urm is None:
                raise ValueError('Please provide a URM in order to filter items already liked')
            else:
                r[urm.getrow(userid).nonzero()[1]] = 0

        l = [userid]
        ind = np.argpartition(r, -N)[-N:]
        for i in ind:
            if with_scores:
                l.append((i, r[i]))
            else:
                l.append(i)
        return l
Example #5
def computeSVDpp(data, test_np):
    """Compute the SVD++ method and return the predictions on the test set.
     The method uses the following parameters:
         - number of factors: 6
         - regularization for all parameters: 0.025

         data : data frame representing the train set
         test_np : data frame on which the predictions will be returned

         return : test_np with a prediction column named 'svdpp_rating'"""
    trainset, test = dataTrainSurprise(data, test_np)
    
    svdpp_algo = SVDpp(n_factors = 6, reg_all=0.025).fit(trainset)
    
    test['svdpp_rating'] = test[['user_id', 'movie_id']] \
        .apply(lambda row: svdpp_algo.predict(row['user_id'], row['movie_id']).est, axis=1)
    
    return test
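The helper dataTrainSurprise is referenced but not shown. A plausible sketch, assuming the train frame has user_id, movie_id and rating columns and that the test frame is passed through unchanged (both are assumptions):

from surprise import Dataset, Reader

def dataTrainSurprise(data, test_np):
    # build a full Surprise trainset from the train DataFrame
    reader = Reader(rating_scale=(1, 5))
    trainset = Dataset.load_from_df(
        data[['user_id', 'movie_id', 'rating']], reader).build_full_trainset()
    # the caller adds a prediction column to this frame, so return it as a copy
    return trainset, test_np.copy()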
Example #6
def SVD_pp():
    algo = SVDpp()

    # define the k-fold cross-validation iterator, k=3
    kf = KFold(n_splits=3)
    for trainset, testset in kf.split(data):
        # train and predict
        algo.fit(trainset)
        predictions = algo.test(testset)
        # compute RMSE
        accuracy.rmse(predictions, verbose=True)  # verbose prints each fold's result; default is False

    uid = str(196)
    iid = str(302)
    # print the prediction of uid for iid
    pred = algo.predict(uid, iid, r_ui=4, verbose=True)

    time2 = time.time()
    print(time2 - time1)
Example #7
class RecommenderSVDppSimilarUsers(Recommender):
    """
        Instead of rebuilding the dataset when a new user arrives, we find similar
        users and, based on them, recommend similar movies.
    """
    def __init__(self, movies):
        super(RecommenderSVDppSimilarUsers, self).__init__(movies)
        self.algorithm = SVDpp()

    def fit(self, dataset):
        return self.algorithm.fit(dataset)

    def test(self, test_set):
        return self.algorithm.test(test_set)

    def get_recommendation(self, watched, k=20, k_inner_item=10):
        # get dataset
        full_dataset = self.algorithm.trainset

        # watched movies
        watched = {
            full_dataset.to_inner_iid(key): value
            for key, value in watched.items()
        }

        # get similar users
        similar_users = self.get_similar_user_ids(watched, k=k_inner_item)

        # Accumulate similarity-weighted predictions from all similar users
        candidates = defaultdict(float)
        for inner_movie_id in range(0, full_dataset.n_items):
            if inner_movie_id not in watched:
                movie_id = full_dataset.to_raw_iid(inner_movie_id)
                for inner_user_id, similarity in similar_users.items():
                    prediction = self.algorithm.predict(
                        full_dataset.to_raw_uid(inner_user_id), movie_id)
                    candidates[movie_id] += similarity * prediction.est

        # heapq.nlargest(k, candidates.items(), key=itemgetter(1))
        return self.movies.get_movie_by_movie_ids(
            heapq.nlargest(k, candidates, key=candidates.get))
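get_similar_user_ids is called above but not defined in this example. A minimal sketch of one way such a method could work, scoring training users by how closely their ratings agree with the new user on the watched items (the weighting scheme is an assumption):

import heapq
from collections import defaultdict

def get_similar_user_ids(self, watched, k=10):
    # watched maps inner item ids to the new user's ratings
    trainset = self.algorithm.trainset
    scores = defaultdict(float)
    for inner_item_id, rating in watched.items():
        # trainset.ir maps an inner item id to (inner user id, rating) pairs
        for inner_user_id, other_rating in trainset.ir[inner_item_id]:
            # reward users who rated the same item with a similar score
            scores[inner_user_id] += 1.0 / (1.0 + abs(rating - other_rating))
    top = heapq.nlargest(k, scores, key=scores.get)
    return {uid: scores[uid] for uid in top}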
Example #8
def svdpp(train, test, ids, Xtest, Xids):
    """
    Extension of SVD taking the implicit ratings into account
    Arguments: train, the trainset
               test, the testset
               ids, unknown ratings
               Xtest, predicted ratings for the testset, to be used for final blending
               Xids, predicted ratings for unknown ratings, to be used for final blending
    """
    print('SVD++')
    algo = SVDpp(n_factors=100,
                 n_epochs=10,
                 lr_all=0.0015,
                 reg_all=0.05,
                 random_state=15)

    #Train algorithm on training set
    algo.fit(train)

    #Predict on train and compute RMSE
    predictions = algo.test(train.build_testset())
    print('   Training RMSE: ', accuracy.rmse(predictions, verbose=False))

    #Predict on test and compute RMSE
    predictions = algo.test(test)
    rmse = accuracy.rmse(predictions, verbose=False)
    print('   Test RMSE: ', rmse)

    preds_test = np.array([pred.est for pred in predictions])

    #Predict unknown ratings
    preds_ids = []
    for i in range(len(ids[0])):
        pred = algo.predict(str(ids[0][i]), str(ids[1][i]))
        preds_ids.append(pred.est)

    Xtest.append(preds_test)
    Xids.append(preds_ids)
    return rmse, Xtest, Xids, preds_test, preds_ids
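The final blending step that consumes Xtest and Xids is not shown. One plausible approach is a linear blender fit on the testset predictions; the function below is a sketch under that assumption (blend, y_test and the Ridge penalty are not from the original):

import numpy as np
from sklearn.linear_model import Ridge

def blend(Xtest, y_test, Xids):
    # one column per base model, one row per rating
    stacked_test = np.column_stack(Xtest)
    stacked_ids = np.column_stack(Xids)
    blender = Ridge(alpha=0.1)
    blender.fit(stacked_test, y_test)    # learn blending weights on the testset
    return blender.predict(stacked_ids)  # blended predictions for the unknown ratings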
Example #9
def SVDPPThreadFuc(Frame):
    OutUserList = []
    OutFundList = []
    Frame = Frame.loc[:, 'User':'******']
    UserList = Frame.User.unique()
    FundList = Frame.FundCode.unique()
    reader = Reader(rating_scale=(0, 2))
    data = Dataset.load_from_df(Frame, reader=reader).build_full_trainset()
    model = SVDpp()
    model.fit(data)
    for User in UserList:
        UserPointList = []
        for Fund in FundList:
            UserPointList.append(model.predict(User, Fund).est)
        RecommendList = np.argsort(UserPointList)[::-1][0:10]
        for FundIndex in RecommendList:
            OutUserList.append(User)
            OutFundList.append(FundList[FundIndex])
    OutFrame = pd.DataFrame({
        "User": OutUserList,
        "RecommendFundCode": OutFundList
    })
    return OutFrame
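The function name suggests it is meant to run once per user-type frame in a worker. A sketch of a plausible driver; SVDPPParallel and the use of multiprocessing are assumptions, not part of the original:

import pandas as pd
from multiprocessing import Pool

def SVDPPParallel(PointFrame, n_types=5):
    # one frame per user type, each scored concurrently by SVDPPThreadFuc
    frames = [PointFrame.loc[PointFrame.Type == t] for t in range(n_types)]
    with Pool(processes=n_types) as pool:
        out_frames = pool.map(SVDPPThreadFuc, frames)
    return pd.concat(out_frames, ignore_index=True)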
Example #10
def svd_model(df):
    """ Apply SVD.
    """
    df = pd.melt(df,
                 id_vars='smiles',
                 value_vars=list(df.columns[1:]),
                 var_name='Target',
                 value_name='TargetValue')

    mark = df.TargetValue.isna()
    unknown = df.loc[mark]
    known = df.loc[~mark]

    reader = Reader(rating_scale=(0, 1))
    data = Dataset.load_from_df(known[['smiles', 'Target', 'TargetValue']],
                                reader)

    kf = KFold(n_splits=3, random_state=57)

    algo = SVDpp(n_factors=12, reg_all=0.003, lr_all=0.006, random_state=132)

    for trainset, testset in kf.split(data):

        algo.fit(trainset)
        predictions = algo.test(testset)

        rmse = round(accuracy.rmse(predictions, verbose=True), 3)

        print('RMSE of SVD++ model for cross-validation: ' + str(rmse))

    result = unknown.copy()
    result['ToxicProb'] = result.apply(
        lambda x: algo.predict(x.smiles, x.Target).est, axis=1)
    result = result.drop(columns='TargetValue')

    return result
Example #11
uid = str(196)
iid = str(302)
algo1.predict(uid, iid, r_ui=4, verbose=True)   # print the prediction of uid for iid
print('-'*30)

"""SVDbias"""
print('SVDbias结果:')
time1=time.time()
algo2.fit(train_s)
pred = algo2.test(test_s)
accuracy.rmse(pred, verbose=True)
time2=time.time()
print('SVDbias用时: %.2fs' % (time2-time1))
uid = str(196)
iid = str(302)
algo2.predict(uid, iid, r_ui=4, verbose=True)
print('-'*30)

"""SVD++"""
print('SVD++结果:')
time1=time.time()
algo3.fit(train_s)
pred = algo3.test(test_s)
accuracy.rmse(pred, verbose=True)
time2=time.time()
print('SVD++用时: %.2fs' % (time2-time1))
uid = str(196)
iid = str(302)
algo3.predict(uid, iid, r_ui=4, verbose=True)
print('-'*30)
Example #12
# Load the MovieLens 1M dataset  UserID::MovieID::Rating::Timestamp
data = Dataset.load_builtin('ml-1m')
trainset, testset = train_test_split(data, test_size=.15)

# Configure the algorithm. K = number of neighbors. Name = type of similarity measure. User based = user-based or item-based filtering.

algoritmo = SVDpp(n_epochs=5)

algoritmo.fit(trainset)

# Select the user and the movie to analyze
# User 49: between 18 and 24 years old, a programmer living in Houston, Texas
uid = str(49)
# Movie watched and rated: Negotiator, The (1998)::Action|Thriller. Rating: 4
iid = str(2058)  # raw item id

# get a prediction for specific users and items.
pred = algoritmo.predict(uid, iid, r_ui=4, verbose=True)

# run the trained model against the testset
test_pred = algoritmo.test(testset)

# Evaluate RMSE
print("RMSE evaluation: ")
accuracy.rmse(test_pred, verbose=True)

# Evaluate MAE
print("MAE evaluation: ")
accuracy.mae(test_pred, verbose=True)
Example #13
    dfRatings = pd.read_csv(sys.argv[1])
    dfTest = pd.read_csv(sys.argv[2])

    # Delete unused columns
    del dfRatings['date']
    del dfRatings['train_id']
    del dfTest['date']
    del dfTest['test_id']

    # Set the rating scale and create the data for Surprise to use
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(
        dfRatings[['user_id', 'business_id', 'rating']], reader)

    train_set = data.build_full_trainset()

    # Use SVD++ with Surprise
    algo = SVDpp()
    algo.fit(train_set)  # AlgoBase.train() was removed in newer Surprise versions; use fit()

    f = open('SVDOutput.csv', 'w')
    f.write("test_id,rating\n")
    for i in range(len(dfTest)):
        prediction = algo.predict(dfTest.at[i, 'user_id'],
                                  dfTest.at[i, 'business_id'],
                                  r_ui=4,
                                  verbose=True)
        predRating = prediction.est
        f.write(str(i) + "," + str(predRating) + '\n')

    f.close()
Example #14
trainset = data.build_full_trainset()
#testset = data1.build_full_trainset()
# Use the famous SVD++ algorithm.
algo = SVDpp()

# Run 5-fold cross-validation and print results.
#cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
algo.fit(trainset)
'''
predictions = algo.test(testset)

print(predictions)
'''
'''
uid = str(0)  # raw user id (as in the ratings file). They are **strings**!
iid = str(35546)  # raw item id (as in the ratings file). They are **strings**!

# get a prediction for specific users and items.
pred = algo.predict(uid, iid, verbose=True)
print(pred[3])
'''
testset = open("../test1.csv", "r")
result = open("result_SVD++.txt", "w")

for line in testset:
    temp = line.split(",")
    pred = algo.predict(temp[0], temp[1], verbose=True)
    score = round(pred.est)
    #print()
    result.write(str(score) + "\n")
Example #15
# Reload
_, algo = dump.load(filename)

# Show distribution of ratings by users
df_users['user'].value_counts()
df_users['title'].value_counts()
df_users[df_users['user'] == 'lschmidt']

# For a given user and recipe, compare true rating with predicted rating
uid = 'lschmidt'
iid = 'acorn-squash-with-kale-and-sausage-51203850'
r = float(df_users.loc[(df_users['user'] == uid) & (df_users['title'] == iid),
                       'rating'].values)

# get a prediction for specific users and items.
pred = algo.predict(uid, iid, r_ui=r, verbose=True)


# I can try this for all recipes this user liked
def show_user_predictions(uid, df, algo):
    rated_recipes = df.loc[df['user'] == uid, 'title'].values
    for iid in rated_recipes:
        r = float(df.loc[(df['user'] == uid) & (df['title'] == iid),
                         'rating'].values)
        pred = algo.predict(uid, iid, r_ui=r, verbose=True)
        print(pred)


show_user_predictions('lschmidt', df_users, algo)

Example #16
results_df.to_csv('svdpp_grid_search.csv')

# train on the full data with the best parameters
algo = SVD(n_epochs=100, lr_all=0.002, reg_all=0.2)
trainset = data.build_full_trainset()
algo.fit(trainset)  # train() was removed in newer Surprise versions; use fit()

# predict on the test set

test_ambiente_rows = []

for i in range(0, len(test.index)):
    test_ambiente_rows.append(
        algo.predict(
            test.id_usuario.astype(str)[i],
            test.id_restaurante.astype(str)[i]).est)

test_ambiente = pd.DataFrame(test_ambiente_rows)  # DataFrame.append was removed in pandas 2.0

## look up rating_comida

train[['id_usuario', 'id_restaurante', 'rating_comida',
       'fecha']].to_csv('surprise_comida.csv', index=False)

file_path = 'surprise_comida.csv'

reader = Reader(line_format='user item rating timestamp',
                sep=',',
                skip_lines=1)

data = Dataset.load_from_file(file_path, reader=reader)
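The svdpp_grid_search.csv written at the top of this example implies an earlier grid search. A sketch of how it might have been produced with Surprise's GridSearchCV; the parameter grid here is an assumption:

import pandas as pd
from surprise import SVD
from surprise.model_selection import GridSearchCV

param_grid = {'n_epochs': [50, 100], 'lr_all': [0.002, 0.005], 'reg_all': [0.1, 0.2]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)

print(gs.best_score['rmse'], gs.best_params['rmse'])
results_df = pd.DataFrame(gs.cv_results)  # roughly what svdpp_grid_search.csv would hold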
Example #17
## first model and training
reader = Reader(rating_scale=(0, 10))
data = Dataset.load_from_df(user_book_rate, reader)
trainset = data.build_full_trainset()

algo = SVDpp(n_factors=100,n_epochs=300,lr_all=0.01,reg_all=0.2)
algo.fit(trainset)


# user_latent = algo.pu
# book_latent = algo.qi


## final result of first model
final_df = user_data[[
    'User ID', 'User Read Books (2017)', 'User Read Books (2018)',
    'Average Rating (2017)'
]]
final_ar = [[
    a[0],
    (len(a[1].split(', ')) * float(a[3]) +
     sum([algo.predict(a[0], int(b)).est for b in a[2].split(', ')])) /
    (len(a[1].split(', ')) + len(a[2].split(', ')))
] for a in final_df.values]


## user difficulty embedding
user_diff_ar = user_data['User Difficulty Choice'].values
mlb = MultiLabelBinarizer(classes = [1,2,3,4,5])
user_diff_code = mlb.fit_transform([([int(a)]) if a in '12345' else (1,2,3,4,5) for a in user_diff_ar])
dic_user_diff = dict(zip(user_data['User ID'].values, user_diff_code))

## book difficulty embedding
book_diff_ar = book_info['Difficulty (Reader suggested)'].values
mlb = MultiLabelBinarizer(classes = [1,2,3,4,5])
book_diff_code = mlb.fit_transform([([int(a)]) for a in book_diff_ar])
dic_book_diff = dict(zip(book_info['Book ID'].values, book_diff_code))

## book genre embedding
Example #18
alg.fit(data_train.build_full_trainset())

end = time.time()
print("***********************************************")
print("Exe time:")
print(end - start)

# %% Loading Test Data
file_path = "Data/sample_submission.csv"
data_test = utils.load_data_desired(file_path)

# %% Prediction
Predict_Test = []

for line in data_test:
    Predict_Test.append(alg.predict(str(line[1]), str(line[0])).est)

# %% Save Prediction
file = open("Details.txt", "w")

file.write("+ Best Score: \n \n")
file.write(str(Train_CV.best_score) + "\n \n")
file.write("************************************************************ \n")
file.write("+ Best Param: \n \n")
file.write(str(Train_CV.best_params) + "\n \n")
file.write("************************************************************ \n")
file.write("+ CV Summary: \n \n")
file.write(str(Train_CV.cv_results) + "\n \n")
file.write("************************************************************ \n")

file.close()
Example #19
    user_inner_id = 300
    print('user inner id', user_inner_id)
    user_rating = trainset.ur[user_inner_id]
    print('number of songs the user has rated', len(user_rating))
    items = map(lambda x:x[0], user_rating)
    real_song_id=[]
    real_song_name=[]
    for song in items:
        real_song_id.append(algo.trainset.to_raw_iid(song))
        real_song_name.append(song_id_name_dict[algo.trainset.to_raw_iid(song)])
        
    t_l=10
    song_list1=list(song_id_name_dict.keys())
    rank=[]
    for song in song_list1:
        # predict() expects raw ids, so convert the inner user id back first
        rank.append(algo.predict(trainset.to_raw_uid(user_inner_id), str(song)).est)
    rank=Series(rank)
    rank1=rank.sort_values(ascending=False)
    predict_song_id=[]
    predict_song_name=[]
    for i in range(t_l):
        predict_song_id.append(song_list1[list(rank1.index)[i]])
        predict_song_name.append(song_id_name_dict[song_list1[list(rank1.index)[i]]])
#from pandas import Series
    a=Series(real_song_name)
    b=Series(predict_song_name)
    c=pd.DataFrame({'real':a,'predict':b})
    
    #t_l=20   # length of the top list to take
    #if len(user_rating)<=t_l:
    #    pre_song=list(rank1.index[range(t_l)])
Example #20
# %% [markdown]
# ## Deployed App
#
#
# [Link to the deployed streamlit app](###)

# %%
# Prepare Kaggle submission

test = pd.read_csv('test.csv')

# Make predictions on test data
pred_list = []

for _, row in test.iterrows():
    pred = SVDpp_model.predict(row.userId, row.movieId).est
    pred_list.append(pred)

# %%
# Convert values to strings

test['userId'] = test['userId'].astype(str)
test['movieId'] = test['movieId'].astype(str)

# %%
# Create submission column

test['Id'] = test['userId'] + '_' + test['movieId']

# %%
Example #21
    def post(self, request):
        heydict = dict(request.POST.lists())
        user = heydict['usuario'][0]
        #Database
        server = 'LOCALHOST\\SQLEXPRESS' 
        database = 'MoviesHub' 
        username = '******' 
        password = '******'  
        cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER='+server+';DATABASE='+database+';UID='+username+';PWD='+ password)
        cursor = cnxn.cursor()
        
        queryMoviesRatings = "SELECT * FROM MoviesRatings WHERE movieId<250;"

        df_movies = pd.read_sql(queryMoviesRatings, cnxn)

        # This dataframe must always contain the user IDs, the movie IDs and the rating given by each user
        df_movies_to_model = df_movies[df_movies.columns[:-3]]

        queryRecommend = "SELECT title, genres FROM RecommendedMovies WHERE userId = {0};".format(user)
        df_recommend = pd.read_sql(queryRecommend, cnxn)
        queryErrores = "SELECT rmse FROM Errores WHERE userId = {0};".format(user)
        df_errores = pd.read_sql(queryErrores, cnxn)

        # Helper that takes a user, a DataFrame, an algorithm and the number of recommendations we want
        def recommend_system(userId, dataframe, algorithm, n_commends):
            movie_ids = dataframe['movieId'].to_list()
            movies_watched = dataframe[dataframe["userId"] == userId]["movieId"]
            movies_no_watched = [movie for movie in movie_ids if movie not in movies_watched]

            preds = [algorithm.predict(uid=userId, iid=movie) for movie in movies_no_watched]
            commends_ratting = {pred[1]: pred[3] for pred in preds}
            # sort by predicted rating, highest first, so the best items are kept
            order_dict = {k: v for k, v in sorted(commends_ratting.items(), key=lambda item: item[1], reverse=True)}

            top_predictions = list(order_dict.keys())[:n_commends]

            return dataframe[dataframe["movieId"].isin(top_predictions)][["title", "genres"]].drop_duplicates()

        if(len(df_recommend.index) == 0):
            # Use Reader() from the Surprise package to put the data in the format the algorithms expect
            reader = Reader()
            data = Dataset.load_from_df(df_movies_to_model, reader)

            # Split into train and test
            train, test = train_test_split(data, test_size=0.25)

            # Instantiate the algorithm and train it
            svd = SVDpp()
            svd.fit(train)
            preds = svd.test(test)

            # Evaluation metrics
            # mae = accuracy.mae(preds)
            rmse = accuracy.rmse(preds)
            rmse = rmse * 100
            rmse = format(rmse, '.2f')


            cursor.execute("INSERT INTO Errores (userId,rmse) values(?,?)", user, rmse)

            # Build the complete dataset with train and test
            trainfull = data.build_full_trainset()

            # Instantiate the algorithm again
            svd = SVDpp()
            # Train the algorithm
            svd.fit(trainfull)


            # make a prediction to check that everything works
            svd.predict(uid=1, iid=1)
            movies_recommended = recommend_system(user, df_movies, svd, 10)

            for index, row in movies_recommended.iterrows():
                cursor.execute("INSERT INTO RecommendedMovies (userId,movieId,title,genres) values(?,?,?,?)", user, index, row.title, row.genres)
            cnxn.commit()
            cursor.close()

            df_recommend = pd.read_sql(queryRecommend, cnxn)
            df_errores = pd.read_sql(queryErrores, cnxn)
        
        context = {
            'title': "Recomendaciones"
        }
        return render(request, "home/recomendacionesUser.html",{'df': df_recommend.values, 'user':user, 'dfErrores': df_errores.values}, context)
Example #22
#%%

reader = surprise.Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(merge, reader)
del merge

train, test = train_test_split(data, random_state=123, test_size=0.1)
#%% Train the model (no hyperparameter tuning)
algo = SVDpp()  # instantiate the model
algo.biased = False

algo.fit(train)

predictions = algo.test(test)
accuracy.mae(predictions)
a = algo.predict('15cbc496d67626ad90514b4243e7c045', '2204590')
print(a)
dump.dump(file_name='SVDmodel.pkl', algo=algo)
#%%
algo = dump.load('best_model.pkl')[1]
#%% Random-guess baseline model (for comparison)
algocompare = surprise.NormalPredictor()
algocompare.fit(train)
preCompare = algocompare.test(test)
accuracy.mae(preCompare)

#%% Compute precision and recall
## code from scikit-surprise documentation FAQs
from collections import defaultdict
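The snippet breaks off right after this import; the FAQ function it refers to is approximately the following, adapted from the scikit-surprise documentation (the k and threshold used in the usage line are assumptions for this 0-1 rating scale):

def precision_recall_at_k(predictions, k=10, threshold=3.5):
    # map each user to a list of (estimated rating, true rating) pairs
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions, recalls = dict(), dict()
    for uid, user_ratings in user_est_true.items():
        # sort the user's items by estimated rating, best first
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        # relevant items, recommended items, and relevant-and-recommended items
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])
        n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold))
                              for (est, true_r) in user_ratings[:k])
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 0
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 0
    return precisions, recalls

precisions, recalls = precision_recall_at_k(predictions, k=5, threshold=0.5)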

Example #23
users = matrix.userid.unique()
movies = matrix.movieid.unique()
movies.sort()
users.sort()

# Getting estimations and creating the output file according to the guidelines

my_recs = []
for uid in users:
    user_rats = np.array(user_item_matrix.iloc[uid - 1])
    for iid in movies:
        rating = user_rats[iid - 1]
        if rating not in range(1, 6):  # check whether the user already rated the movie

            estimation = algo.predict(uid=uid, iid=iid).est

            # .est is a float, so round it to the nearest integer rating
            my_recs.append((uid, iid, round(estimation)))
        else:
            my_recs.append((uid, iid, rating))

output = pd.DataFrame(my_recs,
                      columns=['uid', 'iid',
                               'predictions']).sort_values(['uid', 'iid'],
                                                           ascending=True)
np.savetxt(r'submit_sample.txt', output.values, fmt='%d')

print("submit_sample.txt created.")
Example #24
trainset = data.build_full_trainset()
# algo = SVDpp(n_factors=2,n_epochs=75,lr_all=0.05,reg_all=0.1)
algo = SVDpp(n_factors=3, n_epochs=300, lr_all=0.01, reg_all=0.2)
algo.fit(trainset)
user_latent = algo.pu
book_latent = algo.qi

## final result of first model
final_df = user_data[[
    'User ID', 'User Read Books (2017)', 'User Read Books (2018)',
    'Average Rating (2017)'
]]
final_ar = [[
    a[0],
    (len(a[1].split(', ')) * float(a[3]) +
     sum([algo.predict(a[0], int(b)).est for b in a[2].split(', ')])) /
    (len(a[1].split(', ')) + len(a[2].split(', ')))
] for a in final_df.values]

## user difficulty embedding
user_diff_ar = user_data['User Difficulty Choice'].values
mlb = MultiLabelBinarizer(classes=[1, 2, 3, 4, 5])
user_diff_code = mlb.fit_transform([([int(a)]) if a in '12345' else
                                    (1, 2, 3, 4, 5) for a in user_diff_ar])
dic_user_diff = dict(zip(user_data['User ID'].values, user_diff_code))

## book difficulty embedding
book_diff_ar = book_info['Difficulty (Reader suggested)'].values
mlb = MultiLabelBinarizer(classes=[1, 2, 3, 4, 5])
book_diff_code = mlb.fit_transform([([int(a)]) for a in book_diff_ar])
dic_book_diff = dict(zip(book_info['Book ID'].values, book_diff_code))
Example #25
# cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
# # Run 5-fold cross-validation and print results.
# algo.fit(trainset)
# print(algo.predict('5218791','100642618'))
# # Use the famous SVD algorithm.
# algo = SlopeOne()
# cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
# # Run 5-fold cross-validation and print results.
# algo.fit(trainset)
# print(algo.predict('5218791','100642618'))
# algo = KNNWithMeans(sim_options=sim_options)
# cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
# # Run 5-fold cross-validation and print results.
# algo.fit(trainset)
# print(algo.predict('5218791','100642618'))
# algo = KNNWithZScore(sim_options=sim_options)
# cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
# # Run 5-fold cross-validation and print results.
# algo.fit(trainset)
# print(algo.predict('5218791','100642618'))
# algo = CoClustering(n_cltr_u=300, n_cltr_i=600, n_epochs=100, verbose=True)
# cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
# Run 5-fold cross-validation and print results.
# algo.fit(trainset)
print(algo.predict('5218791', '100648984'))
# estimate() expects inner ids, so convert the raw ids first
print(algo.estimate(algo.trainset.to_inner_uid('5218791'), algo.trainset.to_inner_iid('100648984')))
print(algo.predict('52550', '100644648'))
print(algo.estimate(algo.trainset.to_inner_uid('52550'), algo.trainset.to_inner_iid('100644648')))
print(algo.predict('10663402', '100651469'))
print(algo.estimate(algo.trainset.to_inner_uid('10663402'), algo.trainset.to_inner_iid('100651469')))
    for movie_id in range(1, 1682 + 1):  # the loop goes from 1 to 1682 in this case
        # try/except is used because user_item_matrix is missing some columns, which
        # makes user_item_matrix.iloc[user_id][movie_id] raise an error since there is
        # no data for them. The model can still predict those columns, so the except
        # branch simply writes the model's prediction for the missing columns to the
        # output file instead of creating the columns in the dataframe.
        try:
            # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html for iloc usage
            # user_item_matrix.iloc[0][1] means the first user's rating for movie 1
            if pd.isnull(user_item_matrix.iloc[user_id][movie_id]):

                # predict() documentation: https://surprise.readthedocs.io/en/stable/getting_started.html
                # predict(1, 1) is the prediction user 1 made for movie 1, hence user_id + 1
                pred = model.predict(
                    user_id + 1, movie_id
                ).est  # SVDpp prediction for each user/movie pair, one by one

                # .est is a float, so round and cast to write integers instead of floats
                rating = int(round(pred))

            else:  # the user already rated this movie, so use that rating instead of a prediction
                rating = int(user_item_matrix.iloc[user_id][movie_id])

        except:  # if the movie does not exist in the dataframe the lookup raises an error, so handle it by writing the model's prediction as the rating for that specific user and missing movie