Example #1
0
def coClustering(trainset, testset):
    """Fit Surprise's CoClustering on `trainset` and evaluate on `testset`.

    Args:
        trainset: a Surprise trainset to fit the model on.
        testset: a Surprise testset to predict and score.

    Returns:
        (rmse, mae, predictions): accuracy scores and the raw predictions.
    """
    separator = "-" * 5
    print("\n" + separator + " CoClustering algorithm using surprise package " +
          separator)
    model = CoClustering()
    model.fit(trainset)
    preds = model.test(testset)
    # accuracy helpers both print and return their metric.
    rmse_score = accuracy.rmse(preds)
    mae_score = accuracy.mae(preds)
    return rmse_score, mae_score, preds
Example #2
0
def Coclustering(recipe_df, train_rating_df, pd, benchmark):
    """Join recipes with their training ratings, fit CoClustering on an
    80/20 split, and run the full evaluator suite on the test predictions.

    Args:
        recipe_df: DataFrame of recipes keyed by 'recipe_id'.
        train_rating_df: DataFrame of ratings keyed by 'recipe_id'.
        pd: the pandas module (passed in by the caller).
        benchmark: benchmark object forwarded to Evaluators.RunAllEvals.
    """
    print("\n###### Compute CoClustering ######")
    merged = pd.merge(recipe_df, train_rating_df, on='recipe_id', how='inner')
    dataset = Dataset.load_from_df(
        merged[['user_id', 'recipe_id', 'rating']],
        Reader(rating_scale=(1, 5)),
    )
    train_part, test_part = train_test_split(dataset, test_size=.2,
                                             random_state=0)

    model = CoClustering()
    model.fit(train_part)
    Evaluators.RunAllEvals(model.test(test_part), benchmark)
def cluster(data, kwargs):
    """Fit a CoClustering model and densely predict a user x item rating grid.

    Args:
        data: a Surprise trainset to fit on.
        kwargs: dict of options:
            'user_cluster' / 'item_cluster': sequences whose first element
                is the number of user / item clusters.
            'maxiter': number of optimization epochs.
            'random_seed': RNG seed passed to CoClustering.
            'n_users' / 'n_items' (optional): grid dimensions; default
                10000 x 1000, matching the previous hard-coded shape.

    Returns:
        numpy array of shape (n_users, n_items); entry [row, col] is the
        estimated rating for raw user id str(row+1) and item id str(col+1).
    """
    cluster_u = kwargs.get('user_cluster')
    cluster_i = kwargs.get('item_cluster')
    n_epochs = kwargs.get('maxiter')
    # Grid size was previously hard-coded; keep the old shape as default
    # so existing callers are unaffected.
    n_users = kwargs.get('n_users', 10000)
    n_items = kwargs.get('n_items', 1000)

    # Set algorithm
    algo = CoClustering(n_cltr_u=cluster_u[0], n_cltr_i=cluster_i[0],
                        n_epochs=n_epochs, random_state=kwargs['random_seed'])

    # Train the algorithm on the data, and predict ratings for the grid.
    algo.fit(data)

    prediction = np.zeros([n_users, n_items])
    for row in range(n_users):
        for col in range(n_items):
            # Surprise expects raw ids as strings; ids here are 1-based.
            prediction[row, col] = algo.predict(str(row + 1),
                                                str(col + 1)).est

    return prediction
def co_clustering(train, test, ids, Xtest, Xids):
    """
    Co-clustering algorithm: users and items are assigned to clusters and
    co-clusters.
    Argument : train, the trainset
               test, the testset
               ids, unknown ratings
               Xtest, predicted ratings for testset, to be used for final blending
               Xids, predicted ratings for unknown ratings, to be used for final blending
    """
    print('Co-clustering')
    model = CoClustering(n_cltr_u=1, n_cltr_i=1, n_epochs=50, random_state=15)
    model.fit(train)

    # Report RMSE on the training set itself.
    train_predictions = model.test(train.build_testset())
    print('   Training RMSE: ', accuracy.rmse(train_predictions, verbose=False))

    # Score the held-out test set.
    predictions = model.test(test)
    rmse = accuracy.rmse(predictions, verbose=False)
    print('   Test RMSE: ', rmse)

    preds_test = np.array([p.est for p in predictions])

    # Predict the unknown (user, item) pairs for final blending.
    preds_ids = [model.predict(str(user), str(item)).est
                 for user, item in zip(ids[0], ids[1])]

    Xtest.append(preds_test)
    Xids.append(preds_ids)
    return rmse, Xtest, Xids, preds_test, preds_ids
Example #5
0
 def co_clustering(self, n_cltr_u=10, n_cltr_i=10, n_epochs=20):
     """
     Co-clustering collaborative filtering: users and items are assigned
     to clusters and co-clusters (Surprise's CoClustering algorithm).
     Args:
         n_cltr_u: Number of user clusters
         n_cltr_i: Number of item clusters
         n_epochs: Number of iteration of the optimization loop
     Returns:
         predictions_df: The predictions of the model on the test data in
             Pandas Data Frame format
     """
     algorithm = CoClustering(n_cltr_u=n_cltr_u, n_cltr_i=n_cltr_i,
          n_epochs=n_epochs)
     # Fit on the training set, then score the held-out test set.
     predictions = algorithm.fit(self.train_data).test(self.test_data)
     predictions_df = self.data.test_df.copy()
     predictions_df['Rating'] = [x.est for x in predictions]
     if self.test_purpose: 
         # NOTE(review): 'evalueate_model' spelling matches the method
         # defined elsewhere on this class — don't "fix" the call site alone.
         self.evalueate_model(predictions_df['Rating'], 'Surprise co_clustering')
     return predictions_df
Example #6
0
'''
file_path1 = os.path.expanduser("../test1.csv")

reader1 = Reader(line_format="user item rating", sep=',')

data = Dataset.load_from_file(file_path, reader=reader)
data1 = Dataset.load_from_file(file_path1, reader=reader1)
'''
# Build the full trainset from 'data' (loaded earlier in this file).
trainset = data.build_full_trainset()
#testset = data1.build_full_trainset()
# Use the CoClustering algorithm (the original "SVD" note was stale).
algo = CoClustering()

# Run 5-fold cross-validation and print results.
#cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
algo.fit(trainset)
'''
predictions = algo.test(testset)

print(predictions)
'''
'''
uid = str(0)  # raw user id (as in the ratings file). They are **strings**!
iid = str(35546)  # raw item id (as in the ratings file). They are **strings**!

# get a prediction for specific users and items.
pred = algo.predict(uid, iid, verbose=True)
print(pred[3])
'''
# NOTE(review): these file handles are presumably consumed and closed later
# in the file — confirm; prefer a `with` block if they are used only locally.
testset = open("../test1.csv", "r")
result = open("./result/result_CoClustering.txt", "w")
Example #7
0
# Print the top-5 list produced by the previous algorithm (1-based ranks).
for rank in range(1, 6):
    print(str(rank) + '. ' + movie[topMovies2[rank - 1][0]])


#############predictions using Co-Clustering
print('')
print('Making more recommendations...')


# Fit CoClustering on the same trainset and rank its test predictions.
algo3 = CoClustering()
algo3.fit(trainset)

predictions3 = algo3.test(testset)
dictMovies3 = get_top_n(predictions3)
# Recommendations for user 672.
topMovies3 = dictMovies3.get(672)

print('')
print('Here are the top 5 recommendations based on Co-Clustering! ')

# Same 1-based top-5 printout for the Co-Clustering results.
for rank in range(1, 6):
    print(str(rank) + '. ' + movie[topMovies3[rank - 1][0]])
Example #8
0
class Surprise():
    """Recommender wrapper around the Surprise library.

    `train` loads the preprocessed recipe/review data, builds the requested
    Surprise algorithm, fits it and reports RMSE/MAE; `predict` then ranks
    unseen recipes for a given user with the fitted model.
    """

    def train(self, algo='SVD', like=True, test='cv', local=False):
        """Load data, build the requested algorithm, fit and evaluate it.

        Args:
            algo: name of the Surprise algorithm ('NormalPredictor',
                'BaselineOnly', 'KNNBasic', 'KNNWithMeans', 'KNNWithZScore',
                'KNNBaseline', 'SVD', 'SVDpp', 'NMF', 'SlopeOne',
                'CoClustering').
            like: if True, model the binary 'liked' target on a 0-1 scale;
                otherwise the 1-5 'rating'.
            test: 'cv' -> 5-fold cross-validation; 'svd_grid' -> grid search
                over SVD hyper-parameters; anything else -> a single 70/30
                train/test split.
            local: read CSVs from the package data directory instead of
                remote storage.

        Returns:
            (rmse, mae): rounded evaluation scores.
        """
        if local:
            csv_path = os.path.join(os.path.dirname(__file__),
                                    "data/preprocessed")
            self.recipes = pd.read_csv(f"{csv_path}/recipe_pp.csv")
            self.reviews = pd.read_csv(f"{csv_path}/review_pp.csv")
        else:
            self.recipes = storage.import_file('data/preprocessed',
                                               'recipe_pp.csv')
            self.reviews = storage.import_file('data/preprocessed',
                                               'review_pp.csv')

        # Target column and its rating scale.
        if like:
            self.target = 'liked'
            self.s_min = 0
            self.s_max = 1
        else:
            self.target = 'rating'
            self.s_min = 1
            self.s_max = 5

        reader = Reader(rating_scale=(self.s_min, self.s_max))

        self.relevant_data = self.reviews[[
            'user_id', 'recipe_id', self.target
        ]]
        model_data = Dataset.load_from_df(self.relevant_data, reader)

        # Algos
        # BUG FIX: the original chain tested bare string literals
        # (`if 'NormalPredictor':`), which are always truthy, so the first
        # branch always won and `algo` was ignored. Compare against `algo`.
        if algo == 'NormalPredictor':
            self.algorithm = NormalPredictor()

        elif algo == 'BaselineOnly':
            self.algorithm = BaselineOnly()

        elif algo == 'KNNBasic':
            self.algorithm = KNNBasic()

        elif algo == 'KNNWithMeans':
            self.algorithm = KNNWithMeans()

        elif algo == 'KNNWithZScore':
            self.algorithm = KNNWithZScore()

        elif algo == 'KNNBaseline':
            self.algorithm = KNNBaseline()

        elif algo == 'SVD':
            params = {
                'n_epochs': 20,
                'n_factors': 100,
                'lr_all': 0.002,
                'reg_all': 0.02
            }
            # BUG FIX: SVD(params) passed the whole dict positionally as
            # n_factors; the keyword arguments must be unpacked.
            self.algorithm = SVD(**params)  # Tuned with svd_grid

        elif algo == 'SVDpp':
            self.algorithm = SVDpp()

        elif algo == 'NMF':
            self.algorithm = NMF()

        elif algo == 'SlopeOne':
            self.algorithm = SlopeOne()

        elif algo == 'CoClustering':
            self.algorithm = CoClustering()

        if test == 'cv':
            cv_results = cross_validate(self.algorithm,
                                        model_data,
                                        measures=['RMSE', 'MAE'],
                                        cv=5,
                                        verbose=True)
            rmse = np.round(cv_results['test_rmse'].mean(), 3)
            mae = np.round(cv_results['test_mae'].mean(), 3)
            # Refit on the full dataset so `predict` can be used afterwards.
            train_data = model_data.build_full_trainset()
            self.algorithm.fit(train_data)

        elif test == 'svd_grid':
            param_grid = {
                'n_epochs': [10, 20],
                'n_factors': [100, 200],
                'lr_all': [0.001, 0.002],
                'reg_all': [0.01, 0.02]
            }
            gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
            gs.fit(model_data)
            rmse = gs.best_score['rmse']
            mae = gs.best_score['mae']
            print(gs.best_params['rmse'], gs.best_params['mae'])
            # Keep the best estimator and refit it on the full dataset
            # (the duplicate build_full_trainset() call was removed).
            self.algorithm = gs.best_estimator['rmse']
            train_data = model_data.build_full_trainset()
            self.algorithm.fit(train_data)

        else:
            train, test = train_test_split(model_data,
                                           test_size=0.3,
                                           random_state=42)
            self.algorithm.fit(train)
            predictions = self.algorithm.test(test)
            rmse = np.round(accuracy.rmse(predictions), 3)
            mae = np.round(accuracy.mae(predictions), 3)

        return rmse, mae

    def predict(self, user_id):
        """Rank recipes the user has not yet reviewed.

        Args:
            user_id: id of the user to recommend for.

        Returns:
            DataFrame of candidate recipes with the predicted target value
            and a 0-1 normalized 'rec_score', best first.
        """
        inputs = self.relevant_data[self.relevant_data['user_id'] == user_id] \
                 .merge(self.recipes, on="recipe_id", how="left")[['recipe_id', 'name', self.target]]

        display(inputs)

        # Recipes the user already reviewed are excluded from candidates.
        user_recipes = self.relevant_data[self.relevant_data['user_id'] ==
                                          user_id].recipe_id.unique()
        recipe_list = self.relevant_data[
            self.relevant_data['user_id'] != user_id].recipe_id.unique()
        predictions = [
            self.algorithm.predict(user_id, rec) for rec in recipe_list
            if rec not in list(user_recipes)
        ]

        # Surprise Prediction tuples expand into these five columns.
        pdf = pd.DataFrame(predictions,
                           columns=[
                               'user_id', 'recipe_id', self.target,
                               f'rec_{self.target}', 'details'
                           ])
        pdf = pdf.drop(columns=[self.target, 'details'])
        pdf = pdf.sort_values(f'rec_{self.target}', ascending=False)

        # Normalize predictions to [0, 1] for a scale-independent score.
        rec_target = pdf[f'rec_{self.target}']
        pdf['rec_score'] = (rec_target - self.s_min) / (self.s_max -
                                                        self.s_min)

        outputs = pdf.merge(self.recipes, on="recipe_id", how="left")[[
            'recipe_id', 'name', f'rec_{self.target}', 'rec_score'
        ]]

        display(outputs.head(10))

        return outputs
# Report the outcome of the grid search.
print("Best Params\n", gs.best_params)
print("Best Estimators\n", gs.best_estimator)
print("Best Index\n", gs.best_index)
print("Results Dicts: \n")
results_df = pd.DataFrame.from_dict(gs.cv_results)
print(results_df)

# * define a cross-validation iterator
kf = KFold(n_splits=5)

# * Choosing Co-Clustering as algorithm
algo = CoClustering()

# * Fit on each fold's trainset, predict the fold's testset, and report
# * accuracy plus precision/recall at k=5.
for trainset, testset in kf.split(data):
    predictions = algo.fit(trainset).test(testset)
    precisions, recalls = precision_recall_at_k(predictions, k=5, threshold=4)
    for metric in (accuracy.rmse, accuracy.mae, accuracy.mse, accuracy.fcp):
        metric(predictions)
    print("Precision: ", sum(precisions.values()) / len(precisions))
    print("Recall: ", sum(recalls.values()) / len(recalls))

# Persist the last fold's predictions with per-row absolute error.
df = pd.DataFrame(predictions, columns=["uid", "iid", "rui", "est", "details"])
df["err"] = (df.est - df.rui).abs()
df.to_csv("predictions_CoClustering.csv")

# top_n = get_top_n(predictions, n=10)
# * Print the recommended items for each user
# In[88]:


# Fit CoClustering on the full dataset (the stale "SVD" comment was fixed).
from surprise import CoClustering

df_CoClustering = df_final_user_repo_star_v3.copy(deep=True)
dataCoClustering = Dataset.load_from_df(df_CoClustering, reader)


# 3x3 user/item co-clusters, 20 optimization epochs.
coClustering = CoClustering(n_cltr_u=3, n_cltr_i=3, n_epochs=20)

# Train the algorithm on the trainset, and predict ratings for the testset
trainsetcoClustering = dataCoClustering.build_full_trainset()

coClustering.fit(trainsetcoClustering)

# Predict every (user, item) pair absent from the trainset.
testcoClustering = trainsetcoClustering.build_anti_testset()
predictionscoClustering = coClustering.test(testcoClustering)

# BUG FIX: RMSE was previously computed (and verbosely printed) twice;
# compute it once and record it alongside the model name.
listOfRMSE.append(accuracy.rmse(predictionscoClustering))
models.append('CoClustering')


# ## SlopeOne Implementation

# In[89]: