def coClustering(trainset, testset):
    # CoClustering
    print("\n" + "-" * 5 + " CoClustering algorithm using surprise package " + "-" * 5)
    algo = CoClustering()
    algo.fit(trainset)
    predictions = algo.test(testset)
    rmse = accuracy.rmse(predictions)
    mae = accuracy.mae(predictions)
    return rmse, mae, predictions
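# A minimal, self-contained way to exercise coClustering() above -- a sketch assuming
# the standard Surprise imports and the built-in MovieLens-100k dataset (the original
# project's data loading is not shown):
from surprise import CoClustering, Dataset, accuracy
from surprise.model_selection import train_test_split

data = Dataset.load_builtin('ml-100k')
trainset, testset = train_test_split(data, test_size=0.2, random_state=0)
rmse, mae, predictions = coClustering(trainset, testset)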
def Coclustering(recipe_df, train_rating_df, pd, benchmark):
    print("\n###### Compute CoClustering ######")
    df = pd.merge(recipe_df, train_rating_df, on='recipe_id', how='inner')
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(df[['user_id', 'recipe_id', 'rating']], reader)
    trainSet, testSet = train_test_split(data, test_size=.2, random_state=0)
    algo = CoClustering()
    algo.fit(trainSet)
    predictions = algo.test(testSet)
    Evaluators.RunAllEvals(predictions, benchmark)
def cluster(data, kwargs):
    # Read hyperparameters from kwargs
    cluster_u = kwargs.get('user_cluster')
    cluster_i = kwargs.get('item_cluster')
    n_epochs = kwargs.get('maxiter')

    # Set algorithm
    algo = CoClustering(n_cltr_u=cluster_u[0],
                        n_cltr_i=cluster_i[0],
                        n_epochs=n_epochs,
                        random_state=kwargs['random_seed'])

    # Train the algorithm on the data, and predict a rating for every (user, item) pair
    algo.fit(data)
    prediction = np.zeros([10000, 1000])
    for row in range(10000):
        for col in range(1000):
            prediction[row, col] = algo.predict(str(row + 1), str(col + 1)).est
    return prediction
def co_clustering(train, test, ids, Xtest, Xids):
    """
    Co-clustering algorithm: users and items are assigned to clusters and co-clusters.

    Arguments:
        train: the trainset
        test: the testset
        ids: unknown ratings
        Xtest: predicted ratings for the testset, to be used for final blending
        Xids: predicted ratings for the unknown ratings, to be used for final blending
    """
    print('Co-clustering')

    algo = CoClustering(n_cltr_u=1, n_cltr_i=1, n_epochs=50, random_state=15)

    # Train algorithm on training set
    algo.fit(train)

    # Predict on train and compute RMSE
    predictions = algo.test(train.build_testset())
    print(' Training RMSE: ', accuracy.rmse(predictions, verbose=False))

    # Predict on test and compute RMSE
    predictions = algo.test(test)
    rmse = accuracy.rmse(predictions, verbose=False)
    print(' Test RMSE: ', rmse)

    preds_test = np.zeros(len(predictions))
    for j, pred in enumerate(predictions):
        preds_test[j] = pred.est

    # Predict unknown ratings
    preds_ids = []
    for i in range(len(ids[0])):
        pred = algo.predict(str(ids[0][i]), str(ids[1][i]))
        preds_ids.append(pred.est)

    Xtest.append(preds_test)
    Xids.append(preds_ids)

    return rmse, Xtest, Xids, preds_test, preds_ids
def co_clustering(self, n_cltr_u=10, n_cltr_i=10, n_epochs=20):
    """
    Co-clustering collaborative filtering algorithm: users and items are assigned
    to clusters and co-clusters.

    Args:
        n_cltr_u: Number of user clusters
        n_cltr_i: Number of item clusters
        n_epochs: Number of iterations of the optimization loop
    Returns:
        predictions_df: The predictions of the model on the test data as a pandas DataFrame
    """
    algorithm = CoClustering(n_cltr_u=n_cltr_u, n_cltr_i=n_cltr_i, n_epochs=n_epochs)
    predictions = algorithm.fit(self.train_data).test(self.test_data)
    predictions_df = self.data.test_df.copy()
    predictions_df['Rating'] = [x.est for x in predictions]
    if self.test_purpose:
        self.evalueate_model(predictions_df['Rating'], 'Surprise co_clustering')
    return predictions_df
'''
file_path1 = os.path.expanduser("../test1.csv")
reader1 = Reader(line_format="user item rating", sep=',')
data = Dataset.load_from_file(file_path, reader=reader)
data1 = Dataset.load_from_file(file_path1, reader=reader1)
'''

trainset = data.build_full_trainset()
#testset = data1.build_full_trainset()

# Use the CoClustering algorithm.
algo = CoClustering()

# Run 5-fold cross-validation and print results.
#cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

algo.fit(trainset)

'''
predictions = algo.test(testset)
print(predictions)
'''

'''
uid = str(0)      # raw user id (as in the ratings file). They are **strings**!
iid = str(35546)  # raw item id (as in the ratings file). They are **strings**!

# get a prediction for specific users and items.
pred = algo.predict(uid, iid, verbose=True)
print(pred[3])
'''

testset = open("../test1.csv", "r")
result = open("./result/result_CoClustering.txt", "w")
for i in range(5):
    movieRecc2 = topMovies2[i]
    movieRawID2 = movieRecc2[0]
    movieName2 = movie[movieRawID2]
    print(str(i+1) + '. ' + movieName2)

############# predictions using Co-Clustering
print('')
print('Making more recommendations...')
algo3 = CoClustering()
algo3.fit(trainset)
predictions3 = algo3.test(testset)
dictMovies3 = get_top_n(predictions3)
topMovies3 = dictMovies3.get(672)
print('')
print('Here are the top 5 recommendations based on Co-Clustering!')
for i in range(5):
    movieRecc3 = topMovies3[i]
    movieRawID3 = movieRecc3[0]
    movieName3 = movie[movieRawID3]
    print(str(i+1) + '. ' + movieName3)
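# The script above calls get_top_n() without defining it here. A minimal sketch of
# such a helper, following the recipe in the Surprise documentation FAQ (the original
# project's helper may differ in details such as the default n):
from collections import defaultdict

def get_top_n(predictions, n=10):
    """Map each user id in `predictions` to its n highest-estimated (item id, rating) pairs."""
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n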
class Surprise():

    def train(self, algo='SVD', like=True, test='cv', local=False):

        if local:
            csv_path = os.path.join(os.path.dirname(__file__), "data/preprocessed")
            self.recipes = pd.read_csv(f"{csv_path}/recipe_pp.csv")
            self.reviews = pd.read_csv(f"{csv_path}/review_pp.csv")
        else:
            self.recipes = storage.import_file('data/preprocessed', 'recipe_pp.csv')
            self.reviews = storage.import_file('data/preprocessed', 'review_pp.csv')

        if like:
            self.target = 'liked'
            self.s_min = 0
            self.s_max = 1
        else:
            self.target = 'rating'
            self.s_min = 1
            self.s_max = 5

        reader = Reader(rating_scale=(self.s_min, self.s_max))
        self.relevant_data = self.reviews[['user_id', 'recipe_id', self.target]]
        model_data = Dataset.load_from_df(self.relevant_data, reader)

        # Algos -- select the estimator named by the `algo` argument
        if algo == 'NormalPredictor':
            self.algorithm = NormalPredictor()
        elif algo == 'BaselineOnly':
            self.algorithm = BaselineOnly()
        elif algo == 'KNNBasic':
            self.algorithm = KNNBasic()
        elif algo == 'KNNWithMeans':
            self.algorithm = KNNWithMeans()
        elif algo == 'KNNWithZScore':
            self.algorithm = KNNWithZScore()
        elif algo == 'KNNBaseline':
            self.algorithm = KNNBaseline()
        elif algo == 'SVD':
            params = {'n_epochs': 20, 'n_factors': 100, 'lr_all': 0.002, 'reg_all': 0.02}
            self.algorithm = SVD(**params)  # Tuned with svd_grid
        elif algo == 'SVDpp':
            self.algorithm = SVDpp()
        elif algo == 'NMF':
            self.algorithm = NMF()
        elif algo == 'SlopeOne':
            self.algorithm = SlopeOne()
        elif algo == 'CoClustering':
            self.algorithm = CoClustering()

        if test == 'cv':
            cv_results = cross_validate(self.algorithm, model_data,
                                        measures=['RMSE', 'MAE'], cv=5, verbose=True)
            rmse = np.round(cv_results['test_rmse'].mean(), 3)
            mae = np.round(cv_results['test_mae'].mean(), 3)
            train_data = model_data.build_full_trainset()
            self.algorithm.fit(train_data)

        elif test == 'svd_grid':
            param_grid = {'n_epochs': [10, 20], 'n_factors': [100, 200],
                          'lr_all': [0.001, 0.002], 'reg_all': [0.01, 0.02]}
            gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3)
            gs.fit(model_data)
            rmse = gs.best_score['rmse']
            mae = gs.best_score['mae']
            print(gs.best_params['rmse'], gs.best_params['mae'])
            self.algorithm = gs.best_estimator['rmse']
            train_data = model_data.build_full_trainset()
            self.algorithm.fit(train_data)

        else:
            train, test = train_test_split(model_data, test_size=0.3, random_state=42)
            self.algorithm.fit(train)
            predictions = self.algorithm.test(test)
            rmse = np.round(accuracy.rmse(predictions), 3)
            mae = np.round(accuracy.mae(predictions), 3)

        return rmse, mae

    def predict(self, user_id):
        inputs = self.relevant_data[self.relevant_data['user_id'] == user_id] \
            .merge(self.recipes, on="recipe_id", how="left")[['recipe_id', 'name', self.target]]
        display(inputs)

        user_recipes = self.relevant_data[
            self.relevant_data['user_id'] == user_id].recipe_id.unique()
        recipe_list = self.relevant_data[
            self.relevant_data['user_id'] != user_id].recipe_id.unique()

        predictions = [
            self.algorithm.predict(user_id, rec)
            for rec in recipe_list if rec not in list(user_recipes)
        ]

        pdf = pd.DataFrame(predictions, columns=[
            'user_id', 'recipe_id', self.target, f'rec_{self.target}', 'details'
        ])
        pdf = pdf.drop(columns=[self.target, 'details'])
        pdf = pdf.sort_values(f'rec_{self.target}', ascending=False)
        rec_target = pdf[f'rec_{self.target}']
        pdf['rec_score'] = (rec_target - self.s_min) / (self.s_max - self.s_min)

        outputs = pdf.merge(self.recipes, on="recipe_id", how="left")[[
            'recipe_id', 'name', f'rec_{self.target}', 'rec_score'
        ]]
        display(outputs.head(10))

        return outputs
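# Hypothetical usage of the Surprise wrapper above (a sketch: it assumes the
# preprocessed recipe_pp.csv / review_pp.csv files, or the `storage` backend,
# are available, and that `a_user_id` is a user present in review_pp.csv):
#
#   recommender = Surprise()
#   rmse, mae = recommender.train(algo='CoClustering', like=True, test='cv', local=True)
#   recommendations = recommender.predict(a_user_id)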
print("Best Params\n", gs.best_params) print("Best Estimators\n", gs.best_estimator) print("Best Index\n", gs.best_index) print("Results Dicts: \n") results_df = pd.DataFrame.from_dict(gs.cv_results) print(results_df) # * define a cross-validation iterator kf = KFold(n_splits=5) # * Choosing Co-Clustering as algorithm algo = CoClustering() # * Train the algorithm on the trainset, and predict ratings for the testset for trainset, testset in kf.split(data): predictions = algo.fit(trainset).test(testset) precisions, recalls = precision_recall_at_k(predictions, k=5, threshold=4) accuracy.rmse(predictions) accuracy.mae(predictions) accuracy.mse(predictions) accuracy.fcp(predictions) print("Precision: ", sum(prec for prec in precisions.values()) / len(precisions)) print("Recall: ", sum(rec for rec in recalls.values()) / len(recalls)) df = pd.DataFrame(predictions, columns=["uid", "iid", "rui", "est", "details"]) df["err"] = abs(df.est - df.rui) df.to_csv("predictions_CoClustering.csv") # top_n = get_top_n(predictions, n=10) # * Print the recommended items for each user
# In[88]:

# We'll use the CoClustering algorithm.
from surprise import CoClustering

df_CoClustering = df_final_user_repo_star_v3.copy(deep=True)
dataCoClustering = Dataset.load_from_df(df_CoClustering, reader)

coClustering = CoClustering(n_cltr_u=3, n_cltr_i=3, n_epochs=20)

# Train the algorithm on the full trainset, and predict ratings for the anti-testset
trainsetcoClustering = dataCoClustering.build_full_trainset()
coClustering.fit(trainsetcoClustering)
testcoClustering = trainsetcoClustering.build_anti_testset()
predictionscoClustering = coClustering.test(testcoClustering)

rmseCoClustering = accuracy.rmse(predictionscoClustering)
listOfRMSE.append(rmseCoClustering)
models.append('CoClustering')

# ## SlopeOne Implementation

# In[89]: