Beispiel #1
0
    def __init__(self,
                 train_file,
                 test_file,
                 prediction_file=None,
                 steps=30,
                 learn_rate=0.01,
                 delta=0.015,
                 factors=10,
                 init_mean=0.1,
                 init_stdev=0.1,
                 bias_learn_rate=0.005,
                 bias_reg=0.002,
                 random_seed=0):
        """
        Set up the base matrix-factorization state, then add the extra
        per-item factor matrix and per-user normalisation terms.

        Every argument is handed to ``MatrixFactorization.__init__`` under the
        same keyword, with two exceptions: ``bias_reg`` is forwarded as
        ``delta_bias``, and ``baseline`` is always forced to ``True``.

        After the base initialiser returns, two attributes are created:

        * ``self.y`` -- array of shape ``(len(self.items), self.factors)``
          drawn from a normal distribution with mean ``self.init_mean`` and
          standard deviation ``self.init_stdev``.
        * ``self.n_u`` -- dict mapping each user's position in ``self.users``
          to ``|N(u)|^(-1/2)``, where ``N(u)`` is that user's entry in
          ``self.train_set["feedback"]``.

        NOTE(review): ``self.items``, ``self.users`` and ``self.train_set`` are
        expected to be populated by the base initialiser -- confirm against
        MatrixFactorization.
        """
        base_options = dict(
            train_file=train_file,
            test_file=test_file,
            prediction_file=prediction_file,
            steps=steps,
            learn_rate=learn_rate,
            delta=delta,
            factors=factors,
            init_mean=init_mean,
            init_stdev=init_stdev,
            baseline=True,
            bias_learn_rate=bias_learn_rate,
            delta_bias=bias_reg,
            random_seed=random_seed,
        )
        MatrixFactorization.__init__(self, **base_options)

        # One latent vector per item, sampled from N(init_mean, init_stdev).
        self.y = np.random.normal(
            self.init_mean,
            self.init_stdev,
            (len(self.items), self.factors),
        )

        # |N(u)|^(-1/2) for every user index. A user missing from the feedback
        # map falls back to the one-element list [0], i.e. a count of 1, so the
        # negative power is never applied to zero for that case.
        self.n_u = {
            position: np.power(
                len(self.train_set["feedback"].get(user_id, [0])), -.5)
            for position, user_id in enumerate(self.users)
        }
Beispiel #2
0
 def get_mf(self):
     """
     Run a MatrixFactorization recommender configured from this object.

     The train/test/output files, learning rate, delta and both separators
     are read from attributes of ``self``; ``self.factors`` is coerced to
     ``int`` first. ``compute`` is called with ``verbose_evaluation=False``.
     Nothing is returned.
     """
     settings = {
         "train_file": self.train_file,
         "test_file": self.test_file,
         "output_file": self.output_file,
         "factors": int(self.factors),
         "learn_rate": self.learn_rate,
         "delta": self.delta,
         "sep": self.sep,
         "output_sep": self.output_sep,
     }
     recommender = MatrixFactorization(**settings)
     recommender.compute(verbose_evaluation=False)
Beispiel #3
0
    def run_recommenders(self, r):
        """
        Train the recommender selected by ``r`` and return its predictions.

        Recommender codes:
            1: Item KNN
            2: User KNN
            3: Matrix Factorization
            4: SVD++

        :param r: recommender code; also the key used to look up
            ``self.unlabeled_data``, ``self.labeled_files`` and
            ``self.unlabeled_files``.
        :return: ``(predictions, flag)``. When ``self.unlabeled_data[r]`` is
            empty nothing is trained and ``([], False)`` is returned;
            otherwise the model's ``predictions`` and ``True``.
        :raises NameError: if there is unlabeled data for ``r`` but ``r`` is
            not one of the four codes above.

        The predictions are also recorded in ``self.recommenders_predictions``
        via ``setdefault``, so an entry already stored under ``r`` is kept
        (assuming a dict-like container).
        """
        # Guard clause: no unlabeled data means there is nothing to predict.
        if not self.unlabeled_data[r]:
            return [], False

        # Each branch builds and trains its model; prediction and bookkeeping
        # are shared and happen once after the chain.
        if r == 1:
            model = ItemKNN(self.labeled_files[r],
                            self.unlabeled_files[r],
                            as_similar_first=True)
            model.read_files()
            model.init_model()
            model.train_baselines()
        elif r == 2:
            model = UserKNN(self.labeled_files[r], self.unlabeled_files[r])
            model.read_files()
            model.init_model()
            model.train_baselines()
        elif r == 3:
            model = MatrixFactorization(self.labeled_files[r],
                                        self.unlabeled_files[r],
                                        random_seed=1,
                                        baseline=True)
            model.read_files()
            model.init_model()
            model.fit()
        elif r == 4:
            # No explicit init_model() call for SVD++, unlike the MF branch.
            model = SVDPlusPlus(self.labeled_files[r],
                                self.unlabeled_files[r],
                                random_seed=1)
            model.read_files()
            model.fit()
        else:
            raise NameError('Invalid Recommender!')

        model.predict()
        self.recommenders_predictions.setdefault(r, model.predictions)
        return model.predictions, True
Beispiel #4
0
"""

from caserec.recommenders.rating_prediction.svdplusplus import SVDPlusPlus
from caserec.recommenders.rating_prediction.matrixfactorization import MatrixFactorization
from caserec.utils.cross_validation import CrossValidation

# Example driver: runs MatrixFactorization and SVDPlusPlus on one fixed
# train/test split. All paths are absolute and machine-specific.

# Only referenced by the commented-out CrossValidation call further down.
db = 'C:/Users/user/OneDrive/ml-100k/u.data'
folds_path = 'C:/Users/user/OneDrive/ml-100k/'

# The four names below are not referenced in the visible part of this script.
# NOTE(review): the '******' values look like redacted placeholders -- confirm.
metadata_item = 'C:/Users/user/OneDrive/ml-100k/db_item_subject.dat'
sm_item = 'C:/Users/user/OneDrive/ml-100k/sim_item.dat'
metadata_user = '******'
sm_user = '******'

# Train/test files of fold 0, passed to both .compute() runs below.
tr = 'C:/Users/user/OneDrive/ml-100k/folds/0/train.dat'
te = 'C:/Users/user/OneDrive/ml-100k/folds/0/test.dat'
"""

    UserKNN

"""

# Cross Validation
# Built with no arguments; only the commented-out CrossValidation call below
# would consume it.
recommender = MatrixFactorization()

# CrossValidation(input_file=db, recommender=recommender, dir_folds=folds_path, header=1, k_folds=5).compute()

# # Simple
# Each recommender is constructed with (train, test) and run immediately.
MatrixFactorization(tr, te).compute()
SVDPlusPlus(tr, te).compute()
from caserec.utils.cross_validation import CrossValidation

# Example driver: cross-validates MatrixFactorization over the ratings file.
# All paths are relative, so they depend on the working directory at run time.

# Input ratings file and fold directory handed to CrossValidation below.
db = '../../datasets/ml-100k/u.data'
folds_path = '../../datasets/ml-100k/'

# The four names below are not referenced in the visible part of this script.
# NOTE(review): the '******' values look like redacted placeholders -- confirm.
metadata_item = '../../datasets/ml-100k/db_item_subject.dat'
sm_item = '../../datasets/ml-100k/sim_item.dat'
metadata_user = '******'
sm_user = '******'

# Only referenced by the commented-out "Simple" runs at the bottom.
tr = '../../datasets/ml-100k/folds/0/train.dat'
te = '../../datasets/ml-100k/folds/0/test.dat'
"""

    UserKNN

"""

# Cross Validation
# Recommender built with no arguments and handed to CrossValidation.
recommender = MatrixFactorization()

# 5 folds; header=1 presumably skips one header line of the input file
# -- TODO confirm against CrossValidation.
CrossValidation(input_file=db,
                recommender=recommender,
                dir_folds=folds_path,
                header=1,
                k_folds=5).compute()

# # Simple
# MatrixFactorization(tr, te).compute()
# SVDPlusPlus(tr, te).compute()
Beispiel #6
0
    UserKNN

"""

# Cross Validation
# recommender = MatrixFactorization()

# CrossValidation(input_file=db, recommender=recommender, dir_folds=folds_path, header=1, k_folds=5).compute()

# # Simple
# MatrixFactorization(tr, te).compute()
# SVDPlusPlus(tr, te).compute()

# Example driver: runs MatrixFactorization and ItemMSMF on the same
# train/test pair. All paths are absolute and machine-specific.
train = "C:/dev/train.dat"
test = "C:/dev/test.dat"
# Only referenced by the commented-out GSVDPlusPlus call below.
metadata = "C:/dev/item_subject.dat"
# GSVDPlusPlus(train, test, metadata_file=metadata).compute()
# Passed to ItemMSMF as similarity_file.
similarity = "C:/dev/vsm.dat"

# Both runs share random_seed=0, epochs=30 and stop_criteria=0.009
# (presumably so their results are comparable -- confirm).
MatrixFactorization(train, test, random_seed=0, epochs=30,
                    stop_criteria=0.009).compute()
ItemMSMF(train,
         test,
         similarity_file=similarity,
         neighbors=20,
         random_seed=0,
         baseline=True,
         epochs=30,
         stop_criteria=0.009).compute()