Exemple #1
0
    def fit(self,
            epochs=300,
            batch_size=1000,
            num_factors=10,
            positive_threshold_BPR=None,
            learning_rate=0.001,
            use_bias=True,
            sgd_mode='sgd',
            negative_interactions_quota=0.0,
            init_mean=0.0,
            init_std_dev=0.1,
            user_reg=0.0,
            item_reg=0.0,
            bias_reg=0.0,
            positive_reg=0.0,
            negative_reg=0.0,
            verbose=False,
            random_seed=None,
            **earlystopping_kwargs):
        """Train the model selected by ``self.algorithm_name``.

        A ``MatrixFactorization_Cython_Epoch`` trainer is built, handed to
        ``_train_with_early_stopping`` and, once that returns, the ``*_best``
        attributes are installed as the final factors (and biases when
        ``use_bias`` is true).

        * ``"FUNK_SVD"`` / ``"ASY_SVD"``: the trainer receives
          ``self.URM_train`` together with ``item_reg``, ``bias_reg`` and
          ``negative_interactions_quota``; ``positive_reg``, ``negative_reg``
          and ``positive_threshold_BPR`` are not used.
        * ``"MF_BPR"``: the trainer receives a copy of ``self.URM_train``
          (filtered by ``positive_threshold_BPR`` when it is not None)
          together with ``positive_reg`` and ``negative_reg``; ``item_reg``,
          ``bias_reg`` and ``negative_interactions_quota`` are not forwarded.

        :param epochs: forwarded as first argument to
            ``_train_with_early_stopping``.
        :param positive_threshold_BPR: MF_BPR only; when not None, only
            interactions whose value is >= this threshold are kept.
        :param negative_interactions_quota: must satisfy ``0 <= quota < 1``.
        :param earlystopping_kwargs: forwarded untouched to
            ``_train_with_early_stopping``.
        :raises AssertionError: if ``negative_interactions_quota`` is out of
            range, or if the MF_BPR threshold removes every interaction.
        :raises ValueError: if ``self.algorithm_name`` is not one of
            ``"FUNK_SVD"``, ``"ASY_SVD"``, ``"MF_BPR"``.
        """

        self.num_factors = num_factors
        self.use_bias = use_bias
        self.sgd_mode = sgd_mode
        self.verbose = verbose
        self.positive_threshold_BPR = positive_threshold_BPR
        self.learning_rate = learning_rate

        assert 0.0 <= negative_interactions_quota < 1.0, "{}: negative_interactions_quota must be a float value >=0 and < 1.0, provided was '{}'".format(
            self.RECOMMENDER_NAME, negative_interactions_quota)
        self.negative_interactions_quota = negative_interactions_quota

        # Import compiled module (kept local: it only exists after the Cython
        # extension has been built).
        from MatrixFactorization.Cython.MatrixFactorization_Cython_Epoch import MatrixFactorization_Cython_Epoch

        # Keyword arguments that every supported algorithm passes unchanged.
        shared_kwargs = dict(
            algorithm_name=self.algorithm_name,
            n_factors=self.num_factors,
            learning_rate=learning_rate,
            sgd_mode=sgd_mode,
            user_reg=user_reg,
            batch_size=batch_size,
            use_bias=use_bias,
            init_mean=init_mean,
            init_std_dev=init_std_dev,
            verbose=verbose,
            random_seed=random_seed)

        if self.algorithm_name in ["FUNK_SVD", "ASY_SVD"]:

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                self.URM_train,
                item_reg=item_reg,
                bias_reg=bias_reg,
                negative_interactions_quota=negative_interactions_quota,
                **shared_kwargs)

        elif self.algorithm_name == "MF_BPR":

            # Select only positive interactions
            URM_train_positive = self.URM_train.copy()

            if self.positive_threshold_BPR is not None:
                # The comparison replaces the stored values with booleans;
                # entries below the threshold become False and are then
                # dropped from the sparse structure by eliminate_zeros().
                URM_train_positive.data = URM_train_positive.data >= self.positive_threshold_BPR
                URM_train_positive.eliminate_zeros()

                assert URM_train_positive.nnz > 0, "MatrixFactorization_Cython: URM_train_positive is empty, positive threshold is too high"

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                URM_train_positive,
                positive_reg=positive_reg,
                negative_reg=negative_reg,
                **shared_kwargs)

        else:
            # Without this guard an unknown name would either crash later on a
            # missing self.cythonEpoch or silently retrain the trainer left
            # over from a previous fit() call.
            raise ValueError(
                "{}: unsupported algorithm_name '{}', expected one of 'FUNK_SVD', 'ASY_SVD', 'MF_BPR'".format(
                    self.RECOMMENDER_NAME, self.algorithm_name))

        # Snapshot the freshly initialised model as the first "best" model
        # before any epoch is run.
        self._prepare_model_for_validation()
        self._update_best_model()

        self._train_with_early_stopping(epochs,
                                        algorithm_name=self.algorithm_name,
                                        **earlystopping_kwargs)

        self.USER_factors = self.USER_factors_best
        self.ITEM_factors = self.ITEM_factors_best

        if self.use_bias:
            self.USER_bias = self.USER_bias_best
            self.ITEM_bias = self.ITEM_bias_best
            self.GLOBAL_bias = self.GLOBAL_bias_best

        sys.stdout.flush()
Exemple #2
0
class _MatrixFactorization_Cython(BaseMatrixFactorizationRecommender,
                                  Incremental_Training_Early_Stopping):
    """Matrix-factorization recommender trained by a compiled Cython epoch loop.

    ``algorithm_name`` selects the training algorithm; ``fit`` supports
    ``"FUNK_SVD"``, ``"ASY_SVD"`` and ``"MF_BPR"``. The per-epoch work is
    delegated to ``MatrixFactorization_Cython_Epoch``; the ``_prepare_*``,
    ``_update_*`` and ``_run_epoch`` methods are the hooks this class provides
    for the early-stopping training loop.
    """

    RECOMMENDER_NAME = "MatrixFactorization_Cython_Recommender"

    def __init__(self,
                 URM_train,
                 recompile_cython=False,
                 algorithm_name="MF_BPR"):
        """Store the training matrix and optionally rebuild the Cython module.

        :param URM_train: training matrix, handed to the base-class
            constructor; its ``shape`` is read as ``(n_users, n_items)``.
        :param recompile_cython: when true, run ``runCompilationScript``
            before returning.
        :param algorithm_name: algorithm later used by ``fit``.
        """
        super(_MatrixFactorization_Cython, self).__init__(URM_train)

        self.n_users, self.n_items = self.URM_train.shape
        self.normalize = False
        self.algorithm_name = algorithm_name

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    def fit(self,
            epochs=300,
            batch_size=1000,
            num_factors=10,
            positive_threshold_BPR=None,
            learning_rate=0.001,
            use_bias=True,
            sgd_mode='sgd',
            negative_interactions_quota=0.0,
            init_mean=0.0,
            init_std_dev=0.1,
            user_reg=0.0,
            item_reg=0.0,
            bias_reg=0.0,
            positive_reg=0.0,
            negative_reg=0.0,
            verbose=False,
            random_seed=None,
            **earlystopping_kwargs):
        """Train the model selected by ``self.algorithm_name``.

        * ``"FUNK_SVD"`` / ``"ASY_SVD"``: the trainer receives
          ``self.URM_train`` together with ``item_reg``, ``bias_reg`` and
          ``negative_interactions_quota``.
        * ``"MF_BPR"``: the trainer receives a copy of ``self.URM_train``
          (filtered by ``positive_threshold_BPR`` when it is not None)
          together with ``positive_reg`` and ``negative_reg``.

        After training, the ``*_best`` snapshots become the final factors
        (and biases when ``use_bias`` is true).

        :param epochs: forwarded as first argument to
            ``_train_with_early_stopping``.
        :param negative_interactions_quota: must satisfy ``0 <= quota < 1``.
        :param earlystopping_kwargs: forwarded untouched to
            ``_train_with_early_stopping``.
        :raises AssertionError: if ``negative_interactions_quota`` is out of
            range, or if the MF_BPR threshold removes every interaction.
        :raises ValueError: if ``self.algorithm_name`` is not supported.
        """

        self.num_factors = num_factors
        self.use_bias = use_bias
        self.sgd_mode = sgd_mode
        self.verbose = verbose
        self.positive_threshold_BPR = positive_threshold_BPR
        self.learning_rate = learning_rate

        assert 0.0 <= negative_interactions_quota < 1.0, "{}: negative_interactions_quota must be a float value >=0 and < 1.0, provided was '{}'".format(
            self.RECOMMENDER_NAME, negative_interactions_quota)
        self.negative_interactions_quota = negative_interactions_quota

        # Import compiled module (kept local: it only exists after the Cython
        # extension has been built, possibly by __init__ itself).
        from MatrixFactorization.Cython.MatrixFactorization_Cython_Epoch import MatrixFactorization_Cython_Epoch

        # Keyword arguments that every supported algorithm passes unchanged.
        shared_kwargs = dict(
            algorithm_name=self.algorithm_name,
            n_factors=self.num_factors,
            learning_rate=learning_rate,
            sgd_mode=sgd_mode,
            user_reg=user_reg,
            batch_size=batch_size,
            use_bias=use_bias,
            init_mean=init_mean,
            init_std_dev=init_std_dev,
            verbose=verbose,
            random_seed=random_seed)

        if self.algorithm_name in ["FUNK_SVD", "ASY_SVD"]:

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                self.URM_train,
                item_reg=item_reg,
                bias_reg=bias_reg,
                negative_interactions_quota=negative_interactions_quota,
                **shared_kwargs)

        elif self.algorithm_name == "MF_BPR":

            # Select only positive interactions
            URM_train_positive = self.URM_train.copy()

            if self.positive_threshold_BPR is not None:
                # The comparison replaces the stored values with booleans;
                # entries below the threshold become False and are then
                # dropped from the sparse structure by eliminate_zeros().
                URM_train_positive.data = URM_train_positive.data >= self.positive_threshold_BPR
                URM_train_positive.eliminate_zeros()

                assert URM_train_positive.nnz > 0, "MatrixFactorization_Cython: URM_train_positive is empty, positive threshold is too high"

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                URM_train_positive,
                positive_reg=positive_reg,
                negative_reg=negative_reg,
                **shared_kwargs)

        else:
            # Without this guard an unknown name would either crash later on a
            # missing self.cythonEpoch or silently retrain the trainer left
            # over from a previous fit() call.
            raise ValueError(
                "{}: unsupported algorithm_name '{}', expected one of 'FUNK_SVD', 'ASY_SVD', 'MF_BPR'".format(
                    self.RECOMMENDER_NAME, self.algorithm_name))

        # Snapshot the freshly initialised model as the first "best" model
        # before any epoch is run.
        self._prepare_model_for_validation()
        self._update_best_model()

        self._train_with_early_stopping(epochs,
                                        algorithm_name=self.algorithm_name,
                                        **earlystopping_kwargs)

        self.USER_factors = self.USER_factors_best
        self.ITEM_factors = self.ITEM_factors_best

        if self.use_bias:
            self.USER_bias = self.USER_bias_best
            self.ITEM_bias = self.ITEM_bias_best
            self.GLOBAL_bias = self.GLOBAL_bias_best

        sys.stdout.flush()

    def _prepare_model_for_validation(self):
        """Pull the current factors (and biases) out of the Cython trainer."""
        self.USER_factors = self.cythonEpoch.get_USER_factors()
        self.ITEM_factors = self.cythonEpoch.get_ITEM_factors()

        if self.use_bias:
            self.USER_bias = self.cythonEpoch.get_USER_bias()
            self.ITEM_bias = self.cythonEpoch.get_ITEM_bias()
            self.GLOBAL_bias = self.cythonEpoch.get_GLOBAL_bias()

    def _update_best_model(self):
        """Snapshot the current model attributes into the ``*_best`` ones."""
        self.USER_factors_best = self.USER_factors.copy()
        self.ITEM_factors_best = self.ITEM_factors.copy()

        if self.use_bias:
            self.USER_bias_best = self.USER_bias.copy()
            self.ITEM_bias_best = self.ITEM_bias.copy()
            # Stored without .copy(); presumably a plain scalar -- TODO confirm
            # against get_GLOBAL_bias().
            self.GLOBAL_bias_best = self.GLOBAL_bias

    def _run_epoch(self, num_epoch):
        """Run one training epoch in the Cython trainer (``num_epoch`` is unused)."""
        self.cythonEpoch.epochIteration_Cython()

    def runCompilationScript(self):
        """Compile the Cython epoch module through ``run_compile_subprocess``."""

        # Run compile script setting the working directory to ensure the compiled file are contained in the
        # appropriate subfolder and not the project root

        file_subfolder = "/MatrixFactorization/Cython"
        file_to_compile_list = ['MatrixFactorization_Cython_Epoch.pyx']

        run_compile_subprocess(file_subfolder, file_to_compile_list)

        print("{}: Compiled module {} in subfolder: {}".format(
            self.RECOMMENDER_NAME, file_to_compile_list, file_subfolder))
Exemple #3
0
class MatrixFactorization_Cython(Recommender,
                                 Incremental_Training_Early_Stopping):
    """Matrix-factorization recommender trained by a compiled Cython epoch loop.

    The learned model is the pair ``W`` / ``H``; ``compute_score_MF`` scores
    items as ``W[user_id] . H^T``. ``algorithm`` selects the training
    algorithm; ``fit`` supports ``"FUNK_SVD"``, ``"ASY_SVD"`` and ``"MF_BPR"``.
    """

    RECOMMENDER_NAME = "MatrixFactorization_Cython_Recommender"

    def __init__(self,
                 URM_train,
                 positive_threshold=4,
                 URM_validation=None,
                 recompile_cython=False,
                 algorithm="MF_BPR"):
        """Store the data and optionally rebuild the Cython module.

        :param URM_train: training matrix; ``shape`` is read as
            ``(n_users, n_items)``.
        :param positive_threshold: MF_BPR only; interactions with a value
            below it are discarded before training.
        :param URM_validation: optional validation matrix; copied when given.
        :param recompile_cython: when true, run ``runCompilationScript``
            before returning.
        :param algorithm: algorithm later used by ``fit``.
        """

        super(MatrixFactorization_Cython, self).__init__()

        self.URM_train = URM_train
        self.n_users = URM_train.shape[0]
        self.n_items = URM_train.shape[1]
        self.normalize = False

        self.algorithm = algorithm

        self.positive_threshold = positive_threshold

        if URM_validation is not None:
            self.URM_validation = URM_validation.copy()
        else:
            self.URM_validation = None

        self.compute_item_score = self.compute_score_MF

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    def compute_score_MF(self, user_id):
        """Return ``np.dot(self.W[user_id], self.H.T)`` for the given user index(es)."""

        scores_array = np.dot(self.W[user_id], self.H.T)

        return scores_array

    def fit(self,
            epochs=300,
            batch_size=1000,
            num_factors=10,
            learning_rate=0.01,
            sgd_mode='sgd',
            user_reg=0.0,
            positive_reg=0.0,
            negative_reg=0.0,
            stop_on_validation=False,
            lower_validatons_allowed=5,
            validation_metric="MAP",
            evaluator_object=None,
            validation_every_n=5):
        """Train the model selected by ``self.algorithm`` and set ``W`` / ``H``.

        NOTE: the trainer is always built with ``batch_size=1``; the
        ``batch_size`` argument is only recorded on ``self.batch_size``.

        :param negative_reg: only forwarded for ``"MF_BPR"``; the SVD
            variants always pass ``0.0``.
        :param stop_on_validation: when true and no ``evaluator_object`` is
            given, a ``SequentialEvaluator(self.URM_validation, [5])`` is
            created.
        :param epochs, validation_every_n, stop_on_validation,
            validation_metric, lower_validatons_allowed, evaluator_object:
            forwarded positionally, in the order used below, to
            ``_train_with_early_stopping``.
        :raises AssertionError: if the MF_BPR threshold removes every
            interaction.
        :raises ValueError: if ``self.algorithm`` is not supported.
        """

        self.num_factors = num_factors
        self.sgd_mode = sgd_mode
        self.batch_size = batch_size
        self.learning_rate = learning_rate

        if evaluator_object is None and stop_on_validation:
            evaluator_object = SequentialEvaluator(self.URM_validation, [5])

        # Import compiled module (kept local: it only exists after the Cython
        # extension has been built, possibly by __init__ itself).
        from MatrixFactorization.Cython.MatrixFactorization_Cython_Epoch import MatrixFactorization_Cython_Epoch

        if self.algorithm in ("FUNK_SVD", "ASY_SVD"):

            # Both SVD variants are configured identically: full training
            # matrix and no negative-item regularisation.
            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                self.URM_train,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=1,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=0.0)

        elif self.algorithm == "MF_BPR":

            # Select only positive interactions
            URM_train_positive = self.URM_train.copy()

            # The comparison replaces the stored values with booleans; entries
            # below the threshold become False and are then dropped from the
            # sparse structure by eliminate_zeros().
            URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
            URM_train_positive.eliminate_zeros()

            assert URM_train_positive.nnz > 0, "MatrixFactorization_Cython: URM_train_positive is empty, positive threshold is too high"

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                URM_train_positive,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=1,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=negative_reg)

        else:
            # Without this guard an unknown name would either crash later on a
            # missing self.cythonEpoch or silently retrain the trainer left
            # over from a previous fit() call.
            raise ValueError(
                "{}: unsupported algorithm '{}', expected one of 'FUNK_SVD', 'ASY_SVD', 'MF_BPR'".format(
                    self.RECOMMENDER_NAME, self.algorithm))

        self._train_with_early_stopping(epochs,
                                        validation_every_n,
                                        stop_on_validation,
                                        validation_metric,
                                        lower_validatons_allowed,
                                        evaluator_object,
                                        algorithm_name=self.algorithm)

        self.W = self.W_best
        self.H = self.H_best

        sys.stdout.flush()

    def _initialize_incremental_model(self):
        """Read W/H from the trainer and seed the best-model snapshots with copies."""

        self.W_incremental = self.cythonEpoch.get_W()
        self.W_best = self.W_incremental.copy()

        self.H_incremental = self.cythonEpoch.get_H()
        self.H_best = self.H_incremental.copy()

    def _update_incremental_model(self):
        """Refresh the incremental W/H from the trainer and expose them as W/H."""

        self.W_incremental = self.cythonEpoch.get_W()
        self.H_incremental = self.cythonEpoch.get_H()

        self.W = self.W_incremental
        self.H = self.H_incremental

    def _update_best_model(self):
        """Snapshot the incremental W/H as the best model seen so far."""

        self.W_best = self.W_incremental.copy()
        self.H_best = self.H_incremental.copy()

    def _run_epoch(self, num_epoch):
        """Run one training epoch in the Cython trainer (``num_epoch`` is unused)."""
        self.cythonEpoch.epochIteration_Cython()

    def runCompilationScript(self):
        """Compile the Cython epoch module inside its own subfolder.

        The build step must succeed (``subprocess.CalledProcessError``
        propagates otherwise). Generating the Cython annotation report is
        best-effort: a failure there is reported and ignored.
        """

        # Run compile script setting the working directory to ensure the compiled file are contained in the
        # appropriate subfolder and not the project root

        compiledModuleSubfolder = "/MatrixFactorization/Cython"
        fileToCompile_list = ['MatrixFactorization_Cython_Epoch.pyx']

        working_directory = os.getcwd() + compiledModuleSubfolder

        for fileToCompile in fileToCompile_list:

            command = [
                'python', 'compileCython.py', fileToCompile, 'build_ext',
                '--inplace'
            ]

            subprocess.check_output(' '.join(command),
                                    shell=True,
                                    cwd=working_directory)

            try:

                command = ['cython', fileToCompile, '-a']

                subprocess.check_output(' '.join(command),
                                        shell=True,
                                        cwd=working_directory)

            except (subprocess.CalledProcessError, OSError) as exc:
                # The HTML report is optional (e.g. the cython executable may
                # not be on PATH); keep going, but do not hide the failure and
                # do not swallow KeyboardInterrupt/SystemExit.
                print("{}: Cython annotation report not generated for {}: {}".format(
                    self.RECOMMENDER_NAME, fileToCompile, exc))

        print("Compiled module saved in subfolder: {}".format(
            compiledModuleSubfolder))

        # Command to run compilation script
        # python compileCython.py MatrixFactorization_Cython_Epoch.pyx build_ext --inplace

        # Command to generate html report
        # cython -a MatrixFactorization_Cython_Epoch.pyx

    def writeCurrentConfig(self, currentEpoch, results_run, logFile):
        """Print the current configuration and results, and append them to ``logFile``.

        :param currentEpoch: epoch number reported in the configuration.
        :param results_run: results object, formatted with ``str.format``.
        :param logFile: object with ``write``/``flush``, or None to skip logging.
        """

        current_config = {
            'learn_rate': self.learning_rate,
            'num_factors': self.num_factors,
            'batch_size': 1,
            'epoch': currentEpoch
        }

        print("Test case: {}\nResults {}\n".format(current_config,
                                                   results_run))

        sys.stdout.flush()

        if logFile is not None:
            logFile.write("Test case: {}, Results {}\n".format(
                current_config, results_run))
            logFile.flush()

    def saveModel(self, folder_path, file_name=None):
        """Pickle ``{"W": self.W, "H": self.H}`` to ``folder_path + file_name``.

        :param folder_path: prefix concatenated as-is with ``file_name`` (no
            path separator is inserted).
        :param file_name: defaults to ``RECOMMENDER_NAME``.
        """

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        print("{}: Saving model in file '{}'".format(self.RECOMMENDER_NAME,
                                                     folder_path + file_name))

        dictionary_to_save = {"W": self.W, "H": self.H}

        # Context manager so the file is flushed and closed even if
        # pickling fails.
        with open(folder_path + file_name, "wb") as model_file:
            pickle.dump(dictionary_to_save,
                        model_file,
                        protocol=pickle.HIGHEST_PROTOCOL)

        print("{}: Saving complete".format(self.RECOMMENDER_NAME))
Exemple #4
0
    def fit(self,
            epochs=300,
            batch_size=1000,
            num_factors=10,
            learning_rate=0.01,
            sgd_mode='sgd',
            user_reg=0.0,
            positive_reg=0.0,
            negative_reg=0.0,
            stop_on_validation=False,
            lower_validatons_allowed=5,
            validation_metric="MAP",
            evaluator_object=None,
            validation_every_n=5):
        """Train the model selected by ``self.algorithm`` and set ``W`` / ``H``.

        NOTE: the trainer is always built with ``batch_size=1``; the
        ``batch_size`` argument is only recorded on ``self.batch_size``.

        :param negative_reg: only forwarded for ``"MF_BPR"``; the SVD
            variants always pass ``0.0``.
        :param stop_on_validation: when true and no ``evaluator_object`` is
            given, a ``SequentialEvaluator(self.URM_validation, [5])`` is
            created.
        :param epochs, validation_every_n, stop_on_validation,
            validation_metric, lower_validatons_allowed, evaluator_object:
            forwarded positionally, in the order used below, to
            ``_train_with_early_stopping``.
        :raises AssertionError: if the MF_BPR threshold removes every
            interaction.
        :raises ValueError: if ``self.algorithm`` is not one of
            ``"FUNK_SVD"``, ``"ASY_SVD"``, ``"MF_BPR"``.
        """

        self.num_factors = num_factors
        self.sgd_mode = sgd_mode
        self.batch_size = batch_size
        self.learning_rate = learning_rate

        if evaluator_object is None and stop_on_validation:
            evaluator_object = SequentialEvaluator(self.URM_validation, [5])

        # Import compiled module (kept local: it only exists after the Cython
        # extension has been built).
        from MatrixFactorization.Cython.MatrixFactorization_Cython_Epoch import MatrixFactorization_Cython_Epoch

        if self.algorithm in ("FUNK_SVD", "ASY_SVD"):

            # Both SVD variants are configured identically: full training
            # matrix and no negative-item regularisation.
            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                self.URM_train,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=1,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=0.0)

        elif self.algorithm == "MF_BPR":

            # Select only positive interactions
            URM_train_positive = self.URM_train.copy()

            # The comparison replaces the stored values with booleans; entries
            # below the threshold become False and are then dropped from the
            # sparse structure by eliminate_zeros().
            URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
            URM_train_positive.eliminate_zeros()

            assert URM_train_positive.nnz > 0, "MatrixFactorization_Cython: URM_train_positive is empty, positive threshold is too high"

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                URM_train_positive,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=1,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=negative_reg)

        else:
            # Without this guard an unknown name would either crash later on a
            # missing self.cythonEpoch or silently retrain the trainer left
            # over from a previous fit() call.
            raise ValueError(
                "MatrixFactorization_Cython: unsupported algorithm '{}', expected one of 'FUNK_SVD', 'ASY_SVD', 'MF_BPR'".format(
                    self.algorithm))

        self._train_with_early_stopping(epochs,
                                        validation_every_n,
                                        stop_on_validation,
                                        validation_metric,
                                        lower_validatons_allowed,
                                        evaluator_object,
                                        algorithm_name=self.algorithm)

        self.W = self.W_best
        self.H = self.H_best

        sys.stdout.flush()
    def fit(self,
            epochs=5000,
            batch_size=1000,
            num_factors=80,
            learning_rate=0.001,
            sgd_mode='adagrad',
            user_reg=0.0,
            positive_reg=0.01,
            negative_reg=0.01,
            stop_on_validation=False,
            lower_validatons_allowed=5,
            validation_metric="MAP",
            evaluator_object=None,
            validation_every_n=5,
            force_compute_sim=True):
        """Train the model selected by ``self.algorithm``, with an on-disk cache.

        When ``force_compute_sim`` is false and
        ``IntermediateComputations/MFMatrix.pkl`` exists, the pickled
        ``(W, H)`` pair is loaded and the method returns immediately. That
        shortcut happens before any hyper-parameter is read, so a cached model
        is reused regardless of the arguments passed or of ``self.algorithm``.
        Otherwise the model is trained and the resulting ``(W, H)`` pair is
        written to that same file.

        Batch size handed to the trainer: 1 for ``"FUNK_SVD"``, 32 for
        ``"ASY_SVD"``, ``batch_size`` for ``"MF_BPR"``.

        :param negative_reg: only forwarded for ``"MF_BPR"``; the SVD
            variants always pass ``0.0``.
        :param stop_on_validation: when true and no ``evaluator_object`` is
            given, a ``SequentialEvaluator(self.URM_validation, [10])`` is
            created.
        :param force_compute_sim: when false, try the cache before training.
        :raises AssertionError: if the MF_BPR threshold removes every
            interaction.
        :raises ValueError: if ``self.algorithm`` is not one of
            ``"FUNK_SVD"``, ``"ASY_SVD"``, ``"MF_BPR"``.
        """

        cache_path = os.path.join("IntermediateComputations", "MFMatrix.pkl")

        if not force_compute_sim:
            try:
                # NOTE(security): pickle.load can execute arbitrary code; this
                # is only safe as long as the cache file is written by this
                # program and not supplied by an untrusted party.
                with open(cache_path, 'rb') as handle:
                    (W_new, H_new) = pickle.load(handle)
            except FileNotFoundError:
                # No cache yet: fall through and train from scratch.
                pass
            else:
                self.W = W_new
                self.H = H_new
                print("Saved MF Matrix Used!")
                return

        self.num_factors = num_factors
        self.sgd_mode = sgd_mode
        self.batch_size = batch_size
        self.learning_rate = learning_rate

        if evaluator_object is None and stop_on_validation:
            evaluator_object = SequentialEvaluator(self.URM_validation, [10])

        # Import compiled module (kept local: it only exists after the Cython
        # extension has been built).
        from MatrixFactorization.Cython.MatrixFactorization_Cython_Epoch import MatrixFactorization_Cython_Epoch

        if self.algorithm in ("FUNK_SVD", "ASY_SVD"):

            # The two SVD variants differ only in the batch size they use.
            svd_batch_size = 1 if self.algorithm == "FUNK_SVD" else 32

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                self.URM_train,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=svd_batch_size,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=0.0)

        elif self.algorithm == "MF_BPR":

            # Select only positive interactions
            URM_train_positive = self.URM_train.copy()

            # The comparison replaces the stored values with booleans; entries
            # below the threshold become False and are then dropped from the
            # sparse structure by eliminate_zeros().
            URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
            URM_train_positive.eliminate_zeros()

            assert URM_train_positive.nnz > 0, "MatrixFactorization_Cython: URM_train_positive is empty, positive threshold is too high"

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                URM_train_positive,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=batch_size,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=negative_reg)

        else:
            # Without this guard an unknown name would either crash later on a
            # missing self.cythonEpoch or silently retrain the trainer left
            # over from a previous fit() call.
            raise ValueError(
                "MatrixFactorization_Cython: unsupported algorithm '{}', expected one of 'FUNK_SVD', 'ASY_SVD', 'MF_BPR'".format(
                    self.algorithm))

        self._train_with_early_stopping(epochs,
                                        validation_every_n,
                                        stop_on_validation,
                                        validation_metric,
                                        lower_validatons_allowed,
                                        evaluator_object,
                                        algorithm_name=self.algorithm)

        self.W = self.W_best
        self.H = self.H_best

        # Make sure the cache folder exists: otherwise open() raises
        # FileNotFoundError only after the whole training run has finished.
        os.makedirs(os.path.dirname(cache_path), exist_ok=True)

        with open(cache_path, 'wb') as handle:
            pickle.dump((self.W, self.H),
                        handle,
                        protocol=pickle.HIGHEST_PROTOCOL)

        sys.stdout.flush()