class _MatrixFactorization_Cython(BaseMatrixFactorizationRecommender, Incremental_Training_Early_Stopping):
    """Matrix-factorization recommender whose training epochs run in a compiled Cython module.

    The concrete model is selected by ``algorithm_name``; ``fit`` handles
    ``"FUNK_SVD"``, ``"ASY_SVD"`` and ``"MF_BPR"``. The training loop itself is
    delegated to ``_train_with_early_stopping`` (inherited), which is expected
    to call back into ``_run_epoch``, ``_prepare_model_for_validation`` and
    ``_update_best_model`` -- NOTE(review): the callback protocol is defined by
    the base class, which is not visible here.
    """

    RECOMMENDER_NAME = "MatrixFactorization_Cython_Recommender"

    # Algorithm names for which fit() knows how to build the Cython epoch object.
    _SVD_ALGORITHM_NAMES = ("FUNK_SVD", "ASY_SVD")
    _BPR_ALGORITHM_NAME = "MF_BPR"

    def __init__(self, URM_train, recompile_cython=False, algorithm_name="MF_BPR"):
        """Store the training matrix and optionally recompile the Cython module.

        :param URM_train: user-item interaction matrix; its ``shape`` gives
            ``(n_users, n_items)``.
        :param recompile_cython: when true, run ``runCompilationScript`` before returning.
        :param algorithm_name: which model ``fit`` will train.
        """
        super(_MatrixFactorization_Cython, self).__init__(URM_train)

        self.n_users, self.n_items = self.URM_train.shape
        self.normalize = False
        self.algorithm_name = algorithm_name

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    def fit(self, epochs=300, batch_size=1000, num_factors=10, positive_threshold_BPR=None,
            learning_rate=0.001, use_bias=True,
            sgd_mode='sgd',
            negative_interactions_quota=0.0,
            init_mean=0.0, init_std_dev=0.1,
            user_reg=0.0, item_reg=0.0, bias_reg=0.0, positive_reg=0.0, negative_reg=0.0,
            verbose=False, random_seed=None,
            **earlystopping_kwargs):
        """Train the model and keep the best parameters found.

        All hyper-parameters are forwarded to ``MatrixFactorization_Cython_Epoch``;
        their exact meaning is defined by that compiled module. Which ones are
        forwarded depends on the algorithm:

        * ``FUNK_SVD`` / ``ASY_SVD``: ``user_reg``, ``item_reg``, ``bias_reg`` and
          ``negative_interactions_quota`` are used; ``positive_reg`` and
          ``negative_reg`` are ignored.
        * ``MF_BPR``: ``user_reg``, ``positive_reg`` and ``negative_reg`` are used;
          ``item_reg``, ``bias_reg`` and ``negative_interactions_quota`` are ignored.

        :param epochs: passed to ``_train_with_early_stopping``.
        :param positive_threshold_BPR: for ``MF_BPR`` only; when not ``None``,
            only interactions whose value is ``>=`` this threshold are kept.
        :param negative_interactions_quota: must satisfy ``0.0 <= quota < 1.0``.
        :param earlystopping_kwargs: forwarded unchanged to ``_train_with_early_stopping``.
        :raises AssertionError: if ``negative_interactions_quota`` is out of range,
            or if thresholding leaves no positive interaction for ``MF_BPR``.
        :raises ValueError: if ``self.algorithm_name`` is not a supported algorithm.
        """
        self.num_factors = num_factors
        self.use_bias = use_bias
        self.sgd_mode = sgd_mode
        self.verbose = verbose
        self.positive_threshold_BPR = positive_threshold_BPR
        self.learning_rate = learning_rate

        assert negative_interactions_quota >= 0.0 and negative_interactions_quota < 1.0, "{}: negative_interactions_quota must be a float value >=0 and < 1.0, provided was '{}'".format(
            self.RECOMMENDER_NAME, negative_interactions_quota)
        self.negative_interactions_quota = negative_interactions_quota

        # Import compiled module (kept local so the class can be defined before
        # the extension has been built)
        from MatrixFactorization.Cython.MatrixFactorization_Cython_Epoch import MatrixFactorization_Cython_Epoch

        if self.algorithm_name in self._SVD_ALGORITHM_NAMES:

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                self.URM_train,
                algorithm_name=self.algorithm_name,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                item_reg=item_reg,
                bias_reg=bias_reg,
                batch_size=batch_size,
                use_bias=use_bias,
                init_mean=init_mean,
                negative_interactions_quota=negative_interactions_quota,
                init_std_dev=init_std_dev,
                verbose=verbose,
                random_seed=random_seed)

        elif self.algorithm_name == self._BPR_ALGORITHM_NAME:

            # Select only positive interactions. The stored values are replaced
            # by the boolean outcome of the comparison, so entries below the
            # threshold become False and are then removed by eliminate_zeros().
            URM_train_positive = self.URM_train.copy()

            if self.positive_threshold_BPR is not None:
                URM_train_positive.data = URM_train_positive.data >= self.positive_threshold_BPR
                URM_train_positive.eliminate_zeros()

                assert URM_train_positive.nnz > 0, "MatrixFactorization_Cython: URM_train_positive is empty, positive threshold is too high"

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                URM_train_positive,
                algorithm_name=self.algorithm_name,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=negative_reg,
                batch_size=batch_size,
                use_bias=use_bias,
                init_mean=init_mean,
                init_std_dev=init_std_dev,
                verbose=verbose,
                random_seed=random_seed)

        else:
            # Without this guard self.cythonEpoch would be left unset (or stale
            # from a previous fit) and the failure would surface later as an
            # unrelated AttributeError.
            raise ValueError(
                "{}: unsupported algorithm_name '{}', expected one of {}".format(
                    self.RECOMMENDER_NAME,
                    self.algorithm_name,
                    list(self._SVD_ALGORITHM_NAMES) + [self._BPR_ALGORITHM_NAME]))

        # Snapshot the freshly initialised parameters so that a "best" model
        # exists even if no later epoch is recorded as an improvement.
        self._prepare_model_for_validation()
        self._update_best_model()

        self._train_with_early_stopping(epochs,
                                        algorithm_name=self.algorithm_name,
                                        **earlystopping_kwargs)

        # Expose the best parameters as the final model.
        self.USER_factors = self.USER_factors_best
        self.ITEM_factors = self.ITEM_factors_best

        if self.use_bias:
            self.USER_bias = self.USER_bias_best
            self.ITEM_bias = self.ITEM_bias_best
            self.GLOBAL_bias = self.GLOBAL_bias_best

        sys.stdout.flush()

    def _prepare_model_for_validation(self):
        """Pull the current parameters out of the Cython epoch object onto ``self``."""
        self.USER_factors = self.cythonEpoch.get_USER_factors()
        self.ITEM_factors = self.cythonEpoch.get_ITEM_factors()

        if self.use_bias:
            self.USER_bias = self.cythonEpoch.get_USER_bias()
            self.ITEM_bias = self.cythonEpoch.get_ITEM_bias()
            self.GLOBAL_bias = self.cythonEpoch.get_GLOBAL_bias()

    def _update_best_model(self):
        """Record the current parameters as the best ones seen so far.

        Factor and bias arrays are copied; ``GLOBAL_bias`` is stored by plain
        assignment.
        """
        self.USER_factors_best = self.USER_factors.copy()
        self.ITEM_factors_best = self.ITEM_factors.copy()

        if self.use_bias:
            self.USER_bias_best = self.USER_bias.copy()
            self.ITEM_bias_best = self.ITEM_bias.copy()
            self.GLOBAL_bias_best = self.GLOBAL_bias

    def _run_epoch(self, num_epoch):
        """Run one training epoch in Cython; ``num_epoch`` is not used here."""
        self.cythonEpoch.epochIteration_Cython()

    def runCompilationScript(self):
        """Compile the Cython epoch module via ``run_compile_subprocess``."""
        # Run compile script setting the working directory to ensure the compiled file are contained in the
        # appropriate subfolder and not the project root
        file_subfolder = "/MatrixFactorization/Cython"
        file_to_compile_list = ['MatrixFactorization_Cython_Epoch.pyx']

        run_compile_subprocess(file_subfolder, file_to_compile_list)

        print("{}: Compiled module {} in subfolder: {}".format(
            self.RECOMMENDER_NAME, file_to_compile_list, file_subfolder))
class MatrixFactorization_Cython(Recommender, Incremental_Training_Early_Stopping):
    """Matrix-factorization recommender whose training epochs run in a compiled Cython module.

    The learned model is the pair of matrices ``W`` and ``H``; the scores of a
    user are computed as ``W[user_id] . H^T``. ``fit`` handles the algorithms
    ``"FUNK_SVD"``, ``"ASY_SVD"`` and ``"MF_BPR"``.
    """

    RECOMMENDER_NAME = "MatrixFactorization_Cython_Recommender"

    def __init__(self, URM_train, positive_threshold=4, URM_validation=None, recompile_cython=False,
                 algorithm="MF_BPR"):
        """Store the training data and optionally recompile the Cython module.

        :param URM_train: user-item interaction matrix; ``shape`` is ``(n_users, n_items)``.
        :param positive_threshold: for ``MF_BPR``, interactions with value ``>=``
            this threshold are the ones kept for training.
        :param URM_validation: optional validation matrix; a copy is stored.
        :param recompile_cython: when true, run ``runCompilationScript`` before returning.
        :param algorithm: which model ``fit`` will train.
        """
        super(MatrixFactorization_Cython, self).__init__()

        self.URM_train = URM_train
        self.n_users = URM_train.shape[0]
        self.n_items = URM_train.shape[1]
        self.normalize = False

        self.algorithm = algorithm
        self.positive_threshold = positive_threshold

        if URM_validation is not None:
            self.URM_validation = URM_validation.copy()
        else:
            self.URM_validation = None

        # Route item scoring through the matrix-factorization dot product.
        self.compute_item_score = self.compute_score_MF

        if recompile_cython:
            print("Compiling in Cython")
            self.runCompilationScript()
            print("Compilation Complete")

    def compute_score_MF(self, user_id):
        """Return the scores of every item for ``user_id`` as ``W[user_id] . H^T``."""
        scores_array = np.dot(self.W[user_id], self.H.T)

        return scores_array

    def fit(self, epochs=300, batch_size=1000, num_factors=10,
            learning_rate=0.01, sgd_mode='sgd', user_reg=0.0, positive_reg=0.0, negative_reg=0.0,
            stop_on_validation=False, lower_validatons_allowed=5, validation_metric="MAP",
            evaluator_object=None, validation_every_n=5):
        """Train the model and keep the best ``W`` / ``H`` found.

        Hyper-parameters are forwarded to ``MatrixFactorization_Cython_Epoch``;
        their exact meaning is defined by that compiled module.

        :param batch_size: stored on ``self`` but NOT forwarded: the epoch
            object is always created with ``batch_size=1``.
        :param negative_reg: forwarded only for ``MF_BPR``; the SVD variants
            always receive ``negative_reg=0.0``.
        :param evaluator_object: when ``None`` and ``stop_on_validation`` is
            true, a ``SequentialEvaluator`` on ``self.URM_validation`` with
            cutoff list ``[5]`` is created.
        :raises AssertionError: if no interaction reaches ``positive_threshold``
            for ``MF_BPR``.
        :raises ValueError: if ``self.algorithm`` is not a supported algorithm.
        """
        self.num_factors = num_factors
        self.sgd_mode = sgd_mode
        self.batch_size = batch_size
        self.learning_rate = learning_rate

        if evaluator_object is None and stop_on_validation:
            evaluator_object = SequentialEvaluator(self.URM_validation, [5])

        # Import compiled module (kept local so the class can be defined before
        # the extension has been built)
        from MatrixFactorization.Cython.MatrixFactorization_Cython_Epoch import MatrixFactorization_Cython_Epoch

        if self.algorithm in ("FUNK_SVD", "ASY_SVD"):
            # Both SVD variants are configured identically; only the algorithm
            # name passed to the epoch object differs.
            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                self.URM_train,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=1,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=0.0)

        elif self.algorithm == "MF_BPR":

            # Select only positive interactions. The stored values are replaced
            # by the boolean outcome of the comparison, so entries below the
            # threshold become False and are then removed by eliminate_zeros().
            URM_train_positive = self.URM_train.copy()

            URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
            URM_train_positive.eliminate_zeros()

            assert URM_train_positive.nnz > 0, "MatrixFactorization_Cython: URM_train_positive is empty, positive threshold is too high"

            self.cythonEpoch = MatrixFactorization_Cython_Epoch(
                URM_train_positive,
                algorithm=self.algorithm,
                n_factors=self.num_factors,
                learning_rate=learning_rate,
                batch_size=1,
                sgd_mode=sgd_mode,
                user_reg=user_reg,
                positive_reg=positive_reg,
                negative_reg=negative_reg)

        else:
            # Without this guard self.cythonEpoch would be left unset (or stale
            # from a previous fit) and the failure would surface later as an
            # unrelated AttributeError.
            raise ValueError(
                "{}: unsupported algorithm '{}', expected one of {}".format(
                    self.RECOMMENDER_NAME,
                    self.algorithm,
                    ["FUNK_SVD", "ASY_SVD", "MF_BPR"]))

        self._train_with_early_stopping(epochs, validation_every_n, stop_on_validation,
                                        validation_metric, lower_validatons_allowed, evaluator_object,
                                        algorithm_name=self.algorithm)

        # Expose the best parameters as the final model.
        self.W = self.W_best
        self.H = self.H_best

        sys.stdout.flush()

    def _initialize_incremental_model(self):
        """Fetch the initial ``W`` / ``H`` from the epoch object and snapshot them as best."""
        self.W_incremental = self.cythonEpoch.get_W()
        self.W_best = self.W_incremental.copy()

        self.H_incremental = self.cythonEpoch.get_H()
        self.H_best = self.H_incremental.copy()

    def _update_incremental_model(self):
        """Refresh ``W`` / ``H`` (and their incremental aliases) from the epoch object."""
        self.W_incremental = self.cythonEpoch.get_W()
        self.H_incremental = self.cythonEpoch.get_H()

        self.W = self.W_incremental
        self.H = self.H_incremental

    def _update_best_model(self):
        """Record copies of the current incremental ``W`` / ``H`` as the best so far."""
        self.W_best = self.W_incremental.copy()
        self.H_best = self.H_incremental.copy()

    def _run_epoch(self, num_epoch):
        """Run one training epoch in Cython; ``num_epoch`` is not used here."""
        self.cythonEpoch.epochIteration_Cython()

    def runCompilationScript(self):
        """Compile the Cython epoch module and, best-effort, its HTML annotation report.

        :raises subprocess.CalledProcessError: if the compilation command fails.
            A failure of the optional ``cython -a`` report is reported on stdout
            and otherwise ignored.
        """
        # Run compile script setting the working directory to ensure the compiled file are contained in the
        # appropriate subfolder and not the project root
        compiledModuleSubfolder = "/MatrixFactorization/Cython"
        fileToCompile_list = ['MatrixFactorization_Cython_Epoch.pyx']

        working_directory = os.getcwd() + compiledModuleSubfolder

        for fileToCompile in fileToCompile_list:

            command = ['python',
                       'compileCython.py',
                       fileToCompile,
                       'build_ext',
                       '--inplace'
                       ]

            subprocess.check_output(' '.join(command), shell=True, cwd=working_directory)

            # The annotation report is optional: keep going if it cannot be
            # produced, but do not swallow KeyboardInterrupt / SystemExit.
            try:
                command = ['cython',
                           fileToCompile,
                           '-a'
                           ]

                subprocess.check_output(' '.join(command), shell=True, cwd=working_directory)

            except (subprocess.CalledProcessError, OSError) as report_error:
                print("{}: Cython annotation report not generated for {}: {}".format(
                    self.RECOMMENDER_NAME, fileToCompile, report_error))

        print("Compiled module saved in subfolder: {}".format(compiledModuleSubfolder))

        # Command to run compilation script
        # python compileCython.py MatrixFactorization_Cython_Epoch.pyx build_ext --inplace

        # Command to generate html report
        # cython -a MatrixFactorization_Cython_Epoch.pyx

    def writeCurrentConfig(self, currentEpoch, results_run, logFile):
        """Print the current configuration and results, and append them to ``logFile`` if given.

        :param currentEpoch: epoch number reported in the configuration.
        :param results_run: results to report next to the configuration.
        :param logFile: writable file-like object, or ``None`` to skip logging to file.
        """
        # batch_size is reported as 1 because that is the value fit() passes to
        # the epoch object.
        current_config = {'learn_rate': self.learning_rate,
                          'num_factors': self.num_factors,
                          'batch_size': 1,
                          'epoch': currentEpoch}

        print("Test case: {}\nResults {}\n".format(current_config, results_run))

        sys.stdout.flush()

        if logFile is not None:
            logFile.write("Test case: {}, Results {}\n".format(current_config, results_run))
            logFile.flush()

    def saveModel(self, folder_path, file_name=None):
        """Pickle ``W`` and ``H`` to ``folder_path + file_name``.

        :param folder_path: destination prefix; concatenated with ``file_name``
            as-is, so it must already end with a path separator.
        :param file_name: file name; defaults to ``RECOMMENDER_NAME``.
        """
        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        print("{}: Saving model in file '{}'".format(self.RECOMMENDER_NAME, folder_path + file_name))

        dictionary_to_save = {"W": self.W,
                              "H": self.H}

        # Context manager guarantees the file is flushed and closed even if
        # pickling fails.
        with open(folder_path + file_name, "wb") as model_file:
            pickle.dump(dictionary_to_save,
                        model_file,
                        protocol=pickle.HIGHEST_PROTOCOL)

        print("{}: Saving complete".format(self.RECOMMENDER_NAME))