def fit(self, topK=50, shrink=100, similarity='cosine', normalize=True, feature_weighting="none", **similarity_args):
    """
    Compute the sparse similarity matrix ``self.W_sparse`` from ``self.UCM_train``.

    The feature matrix is optionally re-weighted first; note that the weighted
    (float32) matrix replaces ``self.UCM_train`` on the instance.

    :param topK: stored on the instance and forwarded to Compute_Similarity
    :param shrink: stored on the instance and forwarded to Compute_Similarity
    :param similarity: similarity name forwarded to Compute_Similarity
    :param normalize: forwarded to Compute_Similarity
    :param feature_weighting: must be one of self.FEATURE_WEIGHTING_VALUES;
        "BM25" and "TF-IDF" trigger a re-weighting, any other accepted value
        leaves the matrix untouched
    :param similarity_args: extra keyword arguments forwarded to Compute_Similarity
    :raises ValueError: if feature_weighting is not an accepted value
    """
    # Hyper-parameters are recorded before validation, as callers may inspect them.
    self.topK = topK
    self.shrink = shrink

    if feature_weighting not in self.FEATURE_WEIGHTING_VALUES:
        message_template = "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'"
        raise ValueError(message_template.format(self.FEATURE_WEIGHTING_VALUES, feature_weighting))

    if feature_weighting in ("BM25", "TF-IDF"):
        # Only the weighting function that is actually needed gets looked up.
        apply_weighting = okapi_BM_25 if feature_weighting == "BM25" else TF_IDF
        self.UCM_train = self.UCM_train.astype(np.float32)
        self.UCM_train = apply_weighting(self.UCM_train)

    # The similarity is computed on the transposed feature matrix.
    similarity_builder = Compute_Similarity(
        self.UCM_train.T,
        shrink=shrink,
        topK=topK,
        normalize=normalize,
        similarity=similarity,
        **similarity_args
    )

    self.W_sparse = similarity_builder.compute_similarity()
    self.W_sparse = check_matrix(self.W_sparse, format='csr')
def fit(self, show_max_performance=False, precompute_common_features=False, learning_rate=0.1, positive_only_D=True, initialization_mode_D="random", normalize_similarity=False, use_dropout=True, dropout_perc=0.3, l1_reg=0.0, l2_reg=0.0, epochs=50, topK=300, add_zeros_quota=0.0, log_file=None, verbose=False, sgd_mode='adagrad', gamma=0.9, beta_1=0.9, beta_2=0.999, **earlystopping_kwargs):
    """
    Learn the feature weights D with the compiled SGD solver and build W_sparse.

    Steps: validate the initialization mode, record hyper-parameters on the
    instance, generate the training samples, build the initial weight vector,
    instantiate CFW_D_Similarity_Cython_SGD, train with early stopping and
    finally compute the similarity from the best model.

    :param initialization_mode_D: one of self.INIT_TYPE_VALUES. "random" draws
        from a normal(0.001, 0.1); "one"/"zero" use constant vectors; "BM25" and
        "TF-IDF" use unit weights and replace self.ICM with its re-weighted
        float32 version
    :param precompute_common_features, positive_only_D, use_dropout, dropout_perc,
        learning_rate, l1_reg, l2_reg, sgd_mode, gamma, beta_1, beta_2:
        forwarded to CFW_D_Similarity_Cython_SGD
    :param show_max_performance, normalize_similarity, add_zeros_quota, topK,
        log_file: only stored on the instance by this method
    :param epochs: stored on the instance and passed to _train_with_early_stopping
    :param verbose: stored on the instance; enables the progress message
    :param earlystopping_kwargs: forwarded to _train_with_early_stopping
    :raises ValueError: if initialization_mode_D is not recognized
    """
    if initialization_mode_D not in self.INIT_TYPE_VALUES:
        message_template = "Value for 'initialization_mode_D' not recognized. Acceptable values are {}, provided was '{}'"
        raise ValueError(message_template.format(self.INIT_TYPE_VALUES, initialization_mode_D))

    # Import compiled module
    from Recommenders.FeatureWeighting.Cython.CFW_D_Similarity_Cython_SGD import CFW_D_Similarity_Cython_SGD

    # Record the hyper-parameters before the training data is generated.
    self.show_max_performance = show_max_performance
    self.normalize_similarity = normalize_similarity
    self.add_zeros_quota = add_zeros_quota
    self.topK = topK
    self.learning_rate = learning_rate
    self.l1_reg = l1_reg
    self.l2_reg = l2_reg
    self.epochs = epochs
    self.log_file = log_file
    self.verbose = verbose

    self._generate_train_data()

    # Build the starting point for the feature weights.
    if initialization_mode_D == "random":
        initial_weights_D = np.random.normal(0.001, 0.1, self.n_features).astype(np.float64)
    elif initialization_mode_D == "zero":
        initial_weights_D = np.zeros(self.n_features, dtype=np.float64)
    elif initialization_mode_D in ("one", "BM25", "TF-IDF"):
        initial_weights_D = np.ones(self.n_features, dtype=np.float64)

        if initialization_mode_D != "one":
            # The weighting schemes start from unit weights on a re-weighted ICM.
            reweight_ICM = okapi_BM_25 if initialization_mode_D == "BM25" else TF_IDF
            self.ICM = self.ICM.astype(np.float32)
            self.ICM = reweight_ICM(self.ICM)
    else:
        raise ValueError("CFW_D_Similarity_Cython: 'init_type' not recognized")

    # Instantiate fast Cython implementation
    solver_options = dict(
        precompute_common_features=precompute_common_features,
        positive_only_D=positive_only_D,
        weights_initialization_D=initial_weights_D,
        use_dropout=use_dropout,
        dropout_perc=dropout_perc,
        learning_rate=learning_rate,
        l1_reg=l1_reg,
        l2_reg=l2_reg,
        sgd_mode=sgd_mode,
        verbose=self.verbose,
        gamma=gamma,
        beta_1=beta_1,
        beta_2=beta_2,
    )
    self.FW_D_Similarity = CFW_D_Similarity_Cython_SGD(
        self.row_list,
        self.col_list,
        self.data_list,
        self.n_features,
        self.ICM,
        **solver_options
    )

    if self.verbose:
        print(self.RECOMMENDER_NAME + ": Initialization completed")

    # The best model starts as a copy of the solver's current weights.
    self.D_incremental = self.FW_D_Similarity.get_weights()
    self.D_best = self.D_incremental.copy()

    self._train_with_early_stopping(epochs, algorithm_name=self.RECOMMENDER_NAME, **earlystopping_kwargs)

    self.compute_W_sparse(model_to_use="best")

    sys.stdout.flush()
def fit(self, topK=300, epochs=30, n_factors=2, learning_rate=1e-5, precompute_user_feature_count=False, initialization_mode_D="random", positive_only_D=True, positive_only_V=True, l2_reg_D=0.01, l2_reg_V=0.01, non_negative_weights=False, verbose=False, sgd_mode='adam', gamma=0.9, beta_1=0.9, beta_2=0.999, **earlystopping_kwargs):
    """
    Train the model with the compiled FBSM_Rating_Cython_SGD solver and build W_sparse.

    Steps: validate the initialization mode, record hyper-parameters on the
    instance, build the initial D weights, instantiate the solver, train with
    early stopping and compute the similarity from the best model.

    :param topK: stored on the instance (not forwarded to the solver here)
    :param epochs: stored on the instance and passed to _train_with_early_stopping
    :param n_factors: forwarded to the solver; also scales std_init
        (std_init = 1 / n_features / n_factors, or 0 when n_factors is 0)
    :param initialization_mode_D: one of self.INIT_TYPE_VALUES. "random" draws
        from a normal(0.001, 0.1); "one"/"zero" use constant vectors; "BM25" and
        "TF-IDF" use unit weights and replace self.ICM with its re-weighted
        float32 version
    :param learning_rate, l2_reg_D, l2_reg_V: stored on the instance and
        forwarded to the solver
    :param precompute_user_feature_count, positive_only_D, positive_only_V,
        sgd_mode, gamma, beta_1, beta_2: forwarded to the solver
    :param non_negative_weights: accepted for interface compatibility; this
        method does not read it
    :param verbose: stored on the instance; enables the progress message
    :param earlystopping_kwargs: forwarded to _train_with_early_stopping
    :raises ValueError: if initialization_mode_D is not in self.INIT_TYPE_VALUES,
        or is listed there but has no initialization rule in this method
    """
    if initialization_mode_D not in self.INIT_TYPE_VALUES:
        raise ValueError(
            "Value for 'initialization_mode_D' not recognized. Acceptable values are {}, provided was '{}'"
            .format(self.INIT_TYPE_VALUES, initialization_mode_D))

    # Fail fast, before importing the compiled module or touching instance state,
    # when INIT_TYPE_VALUES lists a mode this method has no rule for. The message
    # names this recommender and the real parameter, not a copy-pasted class name.
    implemented_modes = ("random", "one", "zero", "BM25", "TF-IDF")
    if initialization_mode_D not in implemented_modes:
        raise ValueError(
            "{}: 'initialization_mode_D' value '{}' not recognized"
            .format(self.RECOMMENDER_NAME, initialization_mode_D))

    # Import compiled module
    # NOTE(review): this path has no top-level package prefix; confirm it
    # resolves from the project root.
    from FeatureWeighting.Cython.FBSM_Rating_Cython_SGD import FBSM_Rating_Cython_SGD

    self.n_factors = n_factors
    self.learning_rate = learning_rate
    self.l2_reg_D = l2_reg_D
    self.l2_reg_V = l2_reg_V
    self.topK = topK
    self.epochs = epochs
    self.verbose = verbose

    # Spread (std_init) passed to the solver shrinks with the number of
    # features and factors; the mean is always zero.
    if self.n_factors != 0:
        std_init = 1 / self.n_features / self.n_factors
    else:
        std_init = 0

    mean_init = 0

    if initialization_mode_D == "random":
        weights_initialization_D = np.random.normal(0.001, 0.1, self.n_features).astype(np.float64)
    elif initialization_mode_D == "zero":
        weights_initialization_D = np.zeros(self.n_features, dtype=np.float64)
    else:
        # "one", "BM25" and "TF-IDF" all start from unit weights; the latter two
        # additionally replace the ICM with its re-weighted version.
        weights_initialization_D = np.ones(self.n_features, dtype=np.float64)

        if initialization_mode_D == "BM25":
            self.ICM = self.ICM.astype(np.float32)
            self.ICM = okapi_BM_25(self.ICM)
        elif initialization_mode_D == "TF-IDF":
            self.ICM = self.ICM.astype(np.float32)
            self.ICM = TF_IDF(self.ICM)

    self.FBSM_Rating = FBSM_Rating_Cython_SGD(
        self.URM_train,
        self.ICM,
        n_factors=self.n_factors,
        precompute_user_feature_count=precompute_user_feature_count,
        learning_rate=self.learning_rate,
        l2_reg_D=self.l2_reg_D,
        l2_reg_V=self.l2_reg_V,
        weights_initialization_D=weights_initialization_D,
        weights_initialization_V=None,
        positive_only_D=positive_only_D,
        positive_only_V=positive_only_V,
        verbose=self.verbose,
        sgd_mode=sgd_mode,
        gamma=gamma,
        beta_1=beta_1,
        beta_2=beta_2,
        mean_init=mean_init,
        std_init=std_init)

    if self.verbose:
        print(self.RECOMMENDER_NAME + ": Initialization completed")

    self._train_with_early_stopping(epochs, algorithm_name=self.RECOMMENDER_NAME, **earlystopping_kwargs)

    self.compute_W_sparse(model_to_use="best")

    sys.stdout.flush()