def build_item_projector(self, v):
    """Precompute the left/right item projector factors from the item Cholesky factor.

    Does nothing when no item Cholesky factor is available.
    """
    cholesky_items = self.item_cholesky_factor
    if cholesky_items is None:
        return
    if self.verbose:
        print(
            f'Building {self.data.fields.itemid} projector for {self.method}'
        )
    # left part: solve the triangular system L^T x = v
    with track_time(verbose=self.verbose,
                    message=Template(' Solving triangular system: $time')):
        self.factors['items_projector_left'] = cholesky_items.T.solve(v)
    # right part: apply the Cholesky factor directly
    with track_time(verbose=self.verbose,
                    message=Template(' Applying Cholesky factor: $time')):
        self.factors['items_projector_right'] = cholesky_items.dot(v)
def build(self, *args, **kwargs):
    """Run the SGD optimizer on the training matrix and store the learned factors.

    Extra positional/keyword arguments are forwarded to ``self.optimizer``.
    Per-epoch RMSE and timing are accumulated in ``self.rmse_history`` and
    ``self.iterations_time``.
    """
    matrix = self.get_training_matrix(sparse_format='coo', dtype='f8')
    rows, cols = matrix.nonzero()
    interactions = (rows, cols, matrix.data)
    # per-row and per-column nonzero counts, as expected by the optimizer
    nonzero_count = (matrix.getnnz(axis=1), matrix.getnnz(axis=0))
    self.rmse_history = []
    self.iterations_time = []
    general_config = dict(
        seed=self.seed,
        verbose=self.show_rmse,
        iter_errors=self.rmse_history,
        iter_time=self.iterations_time,
    )
    with track_time(self.training_time, verbose=self.verbose, model=self.method):
        P, Q = self.optimizer(interactions, matrix.shape, nonzero_count,
                              self.rank, self.learn_rate, self.sigma,
                              self.num_epochs, self.tolerance,
                              *args, **kwargs, **general_config)
    self.factors[self.data.fields.userid] = P
    self.factors[self.data.fields.itemid] = Q
def build(self):
    """Learn the LCE model from item side features and user interactions."""
    # prepare input matrices for learning the model
    Xs, labels = stack_features(self.item_data, normalize=False)  # item-features sparse matrix
    Xu = self.get_training_matrix().T  # item-user sparse matrix
    # cap the neighborhood size by sqrt of the number of items
    n_nbrs = min(self.max_neighbours, int(math.sqrt(Xs.shape[0])))
    A = construct_A(Xs, n_nbrs, binary=self.binary_features)
    with track_time(self.training_time, verbose=self.verbose, model=self.method):
        W, Hu, Hs = LCE(Xs, Xu, A,
                        k=self.rank,
                        alpha=self.alpha,
                        beta=self.beta,
                        lamb=self.regularization,
                        epsilon=self.tolerance,
                        maxiter=self.max_iterations,
                        seed=self.seed,
                        verbose=self.show_error)
    userid = self.data.fields.userid
    itemid = self.data.fields.itemid
    self.factors[userid] = Hu.T
    self.factors[itemid] = W
    self.factors['item_features'] = Hs.T
    self.feature_labels = labels
def build(self):
    """Build the item-to-item co-occurrence similarity matrix."""
    rating_matrix = self.get_training_matrix()
    if self.implicit:
        # np.sign allows for negative values as well
        rating_matrix.data = np.sign(rating_matrix.data)
    with track_time(self.training_time, verbose=self.verbose, model=self.method):
        similarity = rating_matrix.T.dot(rating_matrix)  # gives CSC format
        similarity.setdiag(0)  # exclude "self-links"
        similarity.eliminate_zeros()
    self._i2i_matrix = similarity
def _update_cholesky_inplace(self, entity):
    """Update the cached Cholesky decomposition of an entity's similarity matrix in place.

    Only supported in sparse mode; raises ``NotImplementedError`` otherwise.
    """
    entity_similarity = self.data.get_relations_matrix(entity)
    if not self._sparse_mode:
        raise NotImplementedError
    weight = self.features_weight
    beta = (1.0 - weight) / weight
    if self.verbose:
        print('Updating Cholesky decomposition inplace for {} similarity'
              .format(entity))
    with track_time(verbose=self.verbose,
                    message=Template(' Cholesky decomposition update time: $time')):
        self._cholesky[entity].update_inplace(entity_similarity, beta)
def build(self):
    """Fit a LightFM model, optionally with stacked item/user side features."""
    self._model = LightFM(no_components=self.rank,
                          item_alpha=self.item_alpha,
                          user_alpha=self.user_alpha,
                          loss=self.loss,
                          learning_rate=self.learning_rate,
                          learning_schedule=self.learning_schedule,
                          max_sampled=self.max_sampled,
                          random_state=self.seed)
    fit = getattr(self._model, self.fit_method)
    matrix = self.get_training_matrix(sparse_format='coo')  # as required by LightFM
    # prefer the training-only item index when the data is split
    try:
        item_index = self.data.index.itemid.training
    except AttributeError:
        item_index = self.data.index.itemid
    if self.item_features is not None:
        item_features = self.item_features.reindex(item_index.old.values,
                                                   fill_value=[])
        self._item_features_csr, self.item_features_labels = stack_features(
            item_features,
            add_identity=self.item_identity,
            normalize=self.normalize_item_features,
            dtype='f4')
    if self.user_features is not None:
        user_features = self.user_features.reindex(
            self.data.index.userid.training.old.values, fill_value=[])
        self._user_features_csr, self.user_features_labels = stack_features(
            user_features,
            add_identity=self.user_identity,
            normalize=self.normalize_user_features,
            dtype='f4')
    with track_time(self.training_time, verbose=self.verbose, model=self.method):
        fit(matrix,
            item_features=self._item_features_csr,
            user_features=self._user_features_csr,
            **self.fit_params)
def build(self):
    """Train a BPR model via the implicit library."""
    # define BPR model instance (the original comment said iALS, but this is BPR)
    self._model = implicit.bpr.BayesianPersonalizedRanking(
        factors=self.rank,
        learning_rate=self.learning_rate,
        regularization=self.regularization,
        iterations=self.num_epochs,
        num_threads=self.num_threads,
        # random_state=self.random_state  # not supported by this version yet
    )
    self._model.random_state = self.random_state  # for future releases
    # prepare input matrix for learning the model
    matrix = self.get_training_matrix()  # user_by_item sparse matrix
    with track_time(self.training_time, verbose=self.verbose, model=self.method):
        # implicit takes an item_by_user matrix as input, hence the transpose
        self._model.fit(matrix.T, show_progress=self.show_progress)
def build(self):
    """Compute the HOOI Tucker decomposition of the interactions tensor."""
    idx, val, shp = self.data.to_coo(tensor_mode=True)
    with track_time(self.training_time, verbose=self.verbose, model=self.method):
        u, v, w, core = hooi(idx, val, shp, self.mlrank,
                             growth_tol=self.growth_tol,
                             num_iters=self.num_iters,
                             verbose=self.show_output,
                             parallel_ttm=self.parallel_ttm,
                             seed=self.seed)
    fields = self.data.fields
    self.factors[fields.userid] = u
    self.factors[fields.itemid] = v
    self.factors[fields.feedback] = w
    self.factors['core'] = core
def _update_cholesky_factor(self, entity):
    """(Re)compute and cache the Cholesky factor of an entity's similarity matrix.

    Stores ``None`` when no similarity data exists for the entity.
    Only sparse mode is implemented.
    """
    entity_similarity = self.data.get_relations_matrix(entity)
    if entity_similarity is None:
        self._cholesky[entity] = None
        return
    if not self._sparse_mode:
        raise NotImplementedError
    cholesky_decomp = cholesky_decomp_sparse
    mode = 'sparse'
    weight = self.features_weight
    beta = (1.0 - weight) / weight
    if self.verbose:
        print('Performing {} Cholesky decomposition for {} similarity'
              .format(mode, entity))
    with track_time(verbose=self.verbose,
                    message=Template('Cholesky decomposition computation time: $time')):
        self._cholesky[entity] = CholeskyFactor(
            cholesky_decomp(entity_similarity, beta=beta))
def build(self):
    """Fit an iALS model (implicit library) on a confidence-weighted matrix."""
    # define iALS model instance
    self._model = implicit.als.AlternatingLeastSquares(
        factors=self.rank,
        regularization=self.regularization,
        iterations=self.num_epochs,
        num_threads=self.num_threads)
    # prepare input matrix for learning the model
    matrix = self.get_training_matrix()  # user_by_item sparse matrix
    # convert raw interactions into confidence weights
    matrix.data = self.confidence(matrix.data,
                                  alpha=self.alpha,
                                  weight=self.weight_func,
                                  epsilon=self.epsilon)
    with track_time(self.training_time, verbose=self.verbose, model=self.method):
        # implicit takes an item_by_user matrix as input, hence the transpose
        self._model.fit(matrix.T)
def build(self, operator=None, return_factors='vh'):
    """Compute a truncated SVD of the training matrix (or a supplied operator).

    ``return_factors`` is forwarded to ``svds`` and controls which singular
    vectors are returned; factors not requested come back as ``None``.
    """
    svd_matrix = (operator if operator is not None
                  else self.get_training_matrix(dtype=np.float64))
    with track_time(self.training_time, verbose=self.verbose, model=self.method):
        user_factors, sigma, item_factors = svds(
            svd_matrix, k=self.rank, return_singular_vectors=return_factors)
    # svds yields singular triplets in ascending order; flip to descending
    if user_factors is not None:
        user_factors = np.ascontiguousarray(user_factors[:, ::-1])
    if item_factors is not None:
        item_factors = np.ascontiguousarray(item_factors[::-1, :]).T
    if sigma is not None:
        sigma = np.ascontiguousarray(sigma[::-1])
    self.factors[self.data.fields.userid] = user_factors
    self.factors[self.data.fields.itemid] = item_factors
    self.factors['singular_values'] = sigma
def mf_sgd_boilerplate(interactions, shape, nonzero_count, rank, lrate, lambd, num_epochs, tol, sgd_sweep_func=None, transform=None, transform_params=None, adjust_gradient=None, adjustment_params=None, seed=None, verbose=False, iter_errors=None, iter_time=None): assert isinstance(interactions, tuple) # required by numba assert isinstance(nonzero_count, tuple) # required by numba nrows, ncols = shape row_shp = (nrows, rank) col_shp = (ncols, rank) rnds = np.random if seed is None else np.random.RandomState(seed) row_factors = rnds.normal(scale=0.1, size=row_shp) col_factors = rnds.normal(scale=0.1, size=col_shp) sgd_sweep_func = sgd_sweep_func or generalized_sgd_sweep transform = transform or identity transform_params = transform_params or ((), ()) adjust_gradient = adjust_gradient or identity adjustment_params = adjustment_params or ((), ()) nnz = len(interactions[-1]) last_err = np.finfo('f8').max training_time = [] for epoch in range(num_epochs): if adjust_gradient in [adagrad, rmsprop]: adjustment_params = ((np.zeros(row_shp, dtype='f8'), ), (np.zeros(col_shp, dtype='f8'), )) if adjust_gradient is gnprop: adjustment_params = ((np.zeros(nrows, dtype='f8'), ), (np.zeros(ncols, dtype='f8'), )) if adjust_gradient is adam: adjustment_params = ((np.zeros(row_shp, dtype='f8'), np.zeros(row_shp, dtype='f8'), np.zeros(nrows, dtype='intp')), (np.zeros(col_shp, dtype='f8'), np.zeros(col_shp, dtype='f8'), np.zeros(ncols, dtype='intp'))) with track_time(training_time, verbose=False): new_err = sgd_sweep_func(*interactions, row_factors, col_factors, lrate, lambd, *nonzero_count, transform, transform_params, adjust_gradient, adjustment_params) refined = abs(last_err - new_err) / last_err last_err = new_err rmse = sqrt(new_err / nnz) if iter_errors is not None: iter_errors.append(rmse) if verbose: print('Epoch: {}. RMSE: {}'.format(epoch, rmse)) if refined < tol: break if iter_time is not None: iter_time.extend(training_time) return row_factors, col_factors