import warnings

import numpy as np

from sklearn.metrics.pairwise import polynomial_kernel
from sklearn.utils import check_random_state

try:  # scikit-learn's private CD solver; the module name varies by version
    from sklearn.linear_model._cd_fast import enet_coordinate_descent
except ImportError:
    from sklearn.linear_model.cd_fast import enet_coordinate_descent


def update_Z(self, X, y, verbose=False, sample_weight=None):
    """Greedy CD solver for the quadratic term of a factorization machine.

    Solves

        0.5 ||y - <Z, XX'>||^2_2 + ||Z||_*

    where Z is stored implicitly as P'ΛP.
    """
    n_samples, n_features = X.shape
    rng = check_random_state(self.random_state)

    P = self.P_
    lams = self.lams_
    old_loss = np.inf

    max_rank = self.max_rank
    if max_rank is None:
        max_rank = n_features

    for _ in range(self.max_iter_inner):
        if self.rank_ >= max_rank:
            break

        # greedy step: pick a new rank-one direction from the residual
        residual = self.predict_quadratic(X) - y  # could optimize
        if sample_weight is not None:
            residual *= sample_weight
        p = _find_basis(X, residual, **self.eigsh_kwargs)
        P.append(p)
        lams.append(0.)

        # refit all eigenvalues jointly; K[i, j] = <x_i, p_j>^2
        refit_target = y.copy()
        K = polynomial_kernel(X, np.array(P), degree=2, gamma=1, coef0=0)
        if sample_weight is not None:
            refit_target *= np.sqrt(sample_weight)
            K *= np.sqrt(sample_weight)[:, np.newaxis]
        K = np.asfortranarray(K)
        lams_init = np.array(lams, dtype=np.double)
        # minimizes 0.5 * ||y - K * lams||_2^2 + beta * ||lams||_1
        lams, _, _, _ = enet_coordinate_descent(
            lams_init, self.beta, 0, K, refit_target,
            max_iter=self.refit_iter, tol=self.tol, rng=rng,
            random=0, positive=0)

        # prune directions whose eigenvalue was shrunk exactly to zero
        P = [p for p, lam in zip(P, lams) if np.abs(lam) > 0]
        lams = [lam for lam in lams if np.abs(lam) > 0]

        self.rank_ = len(lams)
        self.quadratic_trace_ = np.sum(np.abs(lams))

        predict_quadratic = self.predict_quadratic(X, P, lams)
        residual = y - predict_quadratic  # y is already shifted
        loss = self._loss(residual, sample_weight=sample_weight)

        if verbose > 0:
            rms = np.sqrt(np.mean(residual ** 2))
            print("rank={} loss={}, RMSE={}".format(self.rank_, loss, rms))

        if np.abs(old_loss - loss) < self.tol:
            break
        old_loss = loss

    self.P_ = P
    self.lams_ = lams
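# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the solver): the refit step above is a
# plain lasso over the rank-one quadratic features K[i, j] = <x_i, p_j>^2,
# so it can be reproduced with the public scikit-learn API.  sklearn's Lasso
# minimizes (1 / (2 n)) ||y - K w||_2^2 + alpha ||w||_1, hence
# alpha = beta / n_samples matches 0.5 ||y - K w||_2^2 + beta ||w||_1.
# All data below, and the value of beta, are made-up assumptions.
def _demo_refit_with_lasso():
    from sklearn.linear_model import Lasso

    rng = np.random.RandomState(0)
    n_samples, n_features, rank = 50, 10, 3
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples)
    P = rng.randn(rank, n_features)  # current basis vectors (one per row)
    beta = 1.0                       # nuclear-norm penalty strength

    # each column of K is one rank-one quadratic feature (X @ p_j) ** 2
    K = polynomial_kernel(X, P, degree=2, gamma=1, coef0=0)

    refit = Lasso(alpha=beta / n_samples, fit_intercept=False).fit(K, y)
    lams = refit.coef_  # eigenvalues Λ; exact zeros prune the rank
    print("active rank:", np.count_nonzero(lams))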
# The method below follows an older scikit-learn ElasticNet code path;
# center_data and cd_fast are internal helpers from that era and these
# imports require an old scikit-learn release (the helpers have since
# moved or been removed).
from sklearn.linear_model import cd_fast
from sklearn.linear_model.base import center_data


def _dense_fit(self, X, y, Xy=None, coef_init=None):
    # copy was done in fit if necessary
    X, y, X_mean, y_mean, X_std = center_data(
        X, y, self.fit_intercept, self.normalize, copy=False)

    if y.ndim == 1:
        y = y[:, np.newaxis]
    if Xy is not None and Xy.ndim == 1:
        Xy = Xy[:, np.newaxis]

    n_samples, n_features = X.shape
    n_targets = y.shape[1]

    precompute = self.precompute
    if hasattr(precompute, '__array__') \
            and not np.allclose(X_mean, np.zeros(n_features)) \
            and not np.allclose(X_std, np.ones(n_features)):
        # the user-supplied Gram matrix no longer matches the centered
        # and scaled data, so recompute it
        precompute = 'auto'
        Xy = None

    coef_ = self._init_coef(coef_init, n_features, n_targets)
    dual_gap_ = np.empty(n_targets)
    eps_ = np.empty(n_targets)

    l1_reg = self.alpha * self.l1_ratio * n_samples
    # l2 term disabled here; stock ElasticNet would use
    # self.alpha * (1.0 - self.l1_ratio) * n_samples
    l2_reg = 0.0

    # precompute the Gram matrix if requested, or automatically when
    # n_samples > n_features (test `precompute is True` first so the
    # truthy string 'auto' cannot short-circuit the auto condition)
    if hasattr(precompute, '__array__'):
        Gram = precompute
    elif precompute is True or (precompute == 'auto'
                                and n_samples > n_features):
        Gram = np.dot(X.T, X)
    else:
        Gram = None

    for k in range(n_targets):
        if Gram is None:
            coef_[k, :], dual_gap_[k], eps_[k] = \
                cd_fast.enet_coordinate_descent(
                    coef_[k, :], l1_reg, l2_reg, X, y[:, k],
                    self.max_iter, self.tol, True)
        else:
            Gram = Gram.copy()
            if Xy is None:
                this_Xy = np.dot(X.T, y[:, k])
            else:
                this_Xy = Xy[:, k]
            coef_[k, :], dual_gap_[k], eps_[k] = \
                cd_fast.enet_coordinate_descent_gram(
                    coef_[k, :], l1_reg, l2_reg, Gram, this_Xy, y[:, k],
                    self.max_iter, self.tol, True)

        if dual_gap_[k] > eps_[k]:
            warnings.warn('Objective did not converge for target %d, '
                          'you might want to increase the number of '
                          'iterations' % k)

    self.coef_, self.dual_gap_, self.eps_ = (
        np.squeeze(a) for a in (coef_, dual_gap_, eps_))
    self._set_intercept(X_mean, y_mean, X_std)

    # return self for chaining fit and predict calls
    return self
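# ---------------------------------------------------------------------------
# Illustrative sketch (assumed setup): the Gram branch above solves the same
# problem as the plain branch, since enet_coordinate_descent_gram works on
# X'X and X'y.  The public ElasticNet API exposes the same switch through
# its precompute parameter; with fit_intercept=False no centering happens,
# so a user-supplied Gram matrix stays valid as-is.
def _demo_gram_equivalence():
    from sklearn.linear_model import ElasticNet

    rng = np.random.RandomState(0)
    X = rng.randn(200, 20)
    y = X.dot(rng.randn(20)) + 0.1 * rng.randn(200)

    plain = ElasticNet(alpha=0.1, l1_ratio=0.7, fit_intercept=False,
                       precompute=False).fit(X, y)
    gram = ElasticNet(alpha=0.1, l1_ratio=0.7, fit_intercept=False,
                      precompute=np.dot(X.T, X)).fit(X, y)
    # both paths converge to (numerically) the same coefficients
    print(np.allclose(plain.coef_, gram.coef_, atol=1e-4))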