def computeUCR(self):
    """Build the C, U and R factors from the selected columns and rows.

    Reads ``self.data``, the selected column/row indices ``self._cid`` and
    ``self._rid``, the per-selection weights ``self._ccnt`` and
    ``self._rcnt``, the sizes ``self._rows``/``self._cols`` and the rank
    argument ``self._k`` that is forwarded to ``pinv``.

    Writes ``self._C`` (selected columns, scaled), ``self._R`` (selected
    rows, scaled) and ``self._U`` (``pinv(C) * data * pinv(R)``), and then
    exposes them under the SVD-like names ``self.U``, ``self.S`` and
    ``self.V``.

    Returns
    -------
    None
    """
    # The next lines do NOT work with h5py if CUR is used -> double indices
    # in self._cid or self._rid can occur and are not supported by h5py.
    # When using h5py data, always use CMD which ignores recurring
    # row/column selections.

    # The exponent is spelled 0.5 on purpose: ``1/2`` is integer division on
    # Python 2 and evaluates to 0, which silently disables the scaling.
    col_scale = np.diag(self._ccnt ** 0.5)
    row_scale = np.diag(self._rcnt ** 0.5)

    if scipy.sparse.issparse(self.data):
        # For scipy sparse matrices ``*`` is the matrix product.
        self._C = self.data[:, self._cid] * scipy.sparse.csc_matrix(col_scale)
        self._R = scipy.sparse.csc_matrix(row_scale) * self.data[self._rid, :]
        self._U = (pinv(self._C, self._k) * self.data[:, :]
                   * pinv(self._R, self._k))
    else:
        # The reshapes keep C and R two-dimensional even when a single
        # column or row has been selected.
        self._C = np.dot(
            self.data[:, self._cid].reshape((self._rows, -1)), col_scale)
        self._R = np.dot(
            row_scale, self.data[self._rid, :].reshape((-1, self._cols)))
        self._U = np.dot(
            np.dot(pinv(self._C, self._k), self.data[:, :]),
            pinv(self._R, self._k))

    # set some standard (with respect to SVD) variable names
    self.U = self._C
    self.S = self._U
    self.V = self._R
def computeUCR(self):
    """Compute the scaled column/row factors and the linking matrix U.

    Inputs are taken from the instance: ``self.data``, the index sequences
    ``self._cid`` (columns) and ``self._rid`` (rows), the weights
    ``self._ccnt`` and ``self._rcnt``, the dimensions ``self._rows`` and
    ``self._cols``, and ``self._k`` which is passed through to ``pinv``.

    Results are stored in ``self._C``, ``self._R`` and ``self._U`` and are
    also published as ``self.U``, ``self.V`` and ``self.S`` respectively.
    Nothing is returned.
    """
    # The next lines do NOT work with h5py if CUR is used -> double indices
    # in self._cid or self._rid can occur and are not supported by h5py.
    # When using h5py data, always use CMD which ignores recurring
    # row/column selections.

    # Use 0.5 instead of ``1 / 2``: under Python 2 the latter is integer
    # division (== 0), so every weight would be raised to the power 0 and
    # the scaling would have no effect.
    sqrt_ccnt = np.diag(self._ccnt ** 0.5)
    sqrt_rcnt = np.diag(self._rcnt ** 0.5)

    if scipy.sparse.issparse(self.data):
        # ``*`` between scipy sparse matrices is matrix multiplication.
        self._C = self.data[:, self._cid] * scipy.sparse.csc_matrix(sqrt_ccnt)
        self._R = scipy.sparse.csc_matrix(sqrt_rcnt) * self.data[self._rid, :]
        self._U = (pinv(self._C, self._k) * self.data[:, :]
                   * pinv(self._R, self._k))
    else:
        # Explicit target shapes keep the slices two-dimensional.
        selected_cols = self.data[:, self._cid].reshape(
            (self._rows, len(self._cid)))
        selected_rows = self.data[self._rid, :].reshape(
            (len(self._rid), self._cols))
        self._C = np.dot(selected_cols, sqrt_ccnt)
        self._R = np.dot(sqrt_rcnt, selected_rows)
        self._U = np.dot(
            np.dot(pinv(self._C, self._k), self.data[:, :]),
            pinv(self._R, self._k))

    # set some standard (with respect to SVD) variable names
    self.U = self._C
    self.S = self._U
    self.V = self._R
def update_h(self):
    """Recompute ``self.H`` for the current ``self.W`` and ``self.data``.

    The strategy is selected by ``self._method``:

    * ``'pca'`` -- ``H = pinv(W) . data``
    * ``'nmf'`` -- fit an ``NMF`` model with ``W`` held fixed
      (``compute_w=False``, 50 iterations) and copy its ``H``
    * ``'aa'``  -- fit an ``AA`` model with ``W`` held fixed
      (``compute_w=False``) and copy its ``H``

    Any other value leaves ``self.H`` untouched.  Nothing is returned.
    """
    # Call form of print: same output as the Python 2 statement for a single
    # argument, and it does not break parsing on Python 3.
    print(self._method)

    # The method names are mutually exclusive, so a single chain suffices.
    if self._method == 'pca':
        self.H = np.dot(pinv(self.W), self.data)
    elif self._method == 'nmf':
        mdl = NMF(self.data, num_bases=self._num_bases)
        mdl.W = self.W
        mdl.factorize(compute_w=False, niter=50)
        self.H = mdl.H.copy()
    elif self._method == 'aa':
        mdl = AA(self.data, num_bases=self._num_bases)
        mdl.W = self.W
        mdl.factorize(compute_w=False)
        self.H = mdl.H.copy()
def updateW(self):
    """Update ``self.W`` (and ``self.beta``) with one quadratic program per basis.

    For every basis index ``i`` a QP is solved with cvxopt and its solution
    is stored in row ``i`` of ``self.beta``.  The constraint matrices passed
    to the solver are ``-I``/``0`` (inequality) and a row of ones/``1``
    (equality), i.e. the coefficients are asked to be non-negative and to
    sum to one.  Afterwards ``self.W`` is set to ``(beta . data^T)^T``.

    Reads ``self.data``, ``self.H``, ``self.beta``, ``self._num_samples``
    and ``self._num_bases``.  Nothing is returned.
    """
    def updatesingleW(i):
        """Solve the QP for basis ``i`` and store the result in ``self.beta[i, :]``."""
        # optimize beta using qp solver from cvxopt
        FB = base.matrix(np.float64(np.dot(-self.data.T, W_hat[:, i])))
        be = solvers.qp(HB, FB, INQa, INQb, EQa, EQb)
        self.beta[i, :] = np.array(be['x']).reshape((1, self._num_samples))

    # float64 required for cvxopt
    HB = base.matrix(np.float64(np.dot(self.data[:, :].T, self.data[:, :])))
    EQb = base.matrix(1.0, (1, 1))

    # Target for the QPs: data times pinv(H).
    W_hat = np.dot(self.data, pinv(self.H))

    INQa = base.matrix(-np.eye(self._num_samples))
    INQb = base.matrix(0.0, (self._num_samples, 1))
    EQa = base.matrix(1.0, (1, self._num_samples))

    # Explicit loop instead of map(): the helper is called only for its side
    # effect on self.beta, and a lazy map (Python 3) would never run it.
    for i in range(self._num_bases):
        updatesingleW(i)

    self.W = np.dot(self.beta, self.data.T).T
def update_w(self):
    """Alternating least squares step: update W enforcing a convexity constraint.

    One quadratic program per basis is solved with cvxopt.  The solver is
    given ``-I``/``0`` as inequality constraint and a row of ones/``1`` as
    equality constraint, so each coefficient vector is requested to be
    non-negative and to sum to one.  The solutions are written to the rows
    of ``self.beta`` and ``self.W`` is then rebuilt as
    ``(beta . data^T)^T``.

    Reads ``self.data``, ``self.H``, ``self.beta``, ``self._num_samples``
    and ``self._num_bases``.  Nothing is returned.
    """
    def update_single_w(i):
        """Compute single W[:,i]: solve its QP and store beta[i, :]."""
        # optimize beta using qp solver from cvxopt
        FB = base.matrix(np.float64(np.dot(-self.data.T, W_hat[:, i])))
        be = solvers.qp(HB, FB, INQa, INQb, EQa, EQb)
        self.beta[i, :] = np.array(be['x']).reshape((1, self._num_samples))

    # float64 required for cvxopt
    HB = base.matrix(np.float64(np.dot(self.data[:, :].T, self.data[:, :])))
    EQb = base.matrix(1.0, (1, 1))

    # Target for the QPs: data times pinv(H).
    W_hat = np.dot(self.data, pinv(self.H))

    INQa = base.matrix(-np.eye(self._num_samples))
    INQb = base.matrix(0.0, (self._num_samples, 1))
    EQa = base.matrix(1.0, (1, self._num_samples))

    # range() instead of xrange(): equivalent here and available on both
    # Python 2 and Python 3.
    for i in range(self._num_bases):
        update_single_w(i)

    self.W = np.dot(self.beta, self.data.T).T