def test_sparse_rand():
    """Time ``rand_svd`` on a random sparse matrix and on its dense copy.

    Builds a 10000 x 30000 sparse matrix with density 0.001, densifies it,
    runs ``rand_svd`` with a second argument of 200 on each representation
    and prints the elapsed wall-clock seconds for both runs.
    """
    A = sps.rand(10000, 30000, 0.001)
    Ad = A.todense()

    start_time = time.time()
    rand_svd(A, 200)
    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function (the two
    # spaces reproduce the separator the old ``print a, b`` form emitted).
    print("Sparse time:  %s" % (time.time() - start_time))

    start_time = time.time()
    rand_svd(Ad, 200)
    print("Dense time:  %s" % (time.time() - start_time))
def time_batches():
    """Print wall-clock timings of ``rand_svd`` against an exact SVD.

    Times ``rand_svd(..., k=200)`` on a 10000 x 10000 and on a 1000 x 10000
    all-ones matrix, then times ``np.linalg.svd`` on the smaller matrix.

    NOTE(review): ``time_batches`` is defined twice in this file with the
    same body; the later definition shadows the earlier one.
    """
    A = np.ones((10000, 10000))
    start_time = time.time()
    rand_svd(A, k=200)
    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function (the two
    # spaces reproduce the separator the old ``print a, b`` form emitted).
    print("Large:  %s" % (time.time() - start_time))

    B = np.ones((1000, 10000))
    start_time = time.time()
    rand_svd(B, k=200)
    print("Small:  %s" % (time.time() - start_time))

    start_time = time.time()
    np.linalg.svd(B)
    print("Exact:  %s" % (time.time() - start_time))
def time_batches():
    """Print wall-clock timings of ``rand_svd`` against an exact SVD.

    Times ``rand_svd(..., k=200)`` on a 10000 x 10000 and on a 1000 x 10000
    all-ones matrix, then times ``np.linalg.svd`` on the smaller matrix.

    NOTE(review): ``time_batches`` is defined twice in this file with the
    same body; this later definition is the one that stays bound.
    """
    A = np.ones((10000, 10000))
    start_time = time.time()
    rand_svd(A, k=200)
    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function (the two
    # spaces reproduce the separator the old ``print a, b`` form emitted).
    print("Large:  %s" % (time.time() - start_time))

    B = np.ones((1000, 10000))
    start_time = time.time()
    rand_svd(B, k=200)
    print("Small:  %s" % (time.time() - start_time))

    start_time = time.time()
    np.linalg.svd(B)
    print("Exact:  %s" % (time.time() - start_time))
def _sparse_rand_sketch(self, mat_b):
    """Shrink ``mat_b`` via ``rand_svd`` and return the result as a LIL matrix.

    Args:
        mat_b: matrix handed unchanged to ``rand_svd``; presumably sparse
            with ``self.m`` columns -- TODO confirm against callers.

    Returns:
        A ``scipy.sparse`` matrix in LIL format: the rows of ``mat_vt``
        rescaled by the shrunken singular values, stacked on top of
        ``self.b_size`` all-zero rows of width ``self.m``.
    """
    # Trace message kept from the original; the parenthesised single-string
    # form prints identically on Python 2 and Python 3.
    print("In sparse rand sketch")

    # The left singular vectors are not needed.
    _, vec_sigma, mat_vt = rand_svd(mat_b, self.l, raw=True)
    squared_sv_center = vec_sigma[self.del_ind] ** 2

    # The first alpha_ind singular values are kept as they are; the rest
    # have the threshold subtracted from their squares, with negative
    # results clamped to zero before taking the square root.
    residual = np.asarray(
        (vec_sigma ** 2 - squared_sv_center)[self.alpha_ind:], dtype=float)
    sigma_tilde = np.concatenate((
        np.asarray(vec_sigma[:self.alpha_ind], dtype=float),
        np.sqrt(np.where(residual < 0.0, 0.0, residual)),
    ))

    # Scaling row i of mat_vt by sigma_tilde[i] through broadcasting saves
    # us from having to construct a diagonal matrix.
    new_mat_b = sigma_tilde[:, np.newaxis] * mat_vt

    zero_rows = sps.lil_matrix((self.b_size, self.m))
    return sps.vstack((sps.lil_matrix(new_mat_b), zero_rows), format='lil')
def _rand_svd_sketch(self, mat_b):
    """Shrink ``mat_b`` in place using the factors returned by ``rand_svd``.

    Works for a dense ``mat_b``. The first ``self.l`` rows are overwritten
    with the rows of ``mat_vt`` rescaled by the shrunken singular values and
    every row from ``self.l`` onward is set to zero.

    Args:
        mat_b: dense, writable 2-D array; modified in place. Presumably of
            shape ``(self.l + self.b_size, self.m)`` -- TODO confirm.

    Returns:
        None. The result is written into ``mat_b``.
    """
    # The left singular vectors are not needed.
    _, vec_sigma, mat_vt = rand_svd(mat_b, self.l, raw=True)
    squared_sv_center = vec_sigma[self.del_ind] ** 2

    # The first alpha_ind singular values are kept as they are; the rest
    # have the threshold subtracted from their squares, with negative
    # results clamped to zero before taking the square root.
    residual = np.asarray(
        (vec_sigma ** 2 - squared_sv_center)[self.alpha_ind:], dtype=float)
    sigma_tilde = np.concatenate((
        np.asarray(vec_sigma[:self.alpha_ind], dtype=float),
        np.sqrt(np.where(residual < 0.0, 0.0, residual)),
    ))

    # Broadcasting scales row i of mat_vt by sigma_tilde[i] without
    # building a diagonal matrix.
    mat_b[:self.l, :] = sigma_tilde[:, np.newaxis] * mat_vt
    # Scalar assignment clears the remaining rows without allocating a
    # temporary zeros matrix.
    mat_b[self.l:, :] = 0.0
def _rand_svd_sketch(self, mat_b):
    """Return a shrunken copy of ``mat_b`` built from an approximate SVD.

    Uses the fbpca-based ``rand_svd`` to approximate the decomposition,
    asking only for the first ``self.l`` values. The threshold is the square
    of the singular value at index ``self.l - 1``; it is subtracted from
    every squared singular value, negatives are clamped to zero, and the
    square roots rescale the rows of ``mat_vt``.

    Side effect: when ``self.track_del`` is truthy the threshold is added
    to ``self.delta``.

    Args:
        mat_b: matrix handed unchanged to ``rand_svd``; presumably has
            ``self.m`` columns -- TODO confirm against callers.

    Returns:
        A dense array with ``self.b_size`` more rows than ``rand_svd``
        returned and ``self.m`` columns; the appended rows are all zero.
    """
    # TODO: do we care about block size for the power iteration method?
    _, vec_sigma, mat_vt = rand_svd(mat_b, self.l, raw=True)

    # The result must be an (l + b) x ncols matrix, so pad the singular
    # values with b zeros and the right factor with b zero rows.
    extra_rows = self.b_size
    vec_sigma = np.hstack((vec_sigma, np.zeros(extra_rows)))
    mat_vt = np.vstack((mat_vt, np.zeros((extra_rows, self.m))))

    squared_sv_center = vec_sigma[self.l - 1] ** 2
    if self.track_del:
        self.delta = self.delta + squared_sv_center

    # Clamp negative differences to zero before the square root.
    residual = vec_sigma ** 2 - squared_sv_center
    sigma_tilde = np.sqrt(np.where(residual < 0.0, 0.0, residual))

    # Row-wise broadcasting gives the same product as multiplying by
    # diag(sigma_tilde), without materialising an (l + b) x (l + b) matrix.
    return sigma_tilde[:, np.newaxis] * mat_vt
def _old_rand_svd_sketch(self, mat_b):
    """Return a shrunken copy of ``mat_b`` built from an approximate SVD.

    Uses the fbpca-based ``rand_svd`` to approximate the decomposition with
    ``self.l`` components. The threshold is the squared singular value at
    ``self.del_ind``; singular values before ``self.alpha_ind`` are kept,
    the rest are shrunk by the threshold (negatives clamped to zero), and
    the rows of ``mat_vt`` are rescaled accordingly.

    The nested ``old_svd`` helper that used to sit at the top of this
    method was never called, and it read ``vec_sigma`` before assigning it,
    so it could not have run; it has been dropped.

    Args:
        mat_b: matrix handed unchanged to ``rand_svd``; presumably has
            ``self.m`` columns -- TODO confirm against callers.

    Returns:
        A dense array with ``self.b_size`` more rows than ``rand_svd``
        returned and ``self.m`` columns; the appended rows are all zero.
    """
    # TODO: do we care about block size for the power iteration method?
    _, vec_sigma, mat_vt = rand_svd(mat_b, self.l, raw=True)

    # The result must be an (l + b) x ncols matrix, so pad the singular
    # values with b zeros and the right factor with b zero rows.
    extra_rows = self.b_size
    vec_sigma = np.hstack((vec_sigma, np.zeros(extra_rows)))
    mat_vt = np.vstack((mat_vt, np.zeros((extra_rows, self.m))))

    squared_sv_center = vec_sigma[self.del_ind] ** 2

    # Only entries from alpha_ind to the end are shrunk; negative
    # differences are clamped to zero before the square root.
    residual = (vec_sigma ** 2 - squared_sv_center)[self.alpha_ind:]
    sigma_tilde = np.concatenate((
        vec_sigma[:self.alpha_ind],
        np.sqrt(np.where(residual < 0.0, 0.0, residual)),
    ))

    # Row-wise broadcasting gives the same product as multiplying by
    # diag(sigma_tilde), without materialising an (l + b) x (l + b) matrix.
    return sigma_tilde[:, np.newaxis] * mat_vt