def precondition(x_mat, sketch_type='srft', sketch_size=3):
    '''
    Compute A Preconditioning Matrix

    Input
        x_mat: n-by-d NumPy matrix X;
        sketch_type: 'srft' or 'count';
        sketch_size: real number larger than 1; the sketch uses
            s = int(sketch_size * d) columns. It should be set as a small
            number, e.g. 3; big sketch_size leads to good condition number,
            but costs more time to compute.

    Output
        t_mat: d-by-d NumPy matrix T such that X * T is well-conditioned.

    Raises
        ValueError: if sketch_type is neither 'srft' nor 'count'.
    '''
    # Reject unknown sketch types up front; otherwise no branch below would
    # assign b_mat and the SVD call would fail with an UnboundLocalError.
    if sketch_type not in ('srft', 'count'):
        raise ValueError(
            "sketch_type must be 'srft' or 'count', got {!r}".format(sketch_type))

    _, d_int = x_mat.shape
    s_int = int(sketch_size * d_int)

    # Sketch X^T (d-by-n); the sketch B is expected to be d-by-s
    # (NOTE(review): shape inferred from how B is used below -- confirm
    # against the srft / countsketch helpers).
    if sketch_type == 'srft':
        b_mat = srft.srft(x_mat.T, s_int)
    else:
        b_mat = cs.countsketch(x_mat.T, s_int)

    u_mat, sig_vec, _ = numpy.linalg.svd(b_mat, full_matrices=False)
    # T = U * Sig^{-1}: column j of U is divided by the j-th singular value.
    # A zero singular value (rank-deficient sketch) yields inf/nan entries.
    t_mat = u_mat / sig_vec.reshape(1, len(sig_vec))
    return t_mat
def test_size(self):
    '''
    Check the shapes of the sketches returned by "srft" and "srft2".

    Relies on the fixtures x_mat, y_mat, m_int and d_int, which are
    defined outside this method.
    '''
    s_int = 39

    # Single-matrix sketch: expected to be m_int-by-s_int.
    sketch = srft.srft(x_mat, s_int)
    for axis, expected in enumerate((m_int, s_int)):
        self.assertEqual(sketch.shape[axis], expected)

    # Joint sketch of two matrices: both outputs share the s_int columns.
    sketch_x, sketch_y = srft.srft2(x_mat, y_mat, s_int)
    for axis, expected in enumerate((m_int, s_int)):
        self.assertEqual(sketch_x.shape[axis], expected)
    for axis, expected in enumerate((d_int, s_int)):
        self.assertEqual(sketch_y.shape[axis], expected)
def test_multiply_error(self):
    '''
    Test the function "srft"

    The mean relative Frobenius error of approximating xx_mat by C * C^T
    (C being the sketch of x_mat) is measured for three growing sketch
    sizes; the error should decrease as the sketch size s_int increases.

    The sketch is random, so an occasional failure (say twice in 10
    runs) is fine.
    '''
    repeat = 10

    def mean_relative_error(s_int):
        # Average the relative error over `repeat` independent sketches.
        total = 0
        for _ in range(repeat):
            c_mat = srft.srft(x_mat, s_int)
            residual = xx_mat - numpy.dot(c_mat, c_mat.T)
            total += numpy.linalg.norm(residual, ord='fro') / xx_norm
        return total / repeat

    sketch_sizes = (150, 400, 1500)
    # Evaluate in increasing order of sketch size, before any reporting.
    errors = [mean_relative_error(s_int) for s_int in sketch_sizes]

    for s_int, err in zip(sketch_sizes, errors):
        print('Approximation error for s=' + str(s_int) + ': ' + str(err))

    self.assertTrue(errors[1] < errors[0])
    self.assertTrue(errors[2] < errors[1])
def lev_approx_fast(a_mat, sketch_size=5, sketch_type='count', speedup=2):
    '''
    Compute Approximate Column Leverage Scores (fast variant)

    This algorithm is useful only if m_int is big

    Input
        a_mat: m-by-n dense matrix A (n >> m);
        sketch_size: a real number bigger than 1
            (s = sketch_size * m, capped at n / 2)
        sketch_type: 'count' or 'srft';
            'count' for count sketch;
            'srft' for subsampled randomized Fourier transform;
        speedup: a real number bigger than 1.

    Output
        lev_vec: n-dim vector containing the approximate leverage scores

    Raises
        ValueError: if sketch_type is neither 'count' nor 'srft'.

    Procedure
        1. sketch size: s_int = min(int(m_int * sketch_size), int(n_int / 2))
        2. draw m-by-s sketch B = A * S, where S is n-by-s sketching matrix
        3. compute the SVD B = U * Sig * V
        4. let T = Sig^{-1} * U^T
        5. let p = m / speedup (at least 1) and generate p-by-m Gaussian
           projection matrix P
        6. Y = (P * T) * A
        7. return the squared Euclidean norms of the n columns of Y
    '''
    # Reject unknown sketch types up front; otherwise no branch below would
    # assign b_mat and the SVD call would fail with an UnboundLocalError.
    if sketch_type not in ('count', 'srft'):
        raise ValueError(
            "sketch_type must be 'count' or 'srft', got {!r}".format(sketch_type))

    m_int, n_int = a_mat.shape

    # p_int must be smaller than m_int; keep it at least 1 so the Gaussian
    # projection is never empty (an empty P silently gives all-zero scores).
    p_int = max(1, int(m_int / speedup))

    # Cast to int: sketch_size may be a real number, and the sketch size
    # must be an integer count of columns.
    s_int = min(int(m_int * sketch_size), int(n_int / 2))

    if sketch_type == 'count':
        b_mat = cs.countsketch(a_mat, s_int)
    else:
        b_mat = srft.srft(a_mat, s_int)

    u_mat, sig_vec, _ = numpy.linalg.svd(b_mat, full_matrices=False)
    # T = Sig^{-1} * U^T: row i of U^T is divided by the i-th singular value.
    t_mat = u_mat.T / sig_vec.reshape(len(sig_vec), 1)

    # Gaussian projection scaled by 1/sqrt(p); compress T to p rows before
    # the product with A, which is the expensive step.
    p_mat = numpy.random.randn(p_int, m_int) / numpy.sqrt(p_int)
    t_mat = numpy.dot(p_mat, t_mat)

    y_mat = numpy.dot(t_mat, a_mat)
    lev_vec = numpy.sum(y_mat**2, axis=0)
    return lev_vec
def lev_approx(a_mat, sketch_size=5, sketch_type='count'):
    '''
    Compute Approximate Column Leverage Scores

    Input
        a_mat: m-by-n dense matrix A (n >> m);
        sketch_size: a real number bigger than 1
            (s = sketch_size * m)
        sketch_type: 'count', 'srft' or 'uniform';
            'count' for count sketch;
            'srft' for subsampled randomized Fourier transform;
            'uniform' for uniform sampling (without replacement;
            s is capped at n).

    Output
        lev_vec: n-dim vector containing the approximate leverage scores

    Raises
        ValueError: if sketch_type is not one of the three options above.

    Procedure
        1. sketch size: s_int = int(m_int * sketch_size)
        2. draw m-by-s sketch B = A * S, where S is n-by-s sketching matrix
        3. compute the SVD B = U * Sig * V
        4. let T = Sig^{-1} * U^T
        5. Y = T * A
        6. return the squared Euclidean norms of the n columns of Y
    '''
    # Reject unknown sketch types up front; otherwise no branch below would
    # assign b_mat and the SVD call would fail with an UnboundLocalError.
    if sketch_type not in ('count', 'srft', 'uniform'):
        raise ValueError(
            "sketch_type must be 'count', 'srft' or 'uniform', got {!r}"
            .format(sketch_type))

    m_int, n_int = a_mat.shape
    s_int = int(m_int * sketch_size)

    if sketch_type == 'count':
        b_mat = cs.countsketch(a_mat, s_int)
    elif sketch_type == 'srft':
        b_mat = srft.srft(a_mat, s_int)
    else:
        # Sampling without replacement cannot draw more than n columns.
        s_int = min(s_int, n_int)
        idx_vec = numpy.random.choice(n_int, s_int, replace=False)
        # Scale by sqrt(n / s) so that B * B^T is an unbiased estimate of
        # A * A^T; scaling by n / s would shrink every score by s / n.
        b_mat = a_mat[:, idx_vec] * numpy.sqrt(float(n_int) / s_int)

    u_mat, sig_vec, _ = numpy.linalg.svd(b_mat, full_matrices=False)
    # T = Sig^{-1} * U^T: row i of U^T is divided by the i-th singular value.
    t_mat = u_mat.T / sig_vec.reshape(len(sig_vec), 1)
    y_mat = numpy.dot(t_mat, a_mat)
    lev_vec = numpy.sum(y_mat**2, axis=0)
    return lev_vec