Ejemplo n.º 1
0
def precondition(x_mat, sketch_type='srft', sketch_size=3):
    '''
    Compute A Preconditioning Matrix

    Input
        x_mat: n-by-d NumPy matrix X;
        sketch_type: 'srft' or 'count';
        sketch_size: real number larger than 1;
                    it should be set as a small number, e.g. 3;
                    big sketch_size leads to good condition number, but costs more time to compute.

    Output
        t_mat: d-by-d NumPy matrix T such that X * T is well-conditioned.

    Raises
        ValueError: if sketch_type is neither 'srft' nor 'count'.

    Procedure
        1. sketch size: s_int = int(sketch_size * d_int)
        2. sketch the transpose: B = sketch(X^T, s_int)
        3. compute the thin SVD B = U * Sig * V
        4. return T = U * Sig^{-1}
    '''
    # Reject unknown sketch types up front; otherwise b_mat would never be
    # assigned and the SVD call below would fail with an UnboundLocalError.
    if sketch_type not in ('srft', 'count'):
        raise ValueError(
            "sketch_type must be 'srft' or 'count', got " + repr(sketch_type))

    _, d_int = x_mat.shape
    s_int = int(sketch_size * d_int)

    if sketch_type == 'srft':
        b_mat = srft.srft(x_mat.T, s_int)
    else:
        b_mat = cs.countsketch(x_mat.T, s_int)

    u_mat, sig_vec, _ = numpy.linalg.svd(b_mat, full_matrices=False)
    # Divide column j of U by the j-th singular value, i.e. T = U * Sig^{-1}.
    t_mat = u_mat / sig_vec.reshape(1, len(sig_vec))
    return t_mat
Ejemplo n.º 2
0
    def test_size(self):
        '''
        Check the output shapes of "countsketch" and "countsketch2"

        The sketch of x_mat must have m_int rows and s columns; for
        "countsketch2" the second output must have d_int rows and s columns.
        x_mat, y_mat, m_int and d_int are defined outside this method.
        '''
        sketch_cols = 19

        # Single-matrix sketch.
        sketch_x = cs.countsketch(x_mat, sketch_cols)
        for axis, expected in enumerate((m_int, sketch_cols)):
            self.assertEqual(sketch_x.shape[axis], expected)

        # Joint sketch of two matrices.
        sketch_x, sketch_y = cs.countsketch2(x_mat, y_mat, sketch_cols)
        for axis, expected in enumerate((m_int, sketch_cols)):
            self.assertEqual(sketch_x.shape[axis], expected)
        for axis, expected in enumerate((d_int, sketch_cols)):
            self.assertEqual(sketch_y.shape[axis], expected)
Ejemplo n.º 3
0
    def test_multiply_error(self):
        '''
        Test the function "countsketch"

        For each sketch size the relative Frobenius-norm error of
        C * C^T against xx_mat is averaged over several independent sketches.
        As the sketch size increases, the averaged error should decrease.
        The sketches are random, so an occasional failure
        (say twice in 10 runs) is acceptable.
        '''
        trials = 10
        sketch_sizes = (150, 400, 1500)

        mean_errors = []
        for size in sketch_sizes:
            total = 0
            for _ in range(trials):
                sketch = cs.countsketch(x_mat, size)
                residual = xx_mat - numpy.dot(sketch, sketch.T)
                total += numpy.linalg.norm(residual, ord='fro') / xx_norm
            mean_errors.append(total / trials)

        # Report every averaged error before asserting anything.
        for size, mean_err in zip(sketch_sizes, mean_errors):
            print('Approximation error for s=' + str(size) + ':    ' + str(mean_err))

        err_small, err_medium, err_large = mean_errors
        self.assertTrue(err_medium < err_small)
        self.assertTrue(err_large < err_medium)
Ejemplo n.º 4
0
def lev_approx_fast(a_mat, sketch_size=5, sketch_type='count', speedup=2):
    '''
    Compute Approximate Column Leverage Scores (Fast Variant)

    This algorithm is useful only if m_int is big

    Input
        a_mat: m-by-n dense matrix A (n >> m);
        sketch_size: a real number bigger than 1 (s = sketch_size * m)
        sketch_type: 'count' or 'srft';
                    'count' for count sketch;
                    'srft' for subsampled randomized Fourier transform;
        speedup: a real number bigger than 1.

    Output
        lev_vec: n-dim vector containing the approximate leverage scores

    Raises
        ValueError: if sketch_type is neither 'count' nor 'srft'.

    Procedure
        1. sketch size: s_int = min(int(m_int * sketch_size), n_int // 2)
        2. draw m-by-s sketch B = A * S, where S is n-by-s sketching matrix
        3. compute the SVD B = U * Sig * V
        4. let T = Sig^{-1} * U^T
        5. let p = m / speedup and generate p-by-m Gaussian projection matrix P
        6. Y = (P * T) * A
        7. return the squared column norms of Y
    '''
    # Reject unknown sketch types up front; otherwise b_mat would never be
    # assigned and the SVD call below would fail with an UnboundLocalError.
    if sketch_type not in ('count', 'srft'):
        raise ValueError(
            "sketch_type must be 'count' or 'srft', got " + repr(sketch_type))

    m_int, n_int = a_mat.shape

    # p_int should be smaller than m_int; keep at least one projection row so
    # that a large speedup does not silently yield all-zero scores.
    p_int = max(1, int(m_int / speedup))

    # sketch_size may be a real number, so cast to get an integer sketch size.
    s_int = min(int(m_int * sketch_size), n_int // 2)
    if sketch_type == 'count':
        b_mat = cs.countsketch(a_mat, s_int)
    else:
        b_mat = srft.srft(a_mat, s_int)
    u_mat, sig_vec, _ = numpy.linalg.svd(b_mat, full_matrices=False)
    # Divide row i of U^T by the i-th singular value, i.e. T = Sig^{-1} * U^T.
    t_mat = u_mat.T / sig_vec.reshape(len(sig_vec), 1)

    # Gaussian projection scaled by 1/sqrt(p); folding it into T first means
    # only a p-by-m matrix multiplies the large matrix A.
    p_mat = numpy.random.randn(p_int, m_int) / numpy.sqrt(p_int)
    t_mat = numpy.dot(p_mat, t_mat)

    y_mat = numpy.dot(t_mat, a_mat)
    lev_vec = numpy.sum(y_mat**2, axis=0)
    return lev_vec
Ejemplo n.º 5
0
def lev_approx(a_mat, sketch_size=5, sketch_type='count'):
    '''
    Compute Approximate Column Leverage Scores

    Input
        a_mat: m-by-n dense matrix A (n >> m);
        sketch_size: a real number bigger than 1 (s = sketch_size * m)
        sketch_type: 'count', 'srft', or 'uniform';
                    'count' for count sketch;
                    'srft' for subsampled randomized Fourier transform;
                    'uniform' for uniform sampling.

    Output
        lev_vec: n-dim vector containing the approximate leverage scores

    Raises
        ValueError: if sketch_type is not one of the supported options.

    Procedure
        1. sketch size: s_int = int(m_int * sketch_size)
           (capped at n_int for uniform sampling)
        2. draw m-by-s sketch B = A * S, where S is n-by-s sketching matrix
        3. compute the SVD B = U * Sig * V
        4. let T = Sig^{-1} * U^T
        5. Y = T * A
        6. return the squared column norms of Y
    '''
    # Reject unknown sketch types up front; otherwise b_mat would never be
    # assigned and the SVD call below would fail with an UnboundLocalError.
    if sketch_type not in ('count', 'srft', 'uniform'):
        raise ValueError(
            "sketch_type must be 'count', 'srft' or 'uniform', got "
            + repr(sketch_type))

    m_int, n_int = a_mat.shape
    s_int = int(m_int * sketch_size)
    if sketch_type == 'count':
        b_mat = cs.countsketch(a_mat, s_int)
    elif sketch_type == 'srft':
        b_mat = srft.srft(a_mat, s_int)
    else:
        # Sampling without replacement cannot draw more than n columns.
        s_int = min(s_int, n_int)
        idx_vec = numpy.random.choice(n_int, s_int, replace=False)
        # Scale by sqrt(n/s) so that B * B^T estimates A * A^T; scaling by
        # n/s would shrink every leverage score by a factor s/n.
        b_mat = a_mat[:, idx_vec] * numpy.sqrt(float(n_int) / s_int)
    u_mat, sig_vec, _ = numpy.linalg.svd(b_mat, full_matrices=False)
    # Divide row i of U^T by the i-th singular value, i.e. T = Sig^{-1} * U^T.
    t_mat = u_mat.T / sig_vec.reshape(len(sig_vec), 1)
    y_mat = numpy.dot(t_mat, a_mat)
    lev_vec = numpy.sum(y_mat**2, axis=0)
    return lev_vec