Ejemplo n.º 1
0
    def lsaTransform(self, dimensions=1):
        """Zero the smallest singular values of ``self.matrix`` and rebuild it.

        Calculates the SVD of the matrix, U . SIGMA . VT = MATRIX, sets the
        ``dimensions`` smallest singular values to zero to produce SIGMA',
        and stores U . SIGMA' . VT back into ``self.matrix``.

        dimensions: how many of the smallest singular values to discard.
            When it is larger than the number of rows a message is printed
            and the matrix is left unchanged.
        """
        rows, cols = self.matrix.shape

        if dimensions > rows:
            print("dimension reduction cannot be greater than %s" % rows)
            return

        # Sigma comes out as a 1-D array of min(rows, cols) values, largest
        # first, rather than as a matrix.
        u, sigma, vt = linalg.svd(self.matrix)

        # Dimension reduction, build SIGMA'. Count from the end of sigma, not
        # from `rows`: for a matrix with more rows than columns sigma is
        # shorter than `rows` and indexing by row number ran out of bounds.
        first_dropped = max(len(sigma) - dimensions, 0)
        sigma[first_dropped:] = 0

        sigma_matrix = linalg.diagsvd(sigma, rows, cols)
        print(sigma_matrix)

        # Reconstruct MATRIX' and save the transform
        self.matrix = dot(dot(u, sigma_matrix), vt)
Ejemplo n.º 2
0
    def lsaTransform(self, dimensions=1):
        """Replace ``self.matrix`` by a reduced-rank reconstruction of itself.

        Calculate SVD of objects matrix: U . SIGMA . VT = MATRIX
        Zero the ``dimensions`` smallest entries of sigma, producing sigma'.
        Then dot product the matrices:  U . SIGMA' . VT = MATRIX'

        dimensions: number of smallest singular values to set to zero. If it
            exceeds the row count, only a message is printed.
        """
        rows, cols = self.matrix.shape

        if dimensions <= rows:  # It's a valid reduction

            # Sigma comes out as a 1-D array (length min(rows, cols)) rather
            # than a matrix
            u, sigma, vt = linalg.svd(self.matrix)

            # Dimension reduction, build SIGMA'. The slice is anchored on the
            # length of sigma: anchoring it on `rows` indexed past the end of
            # sigma whenever the matrix had more rows than columns.
            kept = max(len(sigma) - dimensions, 0)
            sigma[kept:] = 0

            reduced_sigma = linalg.diagsvd(sigma, rows, cols)
            print(reduced_sigma)

            # Reconstruct MATRIX' and save the transform
            self.matrix = dot(dot(u, reduced_sigma), vt)

        else:
            print("dimension reduction cannot be greater than %s" % rows)
Ejemplo n.º 3
0
    def __init__(self, data=None, sym=None):
        """Initialise the parent class and cache the SVD of ``self.x``.

        ``data`` and ``sym`` are forwarded unchanged to the parent
        initialiser (presumably the one that sets ``self.x`` -- confirm
        against the base class).

        Stores the full SVD factors as ``self.u``, ``self.s``, ``self.v`` and
        two matrices with the shape of ``self.x``: ``self.sdiag`` holding the
        singular values on the diagonal and ``self.sinvdiag`` holding their
        reciprocals.
        """
        super(SvdArray, self).__init__(data=data, sym=sym)

        u, s, v = np.linalg.svd(self.x, full_matrices=1)
        self.u, self.s, self.v = u, s, v
        # The shape must come from self.x: a bare `x` is not defined in this
        # scope and raised NameError.
        self.sdiag = linalg.diagsvd(s, *self.x.shape)
        self.sinvdiag = linalg.diagsvd(1./s, *self.x.shape)
Ejemplo n.º 4
0
def sparse_stable_svd(R, nboot=50):
    """SVD of the concatenated data plus bootstrap ratios and q-values.

    R: indexable with an integer array (e.g. a numpy array of blocks); its
        items are joined with ``np.concatenate`` before the SVD.
    nboot: number of bootstrap resamples of the items of ``R``.

    Returns ``(U, s, Vh, qv, boot_ratio)``: the SVD factors of the full
    data, the 'qval' output of fdrtool reshaped like ``boot_ratio``, and the
    ratio of ``diag(s) . Vh`` to the standard deviation of its bootstrap
    counterparts.
    """
    n_items = len(R)

    # generate the boots; randint's upper bound is exclusive, so this draws
    # indices 0..n_items-1 like the deprecated random_integers call did
    boots = [np.random.randint(0, n_items, n_items) for _ in range(nboot)]

    # calc the original SVD
    U, s, Vh = np.linalg.svd(np.concatenate(R), full_matrices=False)

    # do the boots
    rVs = []
    for boot in boots:
        Ub, sb, Vhb = np.linalg.svd(np.concatenate(R[boot]), full_matrices=False)

        # presumably the rotation aligning the bootstrap basis with the
        # original one -- confirm against procrustes()
        rmat = procrustes(U, Ub)

        rVs.append(np.dot(rmat, np.dot(diagsvd(sb, len(sb), len(sb)), Vhb)))

    # get the bootstrap ratios
    rVs = np.array(rVs)
    Vs = np.dot(diagsvd(s, len(s), len(s)), Vh)
    boot_ratio = Vs / rVs.std(0)

    # pass the boot ratios through fdrtool to pick stable features
    counts, edges = np.histogram(boot_ratio.flatten(), bins=500)
    # Left edge of the first fullest bin. `edges` has one more entry than
    # `counts`, so it cannot be indexed with a boolean mask built from counts.
    peak = edges[np.argmax(counts)]
    results = fdrtool.fdrtool(FloatVector(boot_ratio.flatten() - peak), statistic='normal',
                              plot=False, verbose=False)
    qv = np.array(results.rx('qval')).reshape(boot_ratio.shape)

    return U, s, Vh, qv, boot_ratio
Ejemplo n.º 5
0
    def __init__(self, data=None, sym=None):
        """Run the parent initialiser, then decompose ``self.x`` with an SVD.

        ``data`` and ``sym`` are passed straight to the parent class
        (NOTE(review): ``self.x`` is expected to exist afterwards -- verify
        in the base class).

        Attributes set here: ``u``, ``s``, ``v`` (full SVD factors),
        ``sdiag`` (singular values on the diagonal of a matrix shaped like
        ``self.x``) and ``sinvdiag`` (their reciprocals, same shape).
        """
        super(SvdArray, self).__init__(data=data, sym=sym)

        u, s, v = np.linalg.svd(self.x, full_matrices=1)
        self.u, self.s, self.v = u, s, v
        # `x` on its own is undefined here (NameError); the matrix that was
        # decomposed is self.x.
        rows, cols = self.x.shape
        self.sdiag = linalg.diagsvd(s, rows, cols)
        self.sinvdiag = linalg.diagsvd(1. / s, rows, cols)
Ejemplo n.º 6
0
def svd_spectro_perf(fl,iv,re,log=None):
    """Compute flux, inverse variance and the normalised matrix Q via an SVD.

    fl, iv, re: arrays combined as ``sqrt(iv)*re`` (then transposed) and
        ``sqrt(iv)*fl``.
    log: optional object with a ``write`` method; it receives the same
        timing line that is written to stdout.

    Returns ``(flux, ivar, Q)``. Rows of ``Q`` whose sum is positive are
    divided by that sum, the matching flux entries likewise, and ``ivar`` is
    the squared row sum.
    """
    started = time.time()

    ## compute R and F
    weight = sp.sqrt(iv)
    R = (weight*re).T
    F = weight*fl

    ## svd decomposition
    u, sing, vt = linalg.svd(R)
    n_rows, n_cols = R.shape
    ones_diag = linalg.diagsvd(sing*0+1, n_rows, n_cols)
    sing_diag = linalg.diagsvd(sing, n_rows, n_cols)

    flux = vt.T.dot(ones_diag.T.dot(u.T.dot(F)))
    Q = vt.T.dot(sp.sqrt(sing_diag.T.dot(sing_diag)).dot(vt))

    # normalise only the rows with a positive sum, leaving the others as is
    norm = Q.sum(axis=1)
    positive = norm > 0
    Q[positive, :] = Q[positive, :]/norm[positive, None]
    flux[positive] /= norm[positive]
    ivar = norm**2

    message = "spectro perfected in: {} \n".format(time.time() - started)
    sys.stdout.write(message)
    if log is not None:
        log.write(message)
    return flux, ivar, Q
Ejemplo n.º 7
0
 def train(self):
     """Build the word-document matrix, weight it and decompose it.

     Parses every passage, builds the matrix, applies ``tfidfTransform`` and
     stores the SVD factors in ``self.u``, ``self.sigma`` and ``self.vt``.
     ``self.sigma_1`` is set to the inverse of the square diagonal matrix of
     singular values (zero singular values stay zero). Intermediate shapes
     and matrices are printed for inspection.
     """
     # make word-doc vector
     for index, passage in enumerate(self.passages):
         self.__parse(passage, index)
     self.__build(len(self.passages))

     print(self.matrix.shape)

     print(self)
     self.tfidfTransform()

     # SVD
     self.u, self.sigma, self.vt = linalg.svd(self.matrix)
     print(self.u.shape)
     print(len(self.sigma))
     print(self.vt.shape)

     size = len(self.sigma)
     sigma_matrix = linalg.diagsvd(self.sigma, size, size)

     # Invert the diagonal entries only. Raising the whole diagonal matrix
     # to the power -1 is element-wise and turned every off-diagonal zero
     # into inf instead of producing the matrix inverse.
     reciprocals = [1.0 / value if value != 0 else 0.0
                    for value in self.sigma]
     self.sigma_1 = linalg.diagsvd(reciprocals, size, size)

     print(self.sigma_1)

     print(self.sigma_1 * self.sigma)

     print(sigma_matrix)

     # TODO: calculate doc concepts
Ejemplo n.º 8
0
    def train(self):
        """Parse the passages, build and weight the matrix, then run the SVD.

        Side effects: sets ``self.u``, ``self.sigma``, ``self.vt`` from
        ``linalg.svd(self.matrix)`` and ``self.sigma_1``, the inverse of the
        square diagonal singular-value matrix (entries for zero singular
        values are left at zero). Several intermediate values are printed.
        """
        # make word-doc vector
        for index, passage in enumerate(self.passages):
            self.__parse(passage, index)
        self.__build(len(self.passages))

        print(self.matrix.shape)

        print(self)
        self.tfidfTransform()

        # SVD
        self.u, self.sigma, self.vt = linalg.svd(self.matrix)
        print(self.u.shape)
        print(len(self.sigma))
        print(self.vt.shape)

        count = len(self.sigma)

        # `diagsvd(...) ** -1` is an element-wise power: the zeros off the
        # diagonal became inf. Inverting a diagonal matrix only needs the
        # reciprocal of each diagonal entry.
        inverted = [1.0 / entry if entry != 0 else 0.0
                    for entry in self.sigma]
        self.sigma_1 = linalg.diagsvd(inverted, count, count)

        print(self.sigma_1)

        print(self.sigma_1 * self.sigma)

        print(linalg.diagsvd(self.sigma, count, count))

        # TODO: calculate doc concepts
Ejemplo n.º 9
0
def image_svd(n):
    """Show a rank-``n`` SVD approximation of 'image.jpg', then the original.

    Every colour channel is decomposed with ``sp.svd``; all singular values
    after the first ``n`` are set to zero and the channel is rebuilt.
    Figure 2 shows the rebuilt image with its three channels, figure 1 shows
    the image freshly read from disk.

    The matrix sizes are taken from the image itself rather than being fixed
    at 800 x 1000, so images of any size are accepted.

    n: number of leading singular values kept per channel.
    """
    def show(figure_number, picture):
        """Draw the picture and its three channels on a 2x2 grid."""
        fig = plt.figure(figure_number)
        axes = [fig.add_subplot(2, 2, position) for position in range(1, 5)]
        axes[0].imshow(picture)
        for index, cmap in enumerate(('Reds', 'Greens', 'Blues')):
            axes[index + 1].imshow(picture[:, :, index], cmap=cmap)
        plt.show()

    # NOTE(review): np.array() makes a writable copy; imread may hand back a
    # read-only array, which would make the channel assignment below fail.
    img = np.array(mpimg.imread('image.jpg'))
    channels = [img[:, :, i] for i in range(3)]
    factors = [sp.svd(channel) for channel in channels]

    r2_nonzero, g2_nonzero, b2_nonzero = [
        (sigma != 0).sum() for _, sigma, _ in factors]
    print("The number of non zero elements in decompose sigma of red, green, blue matrices are", r2_nonzero,"," ,g2_nonzero,"and" ,b2_nonzero, "respectively.")

    # Rebuild every channel before writing any of them back into img.
    rebuilt = []
    for channel, (u, sigma, vt) in zip(channels, factors):
        sigma[n:] = 0
        # diagonal matrix with the shape of the channel
        sigma_matrix = sp.diagsvd(sigma, *channel.shape)
        rebuilt.append(np.dot(u, np.dot(sigma_matrix, vt)))

    for index, values in enumerate(rebuilt):
        img[:, :, index] = values

    # plot the approximated image
    show(2, img)

    # original image
    show(1, mpimg.imread('image.jpg'))
Ejemplo n.º 10
0
    def test_less_accurate_than_full_svd(self):
        """The full SVD error must stay below 1% of the randomized SVD error.

        Both decompositions of the same ``lowrank(100, 100)`` matrix are
        multiplied back together and compared with the matrix in the 2-norm.
        """
        A = lowrank(100, 100)

        def reconstruction_error(factors):
            """2-norm of (U . S . Vh - A) for one (U, s, Vh) triple."""
            left, singular_values, right = factors
            width = left.shape[1]
            middle = la.diagsvd(singular_values, width, width)
            return la.norm(left.dot(middle).dot(right) - A, 2)

        randomized_err = reconstruction_error(
            randomized_svd.randomized_svd(A, 10))
        full_err = reconstruction_error(self.full_svd(A))

        self.assertGreater(1e-2 * randomized_err, full_err)
Ejemplo n.º 11
0
def svd(n):
    """Display 'tree.jpg' rebuilt from its first ``n`` singular values.

    Each colour channel is decomposed with ``sp.svd``, truncated to ``n``
    singular values and rebuilt. Figure 2 shows the rebuilt picture and its
    channels; figure 1 shows the picture re-read from disk.

    The channel shape is read from the image, so the picture no longer has
    to be exactly 800 x 1000.

    n: number of leading singular values kept per channel.
    """
    # NOTE(review): copied with np.array() so the array is writable; imread
    # may return a read-only array.
    img = np.array(mpimg.imread('tree.jpg'))
    [r, g, b] = [img[:, :, i] for i in range(3)]
    r1, r2, r3 = sp.svd(r)
    g1, g2, g3 = sp.svd(g)
    b1, b2, b3 = sp.svd(b)
    r_nonzero = (r2 != 0).sum()  # count the non-zero singular values
    g_nonzero = (g2 != 0).sum()
    b_nonzero = (b2 != 0).sum()
    print("The number of non zero elements in decompose sigma of red, green, blue matrices are", r_nonzero,"," ,g_nonzero,"and" ,b_nonzero, "respectively.")

    # keep the first n singular values and build the diagonal matrices
    # needed for the dot products
    for sigma in (r2, g2, b2):
        sigma[n:] = 0
    r_new = np.dot(r1, np.dot(sp.diagsvd(r2, *r.shape), r3))
    g_new = np.dot(g1, np.dot(sp.diagsvd(g2, *g.shape), g3))
    b_new = np.dot(b1, np.dot(sp.diagsvd(b2, *b.shape), b3))

    # Create and display new resolution image
    img[:, :, 0] = r_new
    img[:, :, 1] = g_new
    img[:, :, 2] = b_new

    # display the modified picture (this line used to be a bare sentence
    # without '#', which is a syntax error)
    fig = plt.figure(2)
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(r, cmap='Reds')
    ax3.imshow(g, cmap='Greens')
    ax4.imshow(b, cmap='Blues')
    plt.show()

    # display original picture
    img = mpimg.imread('tree.jpg')
    [r, g, b] = [img[:, :, i] for i in range(3)]
    fig = plt.figure(1)
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(r, cmap='Reds')
    ax3.imshow(g, cmap='Greens')
    ax4.imshow(b, cmap='Blues')
    plt.show()
Ejemplo n.º 12
0
def image_svd(n):
    """Display a rank-``n`` SVD approximation of 'SnakeDance.jpg'.

    Each colour channel is decomposed with ``linalg.svd``; only the first
    ``n`` singular values are kept before the channel is rebuilt. Figure 2
    shows the rebuilt image and the three rebuilt channels.

    The channel shape comes from the image itself instead of a fixed
    800 x 1000, so other image sizes work as well.

    n: number of leading singular values kept per channel.
    """
    # read image
    # NOTE(review): copied with np.array() so the array is writable; imread
    # may return a read-only array.
    img = np.array(mpimg.imread('SnakeDance.jpg'))

    # generate rgb array
    [r, g, b] = [img[:, :, i] for i in range(3)]

    # generate U, sigma and V for the red, green and blue matrices
    # (r1=U, r2=sigma, r3=V; same for green and blue)
    r1, r2, r3 = linalg.svd(r)
    g1, g2, g3 = linalg.svd(g)
    b1, b2, b3 = linalg.svd(b)

    # check the number of non zero singular values of each colour
    r2_nonzero = (r2 != 0).sum()
    g2_nonzero = (g2 != 0).sum()
    b2_nonzero = (b2 != 0).sum()
    print("The number of non zero elements in decompose sigma of red, green, blue matrices are", r2_nonzero,"," ,g2_nonzero,"and" ,b2_nonzero, "respectively.")

    # keep the first n singular values
    r2[n:] = 0
    g2[n:] = 0
    b2[n:] = 0

    # linalg.svd returns the singular values as a 1-D array; expand them to
    # a diagonal matrix with the shape of the channel for the dot products
    r2 = linalg.diagsvd(r2, *r.shape)
    g2 = linalg.diagsvd(g2, *g.shape)
    b2 = linalg.diagsvd(b2, *b.shape)

    # dot products give the lower-rank channel matrices
    r_new = np.dot(r1, np.dot(r2, r3))
    g_new = np.dot(g1, np.dot(g2, g3))
    b_new = np.dot(b1, np.dot(b2, b3))

    img[:, :, 0] = r_new
    img[:, :, 1] = g_new
    img[:, :, 2] = b_new

    fig2 = plt.figure(2)
    ax1 = fig2.add_subplot(2, 2, 1)
    ax2 = fig2.add_subplot(2, 2, 2)
    ax3 = fig2.add_subplot(2, 2, 3)
    ax4 = fig2.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(r_new, cmap='Reds')
    ax3.imshow(g_new, cmap='Greens')
    ax4.imshow(b_new, cmap='Blues')
    plt.show()
Ejemplo n.º 13
0
def svd_a_inv(a, b, full_matrices=True):
    """Solve ``a . x = b`` through the SVD pseudo-inverse of ``a``.

    a: 2-D matrix.
    b: right-hand side, compatible with ``a`` along its first axis.
    full_matrices: forwarded to ``linalg.svd``; both settings give the same
        result.

    Returns ``(ah, s)`` where ``ah = V . S^-1 . U^H`` is the pseudo-inverse
    of ``a`` and ``s = ah . b`` is the solution.

    Raises ``numpy.linalg.LinAlgError`` when ``a`` has a zero singular value
    (the same error the explicit matrix inversion raised before).
    """
    U, s, Vh = linalg.svd(a, full_matrices)
    if np.any(s == 0):
        raise np.linalg.LinAlgError("Singular matrix")

    # Inverse of the singular-value matrix, shaped to sit between Vh^H and
    # U^H whichever value full_matrices has.
    S_inv = linalg.diagsvd(1.0 / s, Vh.shape[0], U.shape[1])

    # This part used to live inside the `else` branch only, so the default
    # full_matrices=True call failed at the return statement.
    ah = np.dot(Vh.conj().T, np.dot(S_inv, U.conj().T))
    s = np.dot(ah, b)
    return ah, s
Ejemplo n.º 14
0
def MStep(e_tau2, e_delta2, X, y):
    """Re-estimate the weight vector ``w`` from the SVD of ``X``.

    e_tau2: array whose flattened values form the diagonal matrix that is
        inverted to give ``phi``.
    e_delta2: scalar; its reciprocal scales the D^-2 term.
    X: design matrix; the thin SVD must yield ``X.shape[1]`` singular values.
    y: target vector.

    Returns ``phi V (Vt phi V + D^-2 / e_delta2)^-1 D^-1 Ut y``.
    """
    n_features = X.shape[1]
    U, D, Vt = sl.svd(X, full_matrices=False)
    V = Vt.T

    phi = sl.inv(np.diag(e_tau2.ravel()))
    inv_D = sl.diagsvd(D**-1, n_features, n_features)
    inv_D_squared = sl.diagsvd(D**-2, n_features, n_features)

    left = phi.dot(V)
    middle = Vt.dot(phi.dot(V)) + (1 / e_delta2) * inv_D_squared
    right = inv_D.dot(U.T.dot(y))

    return left.dot(sl.inv(middle).dot(right))
def multivariateGaussian(X, mu, sigma2):
    """Evaluate the multivariate Gaussian density for every row of ``X``.

    X: examples, one per row.
    mu: mean vector.
    sigma2: a matrix is used as the covariance matrix; a vector (1-D, or
        2-D with a single row or column) is used as the per-dimension
        variances, i.e. as a diagonal covariance matrix.

    Returns the density of each example as a 1-D array.
    """
    k = len(mu)

    # promote a 1-D vector to a single-row 2-D array
    if sigma2.ndim == 1:
        sigma2 = np.reshape(sigma2, (-1, sigma2.shape[0]))

    # a row or column vector holds variances: expand to a diagonal matrix
    if sigma2.shape[0] == 1 or sigma2.shape[1] == 1:
        variances = sigma2.flatten()
        sigma2 = linalg.diagsvd(variances, len(variances), len(variances))

    # mu is unrolled (and transposed) here
    centered = X - mu.reshape(mu.size, order='F').T

    normaliser = np.dot(np.power(2 * np.pi, - k / 2.0),
                        np.power(np.linalg.det(sigma2), -0.5))
    exponent = -0.5 * np.sum(
        np.dot(centered, np.linalg.pinv(sigma2)) * centered, axis=1)

    return normaliser * np.exp(exponent)
Ejemplo n.º 16
0
 def svd(self, major_allele_matrix):
     """
     Follows procedure of Population Structure and Eigenanalysis Patterson et al 2006.
     Takes a genotype matrix of bi-allelic loci where each entry is the number of copies of the major allele at
     each locus. The genotype matrix has dimensions (number_of_individuals)*(number_of_markers)

     A 'popdata' column, if present, is ignored; the caller's frame is not modified.
     Only the first ``self.population_size`` rows are used.

     Returns ``(eigenvectors, eigenvalues)``: the left singular vectors of the corrected matrix and a
     2-row array holding the squared singular values and their share of the total.
     """
     if 'popdata' in major_allele_matrix.columns:
         # drop() returns a new frame; the result used to be thrown away, so
         # the label column was never actually removed.
         major_allele_matrix = major_allele_matrix.drop('popdata', axis=1)

     # Positional rows replace the removed `.ix` indexer.
     genotypes = np.asarray(major_allele_matrix,
                            dtype=float)[:self.population_size]

     # NOTE(review): centring and scaling are done per row (axis=1), as in
     # the original code; confirm this matches the intended normalisation.
     shifting_factor = genotypes.mean(axis=1)
     p_vector = shifting_factor / 2
     scaling_factor = np.sqrt(p_vector * (1 - p_vector))
     corrected_matrix = ((genotypes - shifting_factor[:, np.newaxis])
                         / scaling_factor[:, np.newaxis])

     # singular value decomposition using scipy linalg module
     eigenvectors, s, v = linalg.svd(corrected_matrix)
     # The diagonal of the squared singular-value matrix is simply s ** 2.
     eigenvalues = np.square(s)
     fraction_of_variance = eigenvalues / np.sum(eigenvalues)
     eigenvalues = np.vstack((eigenvalues, fraction_of_variance))
     return eigenvectors, eigenvalues
def pca(X):
    """Run principal component analysis on the dataset ``X``.

    X: 2-D array with one example per row.

    Returns ``(U, S)``: the eigenvectors of the covariance matrix of ``X``
    and a square matrix with its eigenvalues on the diagonal. The covariance
    matrix is ``X.T . X`` divided by the number of examples (``X`` is not
    mean-centred here).
    """
    m, _ = X.shape

    # covariance matrix, divided by the number of examples
    covariance = (1.0/m) * X.T.dot(X)

    # svd returns the eigenvalues as a 1-D array; diagsvd puts them on the
    # diagonal of a square matrix
    U, eigenvalues, _ = linalg.svd(covariance)
    size = len(eigenvalues)

    return U, linalg.diagsvd(eigenvalues, size, size)
Ejemplo n.º 18
0
def OLS(X, data):
    """Ordinary least squares using the singular value decomposition (SVD).

    X: design matrix (copied before use).
    data: observations to fit.

    Returns the coefficient vector ``V . pinv(D) . Ut . data``.
    """
    design = np.copy(X)
    U, singular_values, VT = scl.svd(design)
    # singular values on the diagonal of a matrix shaped like X
    D = scl.diagsvd(singular_values, U.shape[0], VT.shape[0])
    pseudo_inverse = VT.T @ scl.pinv(D) @ U.T
    return pseudo_inverse @ data
Ejemplo n.º 19
0
def truncatedSVD(x, rank=1):
    """Return the leading ``rank`` components of the SVD of ``x``.

    x: must pass ``isATensor`` and must not pass ``isAVector``.
    rank: number of singular values (and vectors) to keep.

    Returns ``(u, s, v)``: the first ``rank`` columns of the left factor, a
    ``rank x rank`` diagonal matrix of singular values, and the first
    ``rank`` rows of the right factor.
    """
    assert isATensor(x) and not isAVector(x), "`x` must be a matrix or tensor!"
    left, singular_values, right = svd(x)
    core = diagsvd(singular_values[:rank], rank, rank)
    return left[:, :rank], core, right[:rank, :]
Ejemplo n.º 20
0
def question_a():
    """Print the reduced SVD factors of a fixed 4x2 matrix and their product.

    Prints, in order: the second row of the matrix, the matrix, the first
    two columns of U, the 2x2 singular-value matrix, Vh, and the product
    U . Sig . Vh as a check of the computation.
    """
    matrix_m = np.array([[1, 2], [2, 1], [3, 4], [4, 3]])
    print(matrix_m[1])
    print(matrix_m)

    M, N = matrix_m.shape
    U, s, Vh = linalg.svd(matrix_m)

    # keep the square top block of sigma and the matching columns of U
    Sig = linalg.diagsvd(s, M, N)[:N, :N]
    U = U[:M, :N]

    for factor in (U, Sig, Vh):
        print(factor)

    print(U.dot(Sig.dot(Vh)))  # check computation
Ejemplo n.º 21
0
def GetSubmatrWithWatermark(img, word):
    """Embed a watermark into the singular values of a wavelet sub-band.

    img: 2-D image; uint8 input is rescaled to float32 by 1/255.
    word: passed to ``GenerateWatermark`` together with the shape of the
        singular-value matrix.

    Returns ``(Uwi, Si, Vhwi)``: the left and right factors of the SVD of
    the watermarked singular-value matrix, and the singular-value matrix
    ``Si`` as it was before the watermark was added.
    """
    if type(img[0][0]) == np.uint8:
        img = np.float32(img) * 1.0 / 255

    # Perform wavelet transform and pick element 1 of the second-to-last
    # coefficient entry. NOTE(review): the original code called this band
    # "hh"; confirm which detail band index 1 is in pywt.wavedec2.
    coeffs = pywt.wavedec2(img, 'db1')
    dwtDom2Wtmk = coeffs[len(coeffs) - 2][1]

    # Perform first SVD
    Ui, si, Vhi = linalg.svd(dwtDom2Wtmk, full_matrices=True)
    sigma_rows = min(Ui.shape[0], Vhi.shape[0])
    sigma_cols = max(Ui.shape[1], Vhi.shape[1])
    Si = linalg.diagsvd(si, sigma_rows, sigma_cols)

    # Apply watermark
    Siw = Si + GenerateWatermark(Si.shape, word)

    # Perform second SVD; its singular values are not used
    Uwi, _, Vhwi = linalg.svd(Siw, full_matrices=True)

    return (Uwi, Si, Vhwi)
Ejemplo n.º 22
0
def low_rank_approx(X,r):
    """Return the first ``r`` columns of the rank-``r`` SVD approximation of ``X``.

    X: 2-D matrix.
    r: number of singular values kept; also the number of columns returned.
    """
    U, singular_values, Vh = linalg.svd(X)
    singular_values[r:] = 0
    truncated = linalg.diagsvd(singular_values, U.shape[1], Vh.shape[0])
    return U.dot(truncated.dot(Vh))[:, :r]
Ejemplo n.º 23
0
def check_svd_function(svd_function):
    """Check whether ``svd_function`` behaves as ``np.linalg.svd``.

    For four dtypes and five matrix shapes a random matrix is decomposed
    with ``compute_uv=False``, with full matrices and with reduced
    matrices. The singular values of the three calls must agree and both
    reconstructions must reproduce the matrix within a NULP tolerance.

    An ``EnvironmentError`` raised along the way is printed and turned into
    an assertion failure.
    """
    dtypes = [np.float32, np.float64, np.complex64, np.complex128]
    shapes = [(1, 1), (1, 10), (10, 1), (10, 10), (10, 20)]
    try:
        for dtype in dtypes:
            print("dtype = ", dtype)
            is_complex = np.dtype(dtype).kind == 'c'
            for m, n in shapes:
                print("m, n = ", m, n)
                # quite large tolerance, but seems to be required...
                tol_NULP = 200 * max(max(m, n)**3, 100)
                if is_complex:
                    A = standard_normal_complex((m, n))
                else:
                    A = np.random.standard_normal(size=(m, n))
                A = np.asarray(A, dtype)
                Sonly = svd_function(A, compute_uv=False)

                # full matrices: the singular values need an m x n matrix
                Ufull, Sfull, VTfull = svd_function(A, full_matrices=True, compute_uv=True)
                npt.assert_array_almost_equal_nulp(Sonly, Sfull, tol_NULP)
                rebuilt_full = Ufull.dot(diagsvd(Sfull, m, n)).dot(VTfull)
                npt.assert_array_almost_equal_nulp(rebuilt_full, A, tol_NULP)

                # reduced matrices: a square diagonal matrix is enough
                U, S, VT = svd_function(A, full_matrices=False, compute_uv=True)
                npt.assert_array_almost_equal_nulp(Sonly, S, tol_NULP)
                rebuilt_thin = U.dot(np.diag(S)).dot(VT)
                npt.assert_array_almost_equal_nulp(rebuilt_thin, A, tol_NULP)
            print("types of U, S, VT = ", U.dtype, S.dtype, VT.dtype)
            nst.eq_(U.dtype, A.dtype)
    except EnvironmentError as e:
        message = str(e)
        print(message)
        if message.startswith("Couldn't find LAPACK"):
            print("(Not an issue if you have scipy >= 0.18.0)")
        assert False
Ejemplo n.º 24
0
    def fs_c(self, percent=0.9, N=None):
        """Get the column factor scores (dimensionality-reduced representation).

        The number of retained factors is either given directly or derived
        from the explained variance.

        'percent': The minimum variance that the retained factors are
                required to explain (default: 90% = 0.9)
        'N': The number of factors to retain. Overrides 'percent'.
                If the rank is less than N, N is ignored.

        Sets ``self.k`` and ``self.G`` and returns ``self.G``.
        Raises ValueError for a 'percent' outside [0, 1] or an 'N' that is
        not a positive integer.
        """
        if not (0 <= percent <= 1):
            raise ValueError("Percent should be a real number between 0 and 1.")
        if N:
            if not isinstance(N, (int, np.int64)) or N <= 0:
                raise ValueError("N should be a positive integer.")
            N = min(N, self.rank)  # maybe we should notify the user?

        # index of the first factor at which the cumulative sum of L reaches
        # the requested share, turned into a count
        target = sum(self.L)*percent
        cumulative = np.cumsum(self.L)
        self.k = 1 + np.flatnonzero(cumulative >= target)[0]

        retained = N or self.k

        # the sign of the square root can be either way; singular value vs.
        # eigenvalue
        if self.cor:
            diagonal = -np.sqrt(self.L)
        else:
            diagonal = self.s

        S = diagsvd(diagonal[:retained], len(self.Q), retained)
        self.G = _mul(self.D_c, self.Q.T, S)  # important! note the transpose on Q
        return self.G
Ejemplo n.º 25
0
    def __init__(self, corpus, vocab):
        """Build a low-rank approximation of the tf-idf matrix of ``corpus``.

        Creates a CountVectorizer restricted to ``vocab`` (stored as
        ``self.v``), turns the counts into a tf-idf array and approximates
        that array with an SVD (Singular Value Decomposition) keeping at
        most 300 singular values. The transposed approximation is stored in
        ``self.dt``. The shapes of the SVD factors are printed.

        Pickle-able.
        """
        self.v = CountVectorizer(vocabulary=vocab)

        X = self.v.fit_transform(corpus).toarray()

        tfidf = TfidfTransformer().fit_transform(X)
        # linalg.svd needs a dense array; the transformer may return a
        # sparse matrix.
        if hasattr(tfidf, 'toarray'):
            tfidf = tfidf.toarray()

        # SVD of the tf-idf weights. The decomposition used to run on the
        # raw counts X, leaving the tf-idf matrix computed but unused.
        M, N = tfidf.shape
        U, s, Vt = linalg.svd(tfidf)

        # Reduce Matrix to only 300 dimensions
        s[300:] = 0

        Sig = linalg.diagsvd(s, M, N)

        print(U.shape)
        print(Sig.shape)
        print(Vt.shape)

        # Store approximated document-term Matrix
        self.dt = (U.dot(Sig.dot(Vt))).transpose()
0
def pca(X):
    """Principal component analysis of the dataset ``X``.

    X: 2-D array, one example per row.

    Returns ``(U, S)`` where ``U`` holds the eigenvectors of the covariance
    matrix of ``X`` and ``S`` is a square matrix carrying the eigenvalues on
    its diagonal. The covariance matrix is computed as ``X.T . X`` divided
    by the number of examples, without subtracting the mean.
    """
    # number of examples; the unpacking also insists on a 2-D input
    m, _ = X.shape

    # compute the covariance matrix
    sigma = (1.0 / m) * X.T.dot(X)

    # compute the eigenvectors (U) and the eigenvalues, then place the
    # eigenvalues on the diagonal of a square matrix
    U, values, _ = linalg.svd(sigma)
    S = linalg.diagsvd(values, len(values), len(values))

    return U, S
Ejemplo n.º 27
0
def svd_thresh(data, threshold=None, n_pc=None, thresh_type='hard'):
    """Threshold the singular values

    This method thresholds the input data using singular value decomposition

    Parameters
    ----------
    data : np.ndarray
        Input data array
    threshold : float, optional
        Threshold value; when omitted it is taken from the singular value
        at index ``n_pc``
    n_pc : int or str, optional
        Number of principal components, specify an integer value or 'all';
        only used when ``threshold`` is not given
    thresh_type : str {'hard', 'soft'}
        Type of thresholding, passed on to ``thresh`` (default is 'hard')

    Returns
    -------
    np.ndarray thresholded data

    Raises
    ------
    ValueError
        For invalid string entry for n_pc

    """

    if isinstance(n_pc, str) and n_pc != 'all':
        raise ValueError('Invalid value for "n_pc", specify an integer value '
                         'or "all"')

    # Get SVD of input data.
    u, s, v = svd(data, check_finite=False, lapack_driver='gesvd')

    # Find the threshold if not provided.
    if threshold is None:

        # Find the required number of principal components if not specified.
        if n_pc is None:
            n_pc = find_n_pc(u, factor=0.1)

        # If the number of PCs is too large use all of the singular values.
        # 'all' is tested first: comparing the string with s.size raises
        # TypeError on Python 3.
        if n_pc == 'all' or n_pc >= s.size:
            n_pc = s.size - 1
            warn('Using all singular values.')

        threshold = s[n_pc]

    # Remove noise from singular values.
    s_new = thresh(s, threshold, thresh_type)

    # Reshape the singular values to the shape of the input image.
    s_new = diagsvd(s_new, *data.shape)

    # Return the thresholded image.
    return np.dot(u, np.dot(s_new, v))
Ejemplo n.º 28
0
def matrix_reduce_sigma(matrix, dimensions=1):
    """This calculates the SVD of the matrix, reduces it and
        creates a reduced matrix.

        @params matrix the matrix to reduce
        @params dimensions number of smallest singular values to discard
                (values larger than the number of singular values discard
                all of them)

        @return matrix The reduced matrix, same shape as the input
    """
    uu, sigma, vt = linalg.svd(matrix)

    # sigma is a unidimensional array of min(rows, cols) values sorted from
    # largest to smallest, so it has no second axis to query and must be
    # sliced by its own length rather than by the number of rows.
    count = len(sigma)
    first_dropped = min(max(count - dimensions, 0), count)

    # delete (i.e. set to zero) the smallest singular values
    sigma[first_dropped:] = 0

    # convert the unidimensional sigma to a matrix
    sigma_matrix = linalg.diagsvd(sigma, len(uu), len(vt))
    uu_sigma = numpy.dot(uu, sigma_matrix)
    uu_sigma_vt = numpy.dot(uu_sigma, vt)

    return uu_sigma_vt
Ejemplo n.º 29
0
def Tikhonov(Uh, s, V, b, alpha):
    '''
    Tikhonov: Tikhonov procedure for solving Ax = b via SVD.

    Inputs:
       Uh : Hermitian transpose of U, the left singular vectors
       s : singular values
       V : right singular vectors

       where U, s, Vh = svd(A) is the singular-value decomposition of
       matrix A, i.e., A = U @ S @ Vh,  s = diag(S)

       b: right-hand side of the linear system
       alpha: Tikhonov regularization parameter

    Output:
       x_alpha: the regularized solution
    '''
    # Filtered reciprocals s / (alpha + s^2) form the diagonal of the
    # regularized pseudoinverse 'Sp' of the diagonal matrix 'S'
    filtered = s / (alpha + s**2)
    Sp = diagsvd(filtered, V.shape[1], Uh.shape[0])

    # Apply the factors right to left to get the regularized solution
    projected = Uh @ b
    return V @ (Sp @ projected)
def multivariateGaussian(X, mu, sigma2):
    """Compute the probability density of the multivariate Gaussian.

    p = multivariateGaussian(X, mu, sigma2) computes the probability density
    of the examples X (one per row) under the multivariate Gaussian
    distribution with parameters mu and sigma2. If sigma2 is a matrix, it is
    treated as the covariance matrix. If sigma2 is a vector, it is treated
    as the sigma^2 values of the variances in each dimension (a diagonal
    covariance matrix).
    """
    k = len(mu)

    # turn a 1-D array into a 2-D array with a single row
    if sigma2.ndim == 1:
        sigma2 = np.reshape(sigma2, (-1, sigma2.shape[0]))

    # a single row or column is a list of variances
    is_vector = sigma2.shape[0] == 1 or sigma2.shape[1] == 1
    if is_vector:
        flat = sigma2.flatten()
        sigma2 = linalg.diagsvd(flat, len(flat), len(flat))

    # mu is unrolled (and transposed) here
    deviation = X - mu.reshape(mu.size, order='F').T

    scale = np.dot(np.power(2 * np.pi, - k / 2.0),
                   np.power(np.linalg.det(sigma2), -0.5))
    quadratic_form = np.sum(
        np.dot(deviation, np.linalg.pinv(sigma2)) * deviation, axis=1)
    p = scale * np.exp(-0.5 * quadratic_form)

    return p
Ejemplo n.º 31
0
def special_svd(M, K=9):
    """Thin SVD of the normalised matrix, truncated to ``K`` components.

    M: input matrix; it is normalised with the averages returned by
        ``find_user_and_item_avg`` before the decomposition.
    K: number of singular values to keep.

    Returns ``(U_k, V_k, sigma)``: the first ``K`` columns of U, the first
    ``K`` rows of V and the ``K x K`` diagonal matrix of singular values.
    """
    useravg, itemavg = find_user_and_item_avg(M)
    normalised = norm_matrix(M, useravg, itemavg)
    U, singular_values, V = linalg.svd(normalised, full_matrices = False)
    sigma = linalg.diagsvd(singular_values[:K], K, K)
    return U[:,:K], V[:K,:], sigma
Ejemplo n.º 32
0
def plotFirst3PCA(X, labels=None, colors=None):
    '''
    Projects the samples of the data matrix X onto its principal
    components and shows the first 3 of them using scatter3d()
    @param X: Input data, samples are in rows. It is advised to
    at least mean-center the data, but also to scale each input feature
    by dividing by standard deviation. Use svo_util.normalize() to
    do this.
    @param labels: A vector with length = rows(X), which has an integer
    label that indicates which class each sample belongs to. None means
    that the data is not classified, so all points will have the same
    color.
    @param colors: A list of color strings or numbers,
    one per label so that all points with the same label
    are colored the same. len(colors) == len( unique(labels) )
    @return: (T, W) where T is the data in pca-space and W are the
    loading weights. T and W can be used to reconstruct points from
    PCA space back to the 'normal' space, as with the function
    reconstructPCA().
    '''
    U, s, Vt = LA.svd(X, full_matrices=True)

    # samples in PCA space (also, T = X.dot(V) where V=Vt.T)
    T = U.dot(LA.diagsvd(s, *X.shape))

    # the first 3 columns belong to the 3 largest components
    scatter3d(T[:, :3], labels=labels, colors=colors)

    # the transformed data, and the loading weights
    return T, Vt.T
Ejemplo n.º 33
0
def main(num=5):
    """Write a rank-`num` SVD approximation of 'me.png' to 'me_<num>.jpg'.

    The first three channels of every pixel are collected into one table per
    channel, each table is rebuilt from its `num` leading singular triplets,
    and the result is written back into the image (truncated to int and
    clamped to 0..255) before saving.
    """
    im = Image.open('me.png')
    pix = im.load()
    width, height = im.size[0], im.size[1]

    # One width x height table of samples per channel.
    planes = [[[pix[x, y][c] for y in xrange(height)]
               for x in xrange(width)]
              for c in xrange(3)]

    for c in xrange(3):
        u, s, v = linalg.svd(planes[c])
        core = linalg.diagsvd(s[:num], num, num)
        planes[c] = dot(dot(u[:, :num], core), v[:num, :])

    for x in xrange(width):
        for y in xrange(height):
            # truncate to int first, then clamp into the 0..255 range
            pix[x, y] = tuple(min(255, max(0, int(planes[c][x][y])))
                              for c in xrange(3))
    im.save('me_%d.jpg' % num)
Ejemplo n.º 34
0
 def svd(self, matrix):
     """Decompose `matrix` with an SVD, cache the factors and print Sigma.

     Stores the left factor, the singular values and the right factor in
     self._U_, self._SIGMA_ and self._Vh_, the input's shape in self.M and
     self.N, and prints the full M x N Sigma matrix. Returns None.
     """
     # numpy.mat was only an alias of asmatrix and is gone in NumPy 2.0.
     matrix = numpy.asmatrix(matrix)
     # perform the SVD
     self._U_, self._SIGMA_, self._Vh_ = linalg.svd(matrix)
     self.M, self.N = matrix.shape
     Sig = numpy.asmatrix(linalg.diagsvd(self._SIGMA_, self.M, self.N))
     # Single-argument call form prints the same on Python 2 and Python 3.
     print(Sig)
Ejemplo n.º 35
0
    def fs_c(self, percent=0.9, N=None):
        """Return the column factor scores, keeping only the leading factors.

        The number of factors kept is N when it is given (capped at
        self.rank); otherwise it is the smallest count whose cumulative share
        of self.L reaches `percent`. That count is always stored in self.k,
        and the result is also stored in self.G.

        'percent': The minimum variance the retained factors must explain
                   (default: 90% = 0.9)
        'N': The number of factors to retain. Overrides 'percent'.
             If the rank is less than N, N is ignored.

        Raises ValueError when percent lies outside [0, 1] or when N is
        truthy but not a positive integer.
        """
        if not 0 <= percent <= 1:
            raise ValueError(
                "Percent should be a real number between 0 and 1.")
        if N:
            valid_count = isinstance(N, (int, int64)) and N > 0
            if not valid_count:
                raise ValueError("N should be a positive integer.")
            N = min(N, self.rank)  # maybe we should notify the user?

        threshold = sum(self.L) * percent
        self.k = 1 + flatnonzero(cumsum(self.L) >= threshold)[0]

        if N:
            keep = N
        else:
            keep = self.k

        # the sign of the square root can be either way;
        # singular value vs. eigenvalue
        if self.cor:
            diagonal = -sqrt(self.L)
        else:
            diagonal = self.s
        S = diagsvd(diagonal[:keep], len(self.Q), keep)

        # important! note the transpose on Q
        self.G = _mul(self.D_c, self.Q.T, S)
        return self.G
Ejemplo n.º 36
0
def load_caltech101_30(folder=CALTECH101_30_DIR, tiny_problem=False):
    """Load the caltech101-30 kernel file and split it into three datasets.

    Reads '<folder>/caltech101-30.matlab', factors the training kernel with
    an SVD into U . sqrt(S) (one example per row) and deals the rows out
    round-robin: rows 0, 3, 6, ... go to training, 1, 4, 7, ... to validation
    and 2, 5, 8, ... to test. Labels are shifted by -1 and one-hot encoded
    with to_one_hot_enc(); file names are kept in each dataset's info dict.

    With tiny_problem=True only every 5th entry of the first 20% of the
    training kernel and labels is used.

    Returns a Datasets(train=..., validation=..., test=...) object.
    """
    caltech = scio.loadmat(folder + '/caltech101-30.matlab')
    # NOTE: the test kernel and test labels are loaded but not used below.
    k_train, k_test = caltech['Ktrain'], caltech['Ktest']
    label_tr, label_te = caltech['tr_label'], caltech['te_label']
    file_tr, file_te = caltech['tr_files'], caltech['te_files']

    if tiny_problem:
        pattern_step = 5
        fraction_limit = 0.2
        limit = int(len(label_tr) * fraction_limit)
        subsample = slice(None, limit, pattern_step)
        k_train = k_train[subsample, subsample]
        label_tr = label_tr[subsample]

    U, s, Vh = linalg.svd(k_train)
    root_sigma = linalg.diagsvd(s ** 0.5, len(s), len(s))
    X = np.dot(U, root_sigma)  # examples in rows

    n_examples = len(X)
    train_rows, val_rows, test_rows = [slice(offset, n_examples, 3)
                                       for offset in (0, 1, 2)]
    label_tr_enc = to_one_hot_enc(np.array(label_tr) - 1)

    test_dataset = Dataset(data=X[test_rows, :],
                           target=label_tr_enc[test_rows, :],
                           info={'files': file_tr[test_rows]})
    validation_dataset = Dataset(data=X[val_rows, :],
                                 target=label_tr_enc[val_rows, :],
                                 info={'files': file_tr[val_rows]})
    training_dataset = Dataset(data=X[train_rows, :],
                               target=label_tr_enc[train_rows, :],
                               info={'files': file_tr[train_rows]})

    return Datasets(train=training_dataset, validation=validation_dataset,
                    test=test_dataset)
Ejemplo n.º 37
0
def check_svd_function(svd_function):
    """Check that svd_function behaves like np.linalg.svd.

    For real and complex dtypes in single and double precision, and for a
    handful of small shapes, a random matrix is decomposed three ways
    (singular values only, full matrices, reduced matrices). The singular
    values must agree between the calls and both factorisations must
    rebuild the input, all within a NULP tolerance. After each dtype the
    dtype of U from the last shape must equal the input dtype.
    """
    dtypes = [np.float32, np.float64, np.complex64, np.complex128]
    shapes = [(1, 1), (1, 10), (10, 1), (10, 10), (10, 20)]

    for dtype in dtypes:
        print("dtype = ", dtype)
        is_complex = np.dtype(dtype).kind == 'c'
        for m, n in shapes:
            print("m, n = ", m, n)
            # quite large tolerance, but seems to be required...
            tol_NULP = 200 * max(max(m, n)**3, 100)
            if is_complex:
                sample = standard_normal_complex((m, n))
            else:
                sample = np.random.standard_normal(size=(m, n))
            A = np.asarray(sample, dtype)

            # singular values only
            Sonly = svd_function(A, compute_uv=False)

            # full decomposition: Sigma is rectangular (m x n)
            Ufull, Sfull, VTfull = svd_function(A, full_matrices=True,
                                                compute_uv=True)
            npt.assert_array_almost_equal_nulp(Sonly, Sfull, tol_NULP)
            rebuilt_full = Ufull.dot(diagsvd(Sfull, m, n)).dot(VTfull)
            npt.assert_array_almost_equal_nulp(rebuilt_full, A, tol_NULP)

            # reduced decomposition: Sigma is square
            U, S, VT = svd_function(A, full_matrices=False, compute_uv=True)
            npt.assert_array_almost_equal_nulp(Sonly, S, tol_NULP)
            rebuilt_thin = U.dot(np.diag(S)).dot(VT)
            npt.assert_array_almost_equal_nulp(rebuilt_thin, A, tol_NULP)
        print("types of U, S, VT = ", U.dtype, S.dtype, VT.dtype)
        assert U.dtype == A.dtype
Ejemplo n.º 38
0
def multivariateGaussian(X, mu, Sigma2):
    """
    Computes the probability density function of the examples X
    under the multivariate gaussian distribution with parameters
    mu and Sigma2. If Sigma2 is a matrix, it is treated as the
    covariance matrix. If Sigma2 is a vector (1-D, a single row or a
    single column), it is treated as the sigma^2 values of the variances
    in each dimension (a diagonal covariance matrix).
    Args:
        X     : array(# of training examples m, # of features n)
        mu    : array(# of features n, 1) or array(# of features n,)
        Sigma2: array(# of features n, # of features n), or a vector of
                n variances
    Returns:
        p     : array(# of training examples m,)
    """
    k = len(mu)

    # Expand a variance vector into a diagonal covariance matrix.
    if Sigma2.ndim == 1 or 1 in Sigma2.shape:
        variances = Sigma2.flatten()
        Sigma2 = linalg.diagsvd(variances, len(variances), len(variances))

    # Centre the data and evaluate the density for BOTH input forms; this
    # used to happen only inside the vector branch, leaving p undefined
    # whenever a full covariance matrix was passed.
    X = X - mu.reshape(1, -1)
    normaliser = np.power(2 * np.pi, -k / 2.0) * \
        np.power(np.linalg.det(Sigma2), -0.5)
    # Row-wise quadratic form x . Sigma^-1 . x^T without building an m x m
    # matrix.
    mahalanobis = np.sum(np.dot(X, np.linalg.pinv(Sigma2)) * X, axis=1)
    p = normaliser * np.exp(-0.5 * mahalanobis)

    return p
def fillmat(M):
    """Complete a matrix with NaN entries by singular-value soft-thresholding.

    Starting from an all-zero estimate X, each inner step moves X towards M
    on the entries where the difference is a number (NaN differences are
    zeroed by nan_to_num), takes a thin SVD and shrinks every singular value
    by tau * mu, clipping at zero. The inner loop runs until get_error()
    between consecutive estimates drops to xtol; the outer loop then
    multiplies mu by eta_mu, stopping once mu reaches mu_min or after
    max_iter rounds. get_error() is defined outside this block.

    Args:
        M: 2-D array, NaN where the value is unknown.
    Returns:
        The completed estimate, same shape as M.
    """
    m, n = M.shape
    X = np.zeros(shape=(m, n))
    tau = 1.0
    mu_min = 1.0e-8
    eta_mu = 0.25
    mu = eta_mu * norm(np.nan_to_num(M))

    niter = 0
    max_iter = 10000
    xtol = 1.0e-3

    while (mu > mu_min) and (niter < max_iter):
        delta = 1.0
        while delta > xtol:
            X_prev = X
            Y = X - tau * np.nan_to_num(X - M)
            U, S, V = svd(Y, full_matrices=False)
            S1 = np.maximum(S - tau * mu, 0)
            # The thin SVD yields min(m, n) singular values, so Sigma must be
            # sized from len(S1); using n x n raised for wide inputs (m < n).
            rank = len(S1)
            S1 = diagsvd(S1, rank, rank)
            X = np.dot(U, np.dot(S1, V))
            delta = get_error(X, X_prev)

        mu = max(mu * eta_mu, mu_min)
        niter += 1
        # Single-argument call form prints the same on Python 2 and 3.
        print('mu = {:0.4e}'.format(mu))

    return X
Ejemplo n.º 40
0
    def fs_r(self, percent=0.9, N=None):
        """Return the row factor scores, keeping only the leading factors.

        The number of factors kept is N when it is given (capped at
        self.rank); otherwise it is the smallest count whose cumulative share
        of self.L reaches `percent`. That count is always stored in self.k,
        and the result is also stored in self.F.

        'percent': The minimum variance the retained factors must explain
                   (default: 90% = 0.9)
        'N': The number of factors to retain. Overrides 'percent'.
             If the rank is less than N, N is ignored.

        Raises ValueError when percent lies outside [0, 1] or when N is
        truthy but not a positive integer.
        """
        if not 0 <= percent <= 1:
            raise ValueError("Percent should be a real number between 0 and 1.")
        if N:
            valid_count = isinstance(N, (int, np.int64)) and N > 0
            if not valid_count:
                raise ValueError("N should be a positive integer.")
            N = min(N, self.rank)

        threshold = sum(self.L) * percent
        self.k = 1 + np.flatnonzero(np.cumsum(self.L) >= threshold)[0]

        if N:
            keep = N
        else:
            keep = self.k

        # the sign of the square root can be either way;
        # singular value vs. eigenvalue
        if self.cor:
            diagonal = -np.sqrt(self.L)
        else:
            diagonal = self.s
        S = diagsvd(diagonal[:keep], self._numitems, keep)

        if isinstance(self.D_r, np.ndarray):
            self.F = _mul(self.D_r, self.P, S)
        else:
            # D_r is not a plain array: chain its own dot(), with S trimmed
            # to as many rows as P has columns.
            trimmed = S[:self.P.shape[1]]
            self.F = self.D_r.dot(self.P).dot(trimmed)
        return self.F
def fillmat(M):
    """Complete a matrix with NaN entries by singular-value soft-thresholding.

    Starting from an all-zero estimate X, each inner step moves X towards M
    on the entries where the difference is a number (NaN differences are
    zeroed by nan_to_num), takes a thin SVD and shrinks every singular value
    by tau * mu, clipping at zero. The inner loop runs until get_error()
    between consecutive estimates drops to xtol; the outer loop then
    multiplies mu by eta_mu, stopping once mu reaches mu_min or after
    max_iter rounds. get_error() is defined outside this block.

    Args:
        M: 2-D array, NaN where the value is unknown.
    Returns:
        The completed estimate, same shape as M.
    """
    m, n = M.shape
    X = np.zeros(shape=(m, n))
    tau = 1.0
    mu_min = 1.0e-8
    eta_mu = 0.25
    mu = eta_mu * norm(np.nan_to_num(M))

    niter = 0
    max_iter = 10000
    xtol = 1.0e-3

    while (mu > mu_min) and (niter < max_iter):
        delta = 1.0
        while delta > xtol:
            X_prev = X
            Y = X - tau * np.nan_to_num(X - M)
            U, S, V = svd(Y, full_matrices=False)
            S1 = np.maximum(S - tau * mu, 0)
            # The thin SVD yields min(m, n) singular values, so Sigma must be
            # sized from len(S1); using n x n raised for wide inputs (m < n).
            rank = len(S1)
            S1 = diagsvd(S1, rank, rank)
            X = np.dot(U, np.dot(S1, V))
            delta = get_error(X, X_prev)

        mu = max(mu * eta_mu, mu_min)
        niter += 1
        # Single-argument call form prints the same on Python 2 and 3.
        print('mu = {:0.4e}'.format(mu))

    return X
Ejemplo n.º 42
0
def image_svd(n):
    """Show a rank-n SVD approximation of 'mypicture.jpg', then the original.

    Each colour channel is decomposed with an SVD, every singular value from
    index n onwards is zeroed and the channel is rebuilt. Figure 2 shows the
    approximated image and its three channels; figure 1 then shows the
    freshly re-read original the same way. Also prints how many singular
    values of each channel are non-zero before truncation.

    Args:
        n: number of leading singular values to keep per channel.
    """
    # Work on a copy: the channels are overwritten in place further down and
    # imread may return an array that cannot be written to.
    img = np.array(mpimg.imread('mypicture.jpg'))
    # r, g, b are views, so writing into img below updates them as well.
    [r, g, b] = [img[:, :, i] for i in range(3)]
    # Sigma is sized from the image itself rather than a fixed 800 x 1000.
    rows, cols = r.shape

    r1, r2, r3 = sp.svd(r)
    g1, g2, g3 = sp.svd(g)
    b1, b2, b3 = sp.svd(b)
    r2_nonzero = (r2 != 0).sum()
    g2_nonzero = (g2 != 0).sum()
    b2_nonzero = (b2 != 0).sum()
    print("The number of non zero elements in decompose sigma of red, green, blue matrices are", r2_nonzero,"," ,g2_nonzero,"and" ,b2_nonzero, "respectively.")

    # Keep only the n leading singular values.
    r2[n:] = 0
    g2[n:] = 0
    b2[n:] = 0
    r2 = sp.diagsvd(r2, rows, cols)
    g2 = sp.diagsvd(g2, rows, cols)
    b2 = sp.diagsvd(b2, rows, cols)
    r_new = np.dot(r1, np.dot(r2, r3))
    g_new = np.dot(g1, np.dot(g2, g3))
    b_new = np.dot(b1, np.dot(b2, b3))

    # The rebuilt values can fall slightly outside 0..255; saturate them so
    # they do not wrap around when stored back into an 8-bit image.
    # NOTE(review): assumes 8-bit channels, as a JPEG normally decodes to.
    img[:, :, 0] = np.clip(r_new, 0, 255)
    img[:, :, 1] = np.clip(g_new, 0, 255)
    img[:, :, 2] = np.clip(b_new, 0, 255)

    fig = plt.figure(2)
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(r, cmap='Reds')
    ax3.imshow(g, cmap='Greens')
    ax4.imshow(b, cmap='Blues')
    plt.show()

    # Re-read the untouched file for the side-by-side reference figure.
    img = mpimg.imread('mypicture.jpg')
    [r, g, b] = [img[:, :, i] for i in range(3)]
    fig = plt.figure(1)
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(r, cmap='Reds')
    ax3.imshow(g, cmap='Greens')
    ax4.imshow(b, cmap='Blues')
    plt.show()
Ejemplo n.º 43
0
Archivo: svd.py Proyecto: purepu/recsys
def approximation(m, k = 1):
    '''Rebuild m from the leading int(k * r) singular triplets of its thin
    SVD, where r is the number of singular values; k = 1 keeps all of them.'''
    left, singular_values, right_h = linalg.svd(m, full_matrices=False)
    keep = int(k * len(singular_values))

    kept_values = singular_values[:keep]
    kept_left = left[:, :keep]      # leading columns of U
    kept_right = right_h[:keep]     # leading rows of Vh

    core = linalg.diagsvd(kept_values, len(kept_values), len(kept_values))
    return dot(kept_left, dot(core, kept_right))
Ejemplo n.º 44
0
def _calc_invJ(J, epsilon=0.01):
    u, sigma, v = np.linalg.svd(J, full_matrices=True)
    sigma_ = [1 / s if s > epsilon else 0 for s in sigma]
    rank_v = np.shape(J)[0]
    rank_h = np.shape(J)[1]

    return np.matrix(v.transpose()) * np.matrix(
        linalg.diagsvd(sigma_, rank_h, rank_v)) * np.matrix(u.transpose())
Ejemplo n.º 45
0
def pca(X):
    """Return (U, S) from the SVD of the matrix X^T . X / m.

    m is the number of rows of X. U holds the left singular vectors and S is
    the square diagonal matrix of singular values; both are returned as
    np.matrix.
    """
    n_samples = X.shape[0]
    scatter = X.T @ X / n_samples
    components, spectrum, _ = np.linalg.svd(scatter)
    # Convert the eigenvalues to matrix form
    # https://docs.scipy.org/doc/scipy-0.19.1/reference/generated/scipy.linalg.diagsvd.html#scipy.linalg.diagsvd
    size = len(spectrum)
    spectrum_matrix = diagsvd(spectrum, size, size)
    return np.matrix(components), np.matrix(spectrum_matrix)
Ejemplo n.º 46
0
def ApplyWtmk(dwtDom2Wtmk, word):
    """Embed a watermark into the singular values of dwtDom2Wtmk.

    The input is decomposed as U . S . Vh, a watermark generated by
    GenerateWatermark() (defined outside this block) for the shape of S is
    added to S, and the singular values of that sum replace the original
    ones when the matrix is rebuilt with the original U and Vh.

    Args:
        dwtDom2Wtmk: 2-D array to watermark.
        word: passed through to GenerateWatermark().
    Returns:
        The watermarked matrix, same shape as the input.
    """
    # Perform first SVD
    Ui, si, Vhi = linalg.svd(dwtDom2Wtmk, full_matrices=True)
    # Sigma must be (rows of U) x (rows of Vh), i.e. the input's own shape.
    # The former min/max choice gave a transposed Sigma for inputs with more
    # rows than columns, which made the final product fail.
    n_rows, n_cols = Ui.shape[0], Vhi.shape[0]
    Si = linalg.diagsvd(si, n_rows, n_cols)
    watermark = GenerateWatermark(Si.shape, word)
    # Apply watermark
    Si += watermark
    # Perform second SVD; only its singular values are used.
    _, swi, _ = linalg.svd(Si, full_matrices=True)
    Swi = linalg.diagsvd(swi, n_rows, n_cols)
    # Restore chosen dwt domain with watermark embedded
    wtmkdDom = np.dot(Ui, np.dot(Swi, Vhi))
    return wtmkdDom
Ejemplo n.º 47
0
def resiGeneralised(A,B):
    """Print the eigenvalues of R^-1 . A . R^-1, R being the Cholesky factor of B.

    R is inverted through its SVD (V . S^-1 . U^H). Nothing is returned.
    The Cholesky call is made with overwrite_a=True, so the contents of B
    may be overwritten.
    """
    factor = lin.cholesky(B, overwrite_a=True)
    left, singular_values, right_h = lin.svd(factor)  # root of B
    size = left[0].size
    inv_sigma = lin.diagsvd(1/singular_values, size, size)
    inv_root = np.matrix(right_h).H * inv_sigma * np.matrix(left).H
    reduced = inv_root * np.matrix(A) * inv_root
    spectrum = lin.eig(reduced)[0]
    print(spectrum)
def svd(R):
    '''
    Returns the thin singular value decomposition of the ratings matrix
    as (U, S, Vt), with S expanded to a square diagonal matrix
    '''
    left, singular_values, right_t = linalg.svd(R, full_matrices=False)
    size = len(singular_values)
    sigma = linalg.diagsvd(singular_values, size, size)
    return left, sigma, right_t
def remove_constants(matrix, n=5):
    """Rebuild `matrix` with its n largest singular values set to zero.

    Args:
        matrix: 2-D array of any shape.
        n: number of leading singular values to remove (default 5).
    Returns:
        Array with the shape of `matrix`.
    """
    U, W, V_t = svd(matrix, full_matrices=True)
    W[:n] = 0.0
    # Sigma has to match the full factors: (rows of U) x (columns of Vt).
    # Taking the row count from len(W) only worked when rows <= columns and
    # raised a shape error for tall matrices.
    M = U.shape[0]
    N = V_t.shape[1]
    S = diagsvd(W, M, N)

    return np.dot(np.dot(U, S), V_t)
Ejemplo n.º 50
0
def lsa(document_word_matrix, dimension):
    """
    Take a document-word matrix and retrieve document-concept and concept-word
    matrices from it using latent semantic analysis (LSA).

    Prints the singular values and the squared reconstruction error of the
    rank-`dimension` approximation.

    Args:
        document_word_matrix: 2-D array to decompose.
        dimension: number of concepts ("topics") to keep; must be between 1
            and min(rows, cols).
    Returns:
        (lsa_word_topic, lsa_document_topic): the first `dimension` columns
        of the left factor, and the transpose of the first `dimension` rows
        of the right factor.
    Raises:
        ValueError: if `dimension` is outside the valid range.
    """
    # We need to know the shape of our starting matrix in terms of number
    # of rows and columns in order to run LSA.
    rows, cols = document_word_matrix.shape

    # There are only min(rows, cols) singular values, so we cannot keep more
    # concepts than that. (Checking against rows alone let a too-large
    # dimension through for tall matrices, failing later with an unrelated
    # message.)
    if dimension > min(rows, cols):
        raise ValueError("Dimension {} too big!".format(dimension))

    # Dimensions also have to be positive
    elif dimension < 1:
        raise ValueError("Dimension {} too small!".format(dimension))

    # Singular value decomposition splits the matrix into three factors
    # that, multiplied together, recreate the original:
    # - word_topic: one column of "concept" proportions per singular value,
    # - singular_values: the nonnegative diagonal, returned as a plain list
    #   ordered from greatest to least, and
    # - topic_document: one row per "concept".
    # Only the leading `dimension` columns/rows are used below, so the thin
    # decomposition is sufficient.
    word_topic, singular_values, topic_document = linalg.svd(
        document_word_matrix, full_matrices=False)
    # Single-argument call form prints the same on Python 2 and 3.
    print(singular_values)

    # Reduce to the number of concepts we want by discarding every column
    # past that dimension.
    lsa_singular_values = singular_values[:dimension]
    lsa_word_topic = word_topic[:, :dimension]

    # The right factor has topics as rows; keep the leading rows, then
    # transpose so the returned matrix has topics as columns.
    lsa_topic_document = topic_document[:dimension, :]
    lsa_document_topic = np.transpose(lsa_topic_document)

    # We can check that we did things right by rebuilding the matrix from
    # the truncated factors and measuring how far off it is.
    new_singular_matrix = linalg.diagsvd(lsa_singular_values, dimension, dimension)
    transformed_matrix = dot(dot(lsa_word_topic, new_singular_matrix), lsa_topic_document)
    print("Representation error: {}".format(np.sum((document_word_matrix - transformed_matrix)**2)))

    return lsa_word_topic, lsa_document_topic
Ejemplo n.º 51
0
	def compute_svd(self, matrix) :
		"""Decompose `matrix` with a full SVD and return the product of the factors.

		No singular value is dropped, so the result is `matrix` rebuilt as
		U . Sigma . VT.
		"""
		left, singular_values, right_t = linalg.svd(matrix)

		# Sigma sized rows(matrix) x rows(VT) so it fits between the full factors
		n_rows = len(matrix)
		n_cols = len(right_t)
		sigma_full = linalg.diagsvd(singular_values, n_rows, n_cols)

		return numpy.dot(numpy.dot(left, sigma_full), right_t)
Ejemplo n.º 52
0
def exsh(quest):
    """Return [quest, related] for a comma-separated query string.

    A term-count matrix is built with CountVectorizer from the texts returned
    by preprocess_wiki(), and decomposed with an SVD. The query (cleaned by
    preprocess_q()) is vectorised with the same vocabulary and mapped through
    the transposed pseudo-inverse of Sigma and Vh. Every row of U whose first
    k = 5 components have a cosine similarity of at least 0.8 with the mapped
    query contributes the matching entry of preprocess_wiki_values().

    `related` starts with the first 10 comma-separated pieces of the cleaned
    query that contain at least one ASCII letter or digit. The three
    preprocess_* helpers are defined outside this block.
    """
    qtxt = quest
    mean = preprocess_wiki()
    vectorizer = CountVectorizer(min_df=0, stop_words=None, ngram_range=(1, 1))
    X = vectorizer.fit_transform(mean)
    matrix = X.toarray()
    mlist = preprocess_wiki_values()
    qtxt_q = preprocess_q(quest)
    noua = qtxt_q.split(',')
    # NOTE(review): the original first appended the leading field of `quest`
    # (the event id) to this list and then immediately overwrote the list,
    # so that append never had any effect; it is dropped here. Confirm
    # whether the id was meant to be part of the result.
    fcontent = [wip for wip in noua if re.sub(r'[^0-9A-Za-z]', "", wip)]

    # Query counts as a column vector (terms x 1).
    q = vectorizer.transform([qtxt_q]).toarray()
    qt = np.matrix(q).T

    M, N = matrix.shape
    U, s, Vh = linalg.svd(matrix)
    Sig = np.matrix(linalg.diagsvd(s, M, N))
    Uk = np.matrix(Vh)
    # If the keyword matrix ever needs to be stored, this is the place.

    # Transposed (pseudo-)inverse of Sigma, applied to Vh and then the query.
    Sigk = Sig.getI().T
    r = Sigk.dot(Uk).dot(qt)

    # r is a column matrix; flatten it to a plain list of components.
    query = [row[0] for row in r.tolist()]
    qlen = len(query)

    k = 5  # cutoff rank
    rus = fcontent[:10]
    if k < qlen:
        for tr in range(len(U)):
            res = 1 - scipy.spatial.distance.cosine(U[tr][:k], query[:k])
            if res >= 0.8:
                rus.append(mlist[tr])
    else:
        # Single-argument call form prints the same on Python 2 and 3.
        print("Pruna menor de k ...determina un elemento mayor")

    # results: title + correlated words
    return [qtxt, rus]

#qtxt='12363 , Carabas Presents: Alternative Drinks (Music, Sci-fi, Film, Comics etc),enjoy along come friends invite ticket book etc art events bands comics books projects work attendees recommendations nostalgia clips wonderful weird screen plasma reel Film Visuals day present others Dance Rock Indie crowdpleaser system sound mix playlist Alternative Soundtrack room function venue Excellent drinks folk meeting enjoys thing kind like sounds thinks anyone frankly woes share ideas bounce work promote others meet professionals artists writers musicians creators like Comics fi Sci Style Music Alternative Film Cult life side alternative loves anyone night social ,1,Career and Life Balance,98,Music , 0'
#print exsh(qtxt)
Ejemplo n.º 53
0
 def _make_data(n_samples, n_features, n_tasks, n_components):
     """Draw random data X and targets Y = X . W^T for a low-rank weight matrix W.

     W starts as uniform noise in [-0.5, 0.5) of shape (n_tasks, n_features);
     all but its first n_components singular values are zeroed before it is
     rebuilt. X is uniform noise in [-0.5, 0.5) of shape
     (n_samples, n_features). Both draws come from the module-level `rng`.

     Returns (X, Y, W).
     """
     noise = rng.rand(n_tasks, n_features) - 0.5
     left, spectrum, right = svd(noise, full_matrices=True)
     spectrum[n_components:] = 0
     sigma = diagsvd(spectrum, left.shape[0], right.shape[0])
     W = np.dot(np.dot(left, sigma), right)

     X = rng.rand(n_samples, n_features) - 0.5
     return X, np.dot(X, W.T), W
Ejemplo n.º 54
0
def train_complete_SVD(M, K=9):
    """Factor the normalised matrix into two rank-K halves.

    M is normalised with norm_matrix() using the averages reported by
    find_user_and_item_avg() (both defined outside this block) and decomposed
    with a thin SVD. The square root of the K leading singular values is
    folded into each side.

    Returns:
        (U_tilde, V_tilde): U_k . sqrt(S_k) and sqrt(S_k) . V_k.
    """
    user_avg, item_avg = find_user_and_item_avg(M)
    normalised = norm_matrix(M, user_avg, item_avg)
    left, singular_values, right = linalg.svd(normalised, full_matrices=False)
    # Values unused; the unpacking is kept so the 2-D shape requirement on
    # the normalised matrix is still enforced at this point.
    _n_users, _n_items = normalised.shape

    root_sigma = linalg.diagsvd(np.sqrt(singular_values[:K]), K, K)
    left_half = np.dot(left[:, :K], root_sigma)
    right_half = np.dot(root_sigma, right[:K, :])
    return left_half, right_half
Ejemplo n.º 55
0
def image_svd(n):
    """Show a rank-n SVD approximation of 'rainbow.jpg' and its channels.

    Each of the three channels is decomposed with an SVD, every singular
    value from index n onwards is zeroed and the channel is rebuilt. The
    approximated image and the three rebuilt channels are shown in figure 2.
    Also prints how many singular values of each channel are non-zero before
    truncation.

    Args:
        n: number of leading singular values to keep per channel.
    """
    # Work on a copy: the channels are overwritten in place further down and
    # imread may return an array that cannot be written to.
    img = np.array(mpimg.imread('rainbow.jpg'))

    [j, k, m] = [img[:, :, i] for i in range(3)]
    # Sigma is sized from the image itself rather than a fixed 800 x 1000.
    rows, cols = j.shape

    j1, j2, j3 = linalg.svd(j)
    k1, k2, k3 = linalg.svd(k)
    m1, m2, m3 = linalg.svd(m)

    j2_nonzero = (j2 != 0).sum()
    k2_nonzero = (k2 != 0).sum()
    m2_nonzero = (m2 != 0).sum()
    print("The number of non zero elements in decompose sigma of red, green, blue matrices are", j2_nonzero,"," ,k2_nonzero,"and" ,m2_nonzero, "respectively.")

    # Keep only the n leading singular values.
    j2[n:] = 0
    k2[n:] = 0
    m2[n:] = 0

    j2 = linalg.diagsvd(j2, rows, cols)
    k2 = linalg.diagsvd(k2, rows, cols)
    m2 = linalg.diagsvd(m2, rows, cols)

    j_new = np.dot(j1, np.dot(j2, j3))
    k_new = np.dot(k1, np.dot(k2, k3))
    m_new = np.dot(m1, np.dot(m2, m3))

    # The rebuilt values can fall slightly outside 0..255; saturate them so
    # they do not wrap around when stored back into an 8-bit image.
    # NOTE(review): assumes 8-bit channels, as a JPEG normally decodes to.
    img[:, :, 0] = np.clip(j_new, 0, 255)
    img[:, :, 1] = np.clip(k_new, 0, 255)
    img[:, :, 2] = np.clip(m_new, 0, 255)

    fig2 = plt.figure(2)
    ax1 = fig2.add_subplot(2, 2, 1)
    ax2 = fig2.add_subplot(2, 2, 2)
    ax3 = fig2.add_subplot(2, 2, 3)
    ax4 = fig2.add_subplot(2, 2, 4)
    ax1.imshow(img)
    ax2.imshow(j_new, cmap='Blues')
    ax3.imshow(k_new, cmap='Reds')
    ax4.imshow(m_new, cmap='Greens')
    plt.show()
Ejemplo n.º 56
0
def mySVD(matrix,doCheck=0,kk=False):
    '''
    Singular Value decomposition
    method to compute the (pseudo-)inverse
    of matrix. Singular values at or below 1e-14 are treated as zero, so
    singular and non-square matrices are handled as well.
    option: doCheck=0,1,2
    0: return only the inverted SVD matrix
    1: return previous plus composite matrices
    2: return previous plus checks
       (with kk=True the products V.Vh, U.Ut and Sig.invSig are printed)
    any other value: print a hint and return None
           0,1                         ,2
    USAGE: s,sU,sUt,sV,sVh,sSig,sinvSig,sCheck = numMath.mySVD(a,doCheck=2)
    NOTE(review): U is transposed without conjugation, so the result is
    only a (pseudo-)inverse for real input - confirm complex input is not
    expected.
    '''
    ### Compute the SVD parts
    M,N       = matrix.shape
    U,s,Vh    = linalg.svd(matrix)
    Sig       = linalg.diagsvd(s,M,N)

    V = np.matrix(Vh).H
    Ut = np.matrix(U).T

    ### Invert Sigma, correcting for ill-ness
    # Invert the singular values one by one and drop the negligible ones.
    # Inverting the whole Sigma matrix first raised on singular square input,
    # and masking the N x M inverse with positions taken from the M x N Sigma
    # raised IndexError on non-square input.
    inv_s = [1.0 / value if value > 10**(-14) else 0.0 for value in s]
    invSig = np.matrix(linalg.diagsvd(inv_s, N, M))

    ### Compute the Inverse of the matrix
    invSVD = V.dot(invSig).dot(Ut)

    if(doCheck==0):
        return(invSVD)
    elif(doCheck==1):
        return(invSVD,U,Ut,V,Vh,Sig,invSig)
    elif(doCheck==2):
        ### and check the matrix
        checkMatrix = U.dot(Sig.dot(Vh))
        if(kk==True):
            # %-formatting keeps the output identical on Python 2 and 3.
            print('V.dot(Vh)= \n %s' % (V.dot(Vh),))
            print('U.dot(Ut)= \n %s' % (U.dot(Ut),))
            print('Sig.dot(invSig)= \n %s' % (Sig.dot(invSig),))
        return(invSVD,U,Ut,V,Vh,Sig,invSig,checkMatrix)

    else:
        print('Read the description of numMath.mySVD')
Ejemplo n.º 57
0
 def doLSA(self):
     """Build the rank-reduced version of self.TM.

     Decomposes self.TM with a full SVD, zeroes every singular value from
     index self.k onwards, and stores the factors in self.U and self.Vt, the
     rectangular Sigma in self.sigma and the product U . Sigma . Vt in
     self.CM.
     """
     self.U, singular_values, self.Vt = linalg.svd(self.TM)

     # Reduce Sigma: keep only the leading self.k singular values
     for position in range(self.k, len(singular_values)):
         singular_values[position] = 0

     n_rows, _ = self.U.shape
     _, n_cols = self.Vt.shape
     self.sigma = array(linalg.diagsvd(singular_values, n_rows, n_cols))

     reduced = dot(self.U, self.sigma)
     self.CM = dot(reduced, self.Vt)
Ejemplo n.º 58
0
Archivo: LSA.py Proyecto: agune/kisa
    def transform(self, dimensions=1):
        """Return self.matrix rebuilt without its smallest singular values.

        The matrix is decomposed as U . SIGMA . VT, the `dimensions` smallest
        singular values are set to zero and the product is formed again.

        Args:
            dimensions: how many singular values to drop (default 1).
        Returns:
            The reduced matrix, or None (after printing a message) when
            `dimensions` exceeds the number of rows.
        """
        rows,cols = self.matrix.shape
        if dimensions <= rows:

            u,sigma,vt = linalg.svd(self.matrix)
            # There are only min(rows, cols) singular values; indexing them
            # by `rows` ran past the end for matrices with more rows than
            # columns.
            count = len(sigma)
            if dimensions > 0:
                sigma[max(count - dimensions, 0):] = 0

            transformed_matrix = dot(dot(u, linalg.diagsvd(sigma, len(self.matrix), len(vt))) ,vt)

            return  transformed_matrix
        else :
            print("dimension reduction cannot be greater than %s" % (rows))