def test_outer(self):
    """Check that every gramian code path returns the same 3x3 matrix.

    A two-row RowMatrix is built from a parallelized list of (key, array)
    pairs, and ``gramian()`` is evaluated with no argument, with "accum" and
    with "aggregate". Each result must equal the expected matrix.
    """
    rows = [(1, array([1, 2, 3])), (2, array([4, 5, 6]))]
    mat1 = RowMatrix(self.sc.parallelize(rows))

    # evaluate all three variants before checking any of them
    results = [
        mat1.gramian(),
        mat1.gramian("accum"),
        mat1.gramian("aggregate"),
    ]

    expected = array([[17, 22, 27], [22, 29, 36], [27, 36, 45]])
    for result in results:
        assert array_equal(result, expected)
# TODO: TestCenter, TestZScore
def test_outer(self):
    """Check that every gramian code path returns the same 3x3 matrix.

    A two-row RowMatrix is built from a parallelized list of (key, array)
    pairs, and ``gramian()`` is evaluated with no argument, with "accum" and
    with "aggregate". Each result must equal the expected matrix.
    """
    rows = [(1, array([1, 2, 3])), (2, array([4, 5, 6]))]
    mat1 = RowMatrix(self.sc.parallelize(rows))

    # evaluate all three variants before checking any of them
    results = [
        mat1.gramian(),
        mat1.gramian("accum"),
        mat1.gramian("aggregate"),
    ]

    expected = array([[17, 22, 27], [22, 29, 36], [27, 36, 45]])
    for result in results:
        assert array_equal(result, expected)
# TODO: TestCenter, TestZScore
def calc(self, mat):
    """
    Calculate singular vectors

    Computes a rank-``self.k`` decomposition of ``mat`` and stores the
    results on the instance as ``self.u`` (the ``rdd`` of the projected
    data), ``self.s`` and ``self.v``. ``self.method`` selects the algorithm:

    - "direct" eigendecomposes the normalized gramian of the full matrix.
    - "em" first estimates a subspace by expectation maximization (at most
      ``self.maxiter`` iterations, stopping once the squared change in the
      estimate is no longer above ``self.tol``) and eigendecomposes the
      gramian of the data projected into that subspace.

    Any other value of ``self.method`` leaves the instance untouched.

    Parameters
    ----------
    mat : RDD of (tuple, array) pairs, or RowMatrix
        Matrix to compute singular vectors from

    Returns
    ----------
    self : returns an instance of self.
    """
    # isinstance (rather than an exact type comparison) so that RowMatrix
    # subclasses are used as-is instead of being wrapped a second time
    if not isinstance(mat, RowMatrix):
        mat = RowMatrix(mat)

    if self.method == "direct":
        # no intermediate subspace: work on the full matrix
        basis = None
        # get the normalized gramian matrix
        cov = mat.gramian() / mat.nrows
    elif self.method == "em":
        # initialize random matrix
        c = random.rand(self.k, mat.ncols)
        niter = 0
        error = 100

        # iteratively update subspace using expectation maximization
        # e-step: x = (c'c)^-1 c' y
        # m-step: c = y x' (xx')^-1
        while niter < self.maxiter and error > self.tol:
            c_old = c
            # pre compute (c'c)^-1 c'
            c_inv = dot(c.T, inv(dot(c, c.T)))
            # compute (xx')^-1 through a map reduce
            xx = mat.times(c_inv).gramian()
            xx_inv = inv(xx)
            # pre compute (c'c)^-1 c' (xx')^-1
            premult2 = mat.rdd.context.broadcast(dot(c_inv, xx_inv))
            # compute the new c through a map reduce
            c = mat.rows().map(
                lambda x: outer(x, dot(x, premult2.value))).sum()
            c = c.T
            error = sum(sum((c - c_old) ** 2))
            niter += 1

        # project data into subspace spanned by columns of c
        basis = orth(c.T)
        cov = mat.times(basis).gramian() / mat.nrows
    else:
        # unrecognized method: nothing is computed, as before
        return self

    # do a local eigendecomposition and keep the k largest eigenvalues
    eigw, eigv = eigh(cov)
    top = argsort(eigw)[::-1][0:self.k]
    s = sqrt(eigw[top]) * sqrt(mat.nrows)
    v = eigv[:, top].T
    if basis is not None:
        # map the eigenvectors from the subspace back to the original columns
        v = dot(v, basis.T)

    # project back into data, normalize by singular values
    u = mat.times(v.T / s)

    self.u = u.rdd
    self.s = s
    self.v = v

    return self
def calc(self, mat):
    """
    Calculate singular vectors

    Computes a rank-``self.k`` decomposition of ``mat`` and stores the
    results on the instance as ``self.u`` (the ``rdd`` of the projected
    data), ``self.s`` and ``self.v``. ``self.method`` selects the algorithm:

    - "direct" eigendecomposes the normalized gramian of the full matrix.
    - "em" first estimates a subspace by expectation maximization (at most
      ``self.maxiter`` iterations, stopping once the squared change in the
      estimate is no longer above ``self.tol``) and eigendecomposes the
      gramian of the data projected into that subspace.

    Any other value of ``self.method`` leaves the instance untouched.

    Parameters
    ----------
    mat : RDD of (tuple, array) pairs, or RowMatrix
        Matrix to compute singular vectors from

    Returns
    ----------
    self : returns an instance of self.
    """
    # isinstance (rather than an exact type comparison) so that RowMatrix
    # subclasses are used as-is instead of being wrapped a second time
    if not isinstance(mat, RowMatrix):
        mat = RowMatrix(mat)

    if self.method == "direct":
        # no intermediate subspace: work on the full matrix
        basis = None
        # get the normalized gramian matrix
        cov = mat.gramian() / mat.nrows
    elif self.method == "em":
        # initialize random matrix
        c = random.rand(self.k, mat.ncols)
        niter = 0
        error = 100

        # iteratively update subspace using expectation maximization
        # e-step: x = (c'c)^-1 c' y
        # m-step: c = y x' (xx')^-1
        while niter < self.maxiter and error > self.tol:
            c_old = c
            # pre compute (c'c)^-1 c'
            c_inv = dot(c.T, inv(dot(c, c.T)))
            # compute (xx')^-1 through a map reduce
            xx = mat.times(c_inv).gramian()
            xx_inv = inv(xx)
            # pre compute (c'c)^-1 c' (xx')^-1
            premult2 = mat.rdd.context.broadcast(dot(c_inv, xx_inv))
            # compute the new c through a map reduce
            c = mat.rows().map(
                lambda x: outer(x, dot(x, premult2.value))).sum()
            c = c.T
            error = sum(sum((c - c_old) ** 2))
            niter += 1

        # project data into subspace spanned by columns of c
        basis = orth(c.T)
        cov = mat.times(basis).gramian() / mat.nrows
    else:
        # unrecognized method: nothing is computed, as before
        return self

    # do a local eigendecomposition and keep the k largest eigenvalues
    eigw, eigv = eigh(cov)
    top = argsort(eigw)[::-1][0:self.k]
    s = sqrt(eigw[top]) * sqrt(mat.nrows)
    v = eigv[:, top].T
    if basis is not None:
        # map the eigenvectors from the subspace back to the original columns
        v = dot(v, basis.T)

    # project back into data, normalize by singular values
    u = mat.times(v.T / s)

    self.u = u.rdd
    self.s = s
    self.v = v

    return self