def fit(self, data):
    """
    Estimate principal components

    Parameters
    ----------
    data : Series or a subclass (e.g. RowMatrix)
        Data to estimate principal components from, must be a collection of
        key-value pairs where the keys are identifiers and the values are
        one-dimensional arrays

    Returns
    -------
    self : this object, with ``scores``, ``latent`` and ``comps`` set from
        the ``u``, ``s`` and ``v`` results of the SVD, respectively

    Raises
    ------
    TypeError
        If data is not an instance of Series (or a subclass)
    """
    if not isinstance(data, Series):
        # TypeError is a subclass of Exception, so callers that catch
        # Exception keep working while the failure is reported more precisely
        raise TypeError('Input must be Series or a subclass (e.g. RowMatrix)')

    # exact type check: anything that is not precisely a RowMatrix
    # (including subclasses of it) goes through toRowMatrix()
    if type(data) is not RowMatrix:
        data = data.toRowMatrix()

    # NOTE(review): center(0) presumably removes the mean before the
    # decomposition -- confirm what axis 0 refers to in Series.center
    mat = data.center(0)

    # NOTE(review): attribute is spelled `svdmethod` here -- confirm it
    # matches the name assigned in __init__
    svd = SVD(k=self.k, method=self.svdmethod)
    svd.calc(mat)

    self.scores = svd.u
    self.latent = svd.s
    self.comps = svd.v

    return self
def fit(self, data):
    """
    Estimate principal components

    Parameters
    ----------
    data : Series or a subclass (e.g. RowMatrix)
        Data to estimate principal components from, must be a collection of
        key-value pairs where the keys are identifiers and the values are
        one-dimensional arrays

    Returns
    -------
    self : this object, with ``scores``, ``latent`` and ``comps`` set from
        the ``u``, ``s`` and ``v`` results of the SVD, respectively

    Raises
    ------
    TypeError
        If data is not an instance of Series (or a subclass)
    """
    if not isinstance(data, Series):
        # TypeError is a subclass of Exception, so callers that catch
        # Exception keep working while the failure is reported more precisely
        raise TypeError('Input must be Series or a subclass (e.g. RowMatrix)')

    # exact type check: anything that is not precisely a RowMatrix
    # (including subclasses of it) goes through toRowMatrix()
    if type(data) is not RowMatrix:
        data = data.toRowMatrix()

    # NOTE(review): center(0) presumably removes the mean before the
    # decomposition -- confirm what axis 0 refers to in Series.center
    mat = data.center(0)

    # NOTE(review): attribute is spelled `svdMethod` here -- confirm it
    # matches the name assigned in __init__
    svd = SVD(k=self.k, method=self.svdMethod)
    svd.calc(mat)

    self.scores = svd.u
    self.latent = svd.s
    self.comps = svd.v

    return self
def test_SvdDirect(self):
    # Compare a rank-1 SVD computed with method="direct" against a locally
    # computed SVD of the same four 3-element rows.
    localRows = [
        array([1.0, 2.0, 6.0]),
        array([1.0, 3.0, 0.0]),
        array([1.0, 4.0, 6.0]),
        array([5.0, 1.0, 4.0]),
    ]

    # local reference decomposition
    uRef, sRef, vRef = LinAlg.svd(array(localRows))

    # records are keyed 1..4
    keyedRows = zip(range(1, 5), localRows)
    mat = RowMatrix(self.sc.parallelize(keyedRows))

    svd = SVD(k=1, method="direct")
    svd.calc(mat)

    collectedU = array(svd.u.rows().collect())
    uFirst = collectedU.T[0]
    vFirst = svd.v[0]

    assert allclose(svd.s[0], sRef[0])
    # singular vectors are only compared up to a sign flip
    assert allclose(vFirst, vRef[0, :]) or allclose(-vFirst, vRef[0, :])
    assert allclose(uFirst, uRef[:, 0]) or allclose(-uFirst, uRef[:, 0])
def test_SvdDirect(self):
    # Compare a rank-1 SVD computed with method="direct" against a locally
    # computed SVD of the same four 3-element rows.
    localRows = [
        array([1.0, 2.0, 6.0]),
        array([1.0, 3.0, 0.0]),
        array([1.0, 4.0, 6.0]),
        array([5.0, 1.0, 4.0]),
    ]

    # local reference decomposition
    uRef, sRef, vRef = LinAlg.svd(array(localRows))

    # records are keyed 1..4
    keyedRows = zip(range(1, 5), localRows)
    mat = RowMatrix(self.sc.parallelize(keyedRows))

    svd = SVD(k=1, method="direct")
    svd.calc(mat)

    collectedU = array(svd.u.rows().collect())
    uFirst = collectedU.T[0]
    vFirst = svd.v[0]

    assert allclose(svd.s[0], sRef[0])
    # singular vectors are only compared up to a sign flip
    assert allclose(vFirst, vRef[0, :]) or allclose(-vFirst, vRef[0, :])
    assert allclose(uFirst, uRef[:, 0]) or allclose(-uFirst, uRef[:, 0])
def test_SvdEM(self):
    # Compare a rank-1 SVD computed with method="em" against a locally
    # computed SVD of the same four 3-element rows, within a tolerance.
    localRows = [
        array([1.0, 2.0, 6.0]),
        array([1.0, 3.0, 0.0]),
        array([1.0, 4.0, 6.0]),
        array([5.0, 1.0, 4.0]),
    ]

    # local reference decomposition
    uRef, sRef, vRef = LinAlg.svd(array(localRows))

    # records are keyed 1..4
    keyedRows = zip(range(1, 5), localRows)
    mat = RowMatrix(self.sc.parallelize(keyedRows))

    svd = SVD(k=1, method="em")
    svd.calc(mat)

    collectedU = array(svd.u.rows().collect())
    uFirst = collectedU.T[0]
    vFirst = svd.v[0]

    # allow small error for iterative method (same value as 10e-04)
    tol = 1e-3

    def sameUpToSign(vec, ref):
        # vectors are only compared up to a sign flip
        return allclose(vec, ref, atol=tol) or allclose(-vec, ref, atol=tol)

    assert allclose(svd.s[0], sRef[0], atol=tol)
    assert sameUpToSign(vFirst, vRef[0, :])
    assert sameUpToSign(uFirst, uRef[:, 0])
def test_SvdEM(self):
    # Compare a rank-1 SVD computed with method="em" against a locally
    # computed SVD of the same four 3-element rows, within a tolerance.
    localRows = [
        array([1.0, 2.0, 6.0]),
        array([1.0, 3.0, 0.0]),
        array([1.0, 4.0, 6.0]),
        array([5.0, 1.0, 4.0]),
    ]

    # local reference decomposition
    uRef, sRef, vRef = LinAlg.svd(array(localRows))

    # records are keyed 1..4
    keyedRows = zip(range(1, 5), localRows)
    mat = RowMatrix(self.sc.parallelize(keyedRows))

    svd = SVD(k=1, method="em")
    svd.calc(mat)

    collectedU = array(svd.u.rows().collect())
    uFirst = collectedU.T[0]
    vFirst = svd.v[0]

    # allow small error for iterative method (same value as 10e-04)
    tol = 1e-3

    def sameUpToSign(vec, ref):
        # vectors are only compared up to a sign flip
        return allclose(vec, ref, atol=tol) or allclose(-vec, ref, atol=tol)

    assert allclose(svd.s[0], sRef[0], atol=tol)
    assert sameUpToSign(vFirst, vRef[0, :])
    assert sameUpToSign(uFirst, uRef[:, 0])