Esempio n. 1
0
    def fit(self, data):
        """Estimate principal components.

        The input is converted to a RowMatrix when needed, centered with
        ``data.center(0)``, and decomposed with an SVD configured from
        ``self.k`` and ``self.svdmethod``. The resulting factors are stored
        on this object.

        Parameters
        ----------
        data : Series or a subclass (e.g. RowMatrix)
            Data to estimate principal components from, must be a collection of
            key-value pairs where the keys are identifiers and the values are
            one-dimensional arrays

        Returns
        -------
        self
            This object, with ``scores`` set to ``svd.u``, ``latent`` set to
            ``svd.s`` and ``comps`` set to ``svd.v``.

        Raises
        ------
        TypeError
            If ``data`` is not an instance of Series (or a subclass).
        """

        if not isinstance(data, Series):
            # TypeError is a subclass of Exception, so callers that caught the
            # previous bare Exception keep working.
            raise TypeError('Input must be Series or a subclass (e.g. RowMatrix)')

        # Exact type comparison is intentional here: anything that is not
        # precisely a RowMatrix goes through toRowMatrix().
        if type(data) is not RowMatrix:
            data = data.toRowMatrix()

        mat = data.center(0)

        svd = SVD(k=self.k, method=self.svdmethod)
        svd.calc(mat)

        self.scores = svd.u
        self.latent = svd.s
        self.comps = svd.v

        return self
Esempio n. 2
0
    def fit(self, data):
        """Estimate principal components.

        Converts the input to a RowMatrix if it is not one already, centers
        it via ``data.center(0)``, runs an SVD built from ``self.k`` and
        ``self.svdMethod``, and keeps the three factors as attributes.

        Parameters
        ----------
        data : Series or a subclass (e.g. RowMatrix)
            Data to estimate principal components from, must be a collection of
            key-value pairs where the keys are identifiers and the values are
            one-dimensional arrays

        Returns
        -------
        self
            This object, after assigning ``scores`` (from ``svd.u``),
            ``latent`` (from ``svd.s``) and ``comps`` (from ``svd.v``).

        Raises
        ------
        TypeError
            If ``data`` is not an instance of Series (or a subclass).
        """

        if not isinstance(data, Series):
            # Raised as TypeError (still an Exception subclass) so existing
            # ``except Exception`` handlers are unaffected.
            raise TypeError('Input must be Series or a subclass (e.g. RowMatrix)')

        # Only an object whose type is exactly RowMatrix skips the conversion.
        if type(data) is not RowMatrix:
            data = data.toRowMatrix()

        mat = data.center(0)

        svd = SVD(k=self.k, method=self.svdMethod)
        svd.calc(mat)

        self.scores = svd.u
        self.latent = svd.s
        self.comps = svd.v

        return self
Esempio n. 3
0
    def test_SvdDirect(self):
        """Compare a rank-1 SVD computed with method="direct" to LinAlg.svd.

        Four three-element rows, keyed 1 through 4, are parallelized into a
        RowMatrix and decomposed with SVD(k=1, method="direct"). The leading
        singular value and vectors are checked against LinAlg.svd applied to
        the same rows locally.
        """
        rows = [
            array([1.0, 2.0, 6.0]),
            array([1.0, 3.0, 0.0]),
            array([1.0, 4.0, 6.0]),
            array([5.0, 1.0, 4.0]),
        ]
        keyed = zip(range(1, 5), rows)
        mat = RowMatrix(self.sc.parallelize(keyed))

        svd = SVD(k=1, method="direct")
        svd.calc(mat)

        uRef, sRef, vRef = LinAlg.svd(array(rows))
        uGot = transpose(array(svd.u.rows().collect()))[0]
        vGot = svd.v[0]

        assert allclose(svd.s[0], sRef[0])

        # A singular vector is accepted with either sign.
        vSame = allclose(vGot, vRef[0, :])
        vFlipped = allclose(-vGot, vRef[0, :])
        assert vSame | vFlipped

        uSame = allclose(uGot, uRef[:, 0])
        uFlipped = allclose(-uGot, uRef[:, 0])
        assert uSame | uFlipped
    def test_SvdDirect(self):
        """Verify the "direct" SVD method on a small 4x3 example.

        The first singular value, first right singular vector and first left
        singular vector produced by SVD(k=1, method="direct") on a RowMatrix
        must match the corresponding output of LinAlg.svd on the local array,
        with the vectors allowed to differ by a sign flip.
        """
        localRows = [
            array([1.0, 2.0, 6.0]),
            array([1.0, 3.0, 0.0]),
            array([1.0, 4.0, 6.0]),
            array([5.0, 1.0, 4.0]),
        ]
        rdd = self.sc.parallelize(zip(range(1, 5), localRows))
        matrix = RowMatrix(rdd)

        svd = SVD(k=1, method="direct")
        svd.calc(matrix)

        # Reference decomposition computed locally.
        uExpected, sExpected, vExpected = LinAlg.svd(array(localRows))

        collectedU = array(svd.u.rows().collect())
        uActual = transpose(collectedU)[0]
        vActual = svd.v[0]

        assert allclose(svd.s[0], sExpected[0])
        # Both orientations are evaluated; one of them has to match.
        assert any((allclose(vActual, vExpected[0, :]),
                    allclose(-vActual, vExpected[0, :])))
        assert any((allclose(uActual, uExpected[:, 0]),
                    allclose(-uActual, uExpected[:, 0])))
    def test_SvdEM(self):
        """Compare a rank-1 SVD computed with method="em" to LinAlg.svd.

        Same fixture as the direct-method test: four three-element rows keyed
        1 through 4 in a RowMatrix. Because the "em" method is iterative, every
        comparison uses an absolute tolerance instead of allclose's default.
        """
        rows = [
            array([1.0, 2.0, 6.0]),
            array([1.0, 3.0, 0.0]),
            array([1.0, 4.0, 6.0]),
            array([5.0, 1.0, 4.0]),
        ]
        keyed = zip(range(1, 5), rows)
        mat = RowMatrix(self.sc.parallelize(keyed))

        svd = SVD(k=1, method="em")
        svd.calc(mat)

        uRef, sRef, vRef = LinAlg.svd(array(rows))
        uGot = transpose(array(svd.u.rows().collect()))[0]
        vGot = svd.v[0]

        # allow small error for iterative method
        tol = 10e-04

        assert allclose(svd.s[0], sRef[0], atol=tol)

        # A singular vector is accepted with either sign.
        vSame = allclose(vGot, vRef[0, :], atol=tol)
        vFlipped = allclose(-vGot, vRef[0, :], atol=tol)
        assert vSame | vFlipped

        uSame = allclose(uGot, uRef[:, 0], atol=tol)
        uFlipped = allclose(-uGot, uRef[:, 0], atol=tol)
        assert uSame | uFlipped
    def test_SvdEM(self):
        """Verify the "em" SVD method on a small 4x3 example.

        The first singular value and the first left/right singular vectors
        from SVD(k=1, method="em") on a RowMatrix are compared, within an
        absolute tolerance, to LinAlg.svd on the local array. Vectors may
        differ from the reference by a sign flip.
        """
        localRows = [
            array([1.0, 2.0, 6.0]),
            array([1.0, 3.0, 0.0]),
            array([1.0, 4.0, 6.0]),
            array([5.0, 1.0, 4.0]),
        ]
        rdd = self.sc.parallelize(zip(range(1, 5), localRows))
        matrix = RowMatrix(rdd)

        svd = SVD(k=1, method="em")
        svd.calc(matrix)

        # Reference decomposition computed locally.
        uExpected, sExpected, vExpected = LinAlg.svd(array(localRows))

        collectedU = array(svd.u.rows().collect())
        uActual = transpose(collectedU)[0]
        vActual = svd.v[0]

        tol = 10e-04  # allow small error for iterative method

        assert allclose(svd.s[0], sExpected[0], atol=tol)
        # Both orientations are evaluated; one of them has to match.
        assert any((allclose(vActual, vExpected[0, :], atol=tol),
                    allclose(-vActual, vExpected[0, :], atol=tol)))
        assert any((allclose(uActual, uExpected[:, 0], atol=tol),
                    allclose(-uActual, uExpected[:, 0], atol=tol)))