Example #1
0
    def test_weighting(self):
        """Check that a weighted PCA fit agrees with a fit on expanded data.

        ``X2``/``y2`` are ``X1``/``y1`` with every row repeated ``w1`` times
        (the zero-weight row is dropped, the weight-2 row appears twice).
        Fitting on ``(X1, w1)`` and on ``X2`` with unit weights should hence
        describe the same sample: the *weighted* column statistics of the
        first transformed sample are compared with the plain column
        statistics of the second one.

        The previous version derived both sets of statistics from ``Xt1``,
        so the second fit was never checked and the test could not fail.
        """
        pipeline = Pipeline()
        pipeline.add_step('pca', PCA(standardize=False))

        X1 = np.array([[1., 2.], [2., 4.], [3., 6.], [4., 8.], [5., 10.]])
        w1 = np.array([0., 1., 2., 1., 1.])
        y1 = np.array([0, 0, 0, 1, 2])

        # Row-expanded equivalent of (X1, w1), therefore unit weights.
        # NOTE(review): assumes PCA treats w as frequency-like sample
        # weights -- confirm against the PCA implementation.
        X2 = np.array([[2., 4.], [3., 6.], [3., 6.], [4., 8.], [5., 10.]])
        w2 = np.ones(len(X2))
        y2 = np.array([0, 0, 0, 1, 2])

        pipeline.fit(X1, y1, w1)
        Xt1, _, _ = pipeline.transform(X1, y1, w1)
        means1 = np.average(Xt1, axis=0, weights=w1)
        # Frequency-weighted sample variance, matching ddof=1 on the
        # expanded sample: sum(w * d**2) / (sum(w) - 1).
        total_weight = w1.sum()
        var1 = np.average((Xt1 - means1) ** 2, axis=0, weights=w1)
        stds1 = np.sqrt(var1 * total_weight / (total_weight - 1.))

        pipeline.fit(X2, y2, w2)
        Xt2, _, _ = pipeline.transform(X2, y2, w2)
        means2 = Xt2.mean(axis=0)
        stds2 = Xt2.std(axis=0, ddof=1)

        # zip() would silently truncate on a column-count mismatch.
        self.assertEqual(len(means1), len(means2))
        self.assertEqual(len(stds1), len(stds2))

        for mean1, mean2 in zip(means1, means2):
            self.assertAlmostEqual(mean1, mean2)

        for std1, std2 in zip(stds1, stds2):
            self.assertAlmostEqual(std1, std2)
Example #2
0
    def test_indices_wildcard(self):
        """Register an Overwrite(0) step with the '*' selector.

        The transformed data is expected to equal the fixture with every
        entry set to zero.
        """
        zeroing = Pipeline()
        zeroing.add_step('overwrite', Overwrite(value=0), '*')

        self.fit(zeroing)
        transformed = self.transform(zeroing)

        # Build the expectation by zeroing the fixture in place.
        self.X[:, :] = 0
        matches = np.allclose(self.X, transformed)
        self.assertTrue(matches)
Example #3
0
    def test_unit_step(self):
        """A bare BaseStep on columns 2 and 4 must leave the data unchanged."""
        step_name = 'unit'
        identity_pipeline = Pipeline()
        identity_pipeline.add_step(step_name, BaseStep('unit'), [2, 4])
        self.assertTrue(identity_pipeline.has_step(step_name))

        self.fit(identity_pipeline)
        transformed = self.transform(identity_pipeline)

        unchanged = np.allclose(self.X, transformed)
        self.assertTrue(unchanged)
Example #4
0
    def test_Overwriter(self):
        """Overwrite(0) restricted to columns 2 and 4 zeroes only those."""
        target_columns = [2, 4]
        overwrite_pipeline = Pipeline()
        overwrite_pipeline.add_step('overwrite', Overwrite(value=0),
                                    target_columns)

        self.fit(overwrite_pipeline)
        transformed = self.transform(overwrite_pipeline)

        # Expectation: the fixture itself with the selected columns zeroed.
        self.X[:, target_columns] = 0
        matches = np.allclose(self.X, transformed)
        self.assertTrue(matches)
Example #5
0
    def test_PCA(self):
        """Fit an unstandardized PCA on collinear data and inspect the result.

        Checks that the fitted matrix ``R`` satisfies ``R R^T = 1``, that
        labels and weights pass through the transformation unchanged, and
        that the covariance of the transformed sample is diag(5, 0).
        """
        pca_pipeline = Pipeline()
        pca_pipeline.add_step('pca', PCA(standardize=False))

        data = np.array([[1., 2.], [2., 4.], [3., 6.]])
        weights = np.array([1., 1., 1.])
        labels = np.zeros(3)

        def abs_correlation(a, b):
            """Absolute Pearson correlation coefficient of two samples."""
            return abs(np.corrcoef(a, b)[0][1])

        # Sanity check of the fixture: both columns are fully correlated.
        self.assertGreater(abs_correlation(data[:, 0], data[:, 1]), .99)

        pca_pipeline.fit(data, labels, weights)

        rotation = pca_pipeline.get_step('pca').R
        product = rotation.dot(rotation.T)
        self.assertTrue(np.allclose(product, np.identity(rotation.shape[0])))

        Xt, yt, wt = pca_pipeline.transform(data, labels, weights)
        self.assertTrue(np.allclose(labels, yt))
        self.assertTrue(np.allclose(weights, wt))

        cov = np.cov(Xt.T)
        expected_entries = ((0, 0, 5.), (1, 0, 0.), (0, 1, 0.), (1, 1, 0.))
        for row, col, value in expected_entries:
            self.assertAlmostEqual(cov[row, col], value)
Example #6
0
    def test_ordering(self):
        """Exercise BinaryWPCA on two correlated Gaussian classes.

        Asserts that the transformed sample has a diagonal covariance matrix
        and that the Wasserstein distance computed for column 0 is smaller
        than the one computed for column 1.

        NOTE(review): both distances compare *untransformed* class-0 values
        (``X``) with *transformed* class-1 values (``Xt``).  This looks like
        it may have been meant to use ``Xt`` on both sides -- confirm.  The
        sample is also drawn without a fixed random seed.
        """
        n_per_class = 10000
        wpca_pipeline = Pipeline()
        wpca_pipeline.add_step('wpca', BinaryWPCA())

        center_a = [.5, -.5]
        center_b = [-.5, .5]
        shared_cov = [[1., .9], [.9, 1.]]
        sample_a = np.random.multivariate_normal(center_a, shared_cov,
                                                 n_per_class)
        sample_b = np.random.multivariate_normal(center_b, shared_cov,
                                                 n_per_class)

        X = np.vstack((sample_a, sample_b))
        y = np.concatenate([np.zeros(n_per_class), np.ones(n_per_class)])
        w = np.ones(2 * n_per_class)
        wpca_pipeline.fit(X, y, w)

        Xt, _, _ = wpca_pipeline.transform(X, y, w)

        def is_diagonal(matrix):
            """True if all off-diagonal entries are numerically zero."""
            off_diagonal = matrix - np.diag(np.diagonal(matrix))
            return np.allclose(off_diagonal, np.zeros(matrix.shape))

        self.assertTrue(is_diagonal(np.cov(Xt.T)))

        first_class = (y == 0)
        second_class = (y == 1)

        def column_distance(column):
            """Weighted Wasserstein distance for a single column index."""
            return wasserstein_distance(
                u_values=X[first_class, column],
                v_values=Xt[second_class, column],
                u_weights=w[first_class],
                v_weights=w[second_class])

        dist_first = column_distance(0)
        dist_second = column_distance(1)

        self.assertLess(dist_first, dist_second)
Example #7
0
    def test_fit_decoupling(self):
        """Transforming uses the fitted values, not the data being transformed.

        A Center step on columns 2 and 4 is fitted on data whose column means
        are 2 and 4, then applied to an all-ones array.  The selected columns
        are expected to come out as 1 - 2 and 1 - 4, the others stay at 1.
        """
        centered_columns = [2, 4]
        center_pipeline = Pipeline()
        center_pipeline.add_step('center', Center(), centered_columns)

        ramp = np.arange(5).astype(float)
        X = np.random.rand(5, 10)
        X[:, centered_columns[0]] = ramp
        X[:, centered_columns[1]] = ramp * 2

        # Sanity check of the fixture itself.
        fit_means = np.mean(X[:, centered_columns], axis=0)
        self.assertAlmostEqual(fit_means[0], 2.)
        self.assertAlmostEqual(fit_means[1], 4.)

        y = np.arange(5)
        w = np.arange(5)

        center_pipeline.fit(X, y, w)

        Xt, _, _ = center_pipeline.transform(np.ones_like(X), y, w)

        column_means = Xt.mean(axis=0)
        expected_means = [1., 1., -1., 1., -3.]
        for column, expected in enumerate(expected_means):
            self.assertAlmostEqual(column_means[column], expected)
Example #8
0
    def test_skipping(self):
        """Run sub-ranges of a pipeline via ``first_step`` / ``last_step``.

        Six Scale steps with distinct prime factors are chained, two of them
        inside a nested pipeline.  Each case lists the transform keyword
        arguments together with the factors expected to have been applied.
        """
        pipeline = Pipeline()
        (pipeline
         .add_step('scaler1', Scale(factor=2))
         .add_step('scaler2', Scale(factor=3))
         .add_step('scaler3_4', Pipeline()
                   .add_step('scaler3', Scale(5))
                   .add_step('scaler4', Scale(7)))
         .add_step('scaler5', Scale(11))
         .add_step('scaler6', Scale(13)))

        self.fit(pipeline)

        # A list-valued step name addresses a step of the nested pipeline.
        cases = [
            ({}, (2, 3, 5, 7, 11, 13)),
            ({'first_step': 'scaler2'}, (3, 5, 7, 11, 13)),
            ({'first_step': 'scaler3_4'}, (5, 7, 11, 13)),
            ({'first_step': ['scaler3_4', 'scaler4']}, (7, 11, 13)),
            ({'last_step': 'scaler5'}, (2, 3, 5, 7, 11)),
            ({'last_step': 'scaler3_4'}, (2, 3, 5, 7)),
            ({'last_step': ['scaler3_4', 'scaler3']}, (2, 3, 5)),
            ({'first_step': 'scaler2',
              'last_step': ['scaler3_4', 'scaler3']}, (3, 5)),
        ]

        for step_range, factors in cases:
            Xt = self.transform(pipeline, **step_range)

            # Multiply factor by factor, as the individual steps would.
            expected = self.X
            for factor in factors:
                expected = expected * factor

            self.assertTrue(np.allclose(expected, Xt))
Example #9
0
    def test_standardize(self):
        """PCA(standardize=True) output columns have mean 0 and std 1."""
        std_pipeline = Pipeline()
        std_pipeline.add_step('pca', PCA(standardize=True))

        data = np.array([[1., 2.], [2., 4.], [3., 6.], [4., 8.], [5., 10.]])
        weights = np.array([1., 1., 1., 1., 1.])
        labels = np.array([0, 0, 0, 1, 2])

        std_pipeline.fit(data, labels, weights)
        transformed, _, _ = std_pipeline.transform(data, labels, weights)

        column_means = transformed.mean(axis=0)
        column_stds = transformed.std(axis=0, ddof=1)
        for column_mean, column_std in zip(column_means, column_stds):
            self.assertAlmostEqual(column_mean, 0.)
            self.assertAlmostEqual(column_std, 1.)
Example #10
0
    def test_PCA_ignore(self):
        """PCA with ``ignore=[1, 2]``, checked on the rows labelled 0 only.

        The covariance of the transformed rows whose label is neither 1 nor 2
        is expected to be diag(5, 0).
        """
        ignoring_pipeline = Pipeline()
        ignoring_pipeline.add_step('pca',
                                   PCA(ignore=[1, 2], standardize=False))

        data = np.array([[1., 2.], [2., 4.], [3., 6.], [4., 8.], [5., 10.]])
        weights = np.array([1., 1., 1., 1., 1.])
        labels = np.array([0, 0, 0, 1, 2])

        ignoring_pipeline.fit(data, labels, weights)

        transformed, _, _ = ignoring_pipeline.transform(data, labels, weights)

        # Keep only rows that are in neither of the ignored classes.
        kept_rows = ~((labels == 1) | (labels == 2))
        cov = np.cov(transformed[kept_rows].T)

        expected_entries = ((0, 0, 5.), (1, 0, 0.), (0, 1, 0.), (1, 1, 0.))
        for row, col, value in expected_entries:
            self.assertAlmostEqual(cov[row, col], value)
Example #11
0
    def test_compose(self):
        """Chain two Overwrite steps and a Scale step on columns 2 and 4.

        The result is checked once after the second overwrite and once after
        the full pipeline; columns 0, 1 and 3 must match the input in both
        cases.
        """
        untouched = [0, 1, 3]

        pipeline = Pipeline()
        (pipeline
         .add_step('overwrite1', Overwrite(value=1), [2])
         .add_step('overwrite2', Overwrite(value=2), [4])
         .add_step('scale', Scale(3), [2, 4]))

        self.fit(pipeline)

        # Stop before the scaling step: plain overwritten values.
        partial = self.transform(pipeline, last_step='overwrite2')
        self.assertTrue(np.all(partial[:, 2] == 1))
        self.assertTrue(np.all(partial[:, 4] == 2))
        self.assertTrue(np.allclose(self.X[:, untouched],
                                    partial[:, untouched]))

        # Full run: overwritten values multiplied by three.
        complete = self.transform(pipeline)
        self.assertTrue(np.all(complete[:, 2] == 3))
        self.assertTrue(np.all(complete[:, 4] == 6))
        self.assertTrue(np.allclose(self.X[:, untouched],
                                    complete[:, untouched]))
Example #12
0
    def test_set_params(self):
        """Factors assigned through ``set_step_params`` are used on transform.

        Two default-constructed Scale steps get the factors 2 and 3, so the
        pipeline as a whole is expected to multiply the data by 6.
        """
        pipeline = Pipeline()
        (pipeline
         .add_step('scaler1', Scale())
         .add_step('scaler2', Scale()))

        for step_name, factor in (('scaler1', 2), ('scaler2', 3)):
            pipeline.set_step_params(step_name, {'factor': factor})

        self.fit(pipeline)
        transformed = self.transform(pipeline)

        self.assertTrue(np.allclose(self.X * 6, transformed))
Example #13
0
    def test_standardize(self):
        """Standardizer stores the expected mean/std and applies them.

        The reference values for the fitted ``mean`` and ``std`` attributes
        are hard-coded for the weights (2, 3, 3, 4); the transformed data
        must equal ``(X - mean) / std`` column by column.
        """
        std_pipeline = Pipeline()
        std_pipeline.add_step('std', Standardizer())

        data = np.array([[1., 2.], [2., 4.], [3., 6.], [4., 8.]])
        labels = np.zeros(4)
        weights = np.array([2., 3., 3., 4.])

        std_pipeline.fit(data, labels, weights)

        fitted = std_pipeline.get_step('std')
        self.assertAlmostEqual(fitted.mean[0], 2.75)
        self.assertAlmostEqual(fitted.mean[1], 5.5)
        self.assertAlmostEqual(fitted.std[0], np.sqrt(14.25) / 3.)
        self.assertAlmostEqual(fitted.std[1], np.sqrt(57.) / 3.)

        transformed, _, _ = std_pipeline.transform(data, labels, weights)

        # Reference result built by hand from the constants checked above.
        first_column = ((np.array([1., 2., 3., 4.]) - 2.75)
                        / (np.sqrt(14.25) / 3.))
        second_column = ((np.array([2., 4., 6., 8.]) - 5.5)
                         / (np.sqrt(57.) / 3.))
        expected = np.column_stack((first_column, second_column))

        self.assertTrue(np.allclose(expected, transformed))
Example #14
0
        self.mean = None

    def fit(self, X, y, w):
        """Store the mean over all entries of ``X``; ``y`` and ``w`` are unused."""
        overall_mean = X.mean()
        self.mean = overall_mean

    def transform(self, X, y, w):
        """Return an array shaped like ``X`` filled with the stored mean.

        ``y`` and ``w`` are handed back unchanged.
        """
        constant = np.ones_like(X) * self.mean
        return constant, y, w


def generate_data(m=10, n=5):
    """Draw a random toy dataset from numpy's global random state.

    Returns a tuple ``(X, y, w)``: an ``(m, n)`` array of uniform values in
    [0, 1), ``m`` integer labels drawn from {0, 1} and ``m`` uniform weights
    in [0, 1).
    """
    rng = np.random
    # The draw order (features, labels, weights) is kept so that seeded
    # runs stay reproducible.
    features = rng.rand(m, n)
    labels = rng.randint(low=0, high=2, size=m)
    weights = rng.rand(m)
    return features, labels, weights


if __name__ == '__main__':
    # Demo: fit two TestStep instances on disjoint column pairs of one
    # random dataset, apply them to a second random dataset and print the
    # transformed features.
    pipeline = Pipeline()
    (pipeline
     .add_step('step1', TestStep(), indices=[1, 3])
     .add_step('step2', TestStep(), indices=[2, 4]))

    fit_sample = generate_data()
    pipeline.fit(*fit_sample)

    apply_sample = generate_data()
    Xt, yt, wt = pipeline.transform(*apply_sample)

    print(Xt)