def test_nodeargs():
    """Check that PCAMapper constructor arguments reach the wrapped MDP node.

    Verifies two things:
      * the ``svd`` flag is stored on the node and survives training;
      * ``output_dim`` is interpreted by type — a float becomes the node's
        ``desired_variance`` (fraction of variance to retain), an int becomes
        the exact ``output_dim`` (number of components) — both before and
        after training.
    """
    skip_if_no_external('mdp', min_version='2.4')
    ds = normal_feature_dataset(perlabel=10, nlabels=2, nfeatures=4)

    # the svd flag must be preserved through construction and training
    for svd_val in [True, False]:
        pcm = PCAMapper(alg='PCA', svd=svd_val)
        assert_equal(pcm.node.svd, svd_val)
        pcm.train(ds)
        assert_equal(pcm.node.svd, svd_val)

    # floats request a variance fraction, ints an exact component count
    for output_dim in [0.5, 0.95, 0.99, 10, 50, 100]:
        pcm = PCAMapper(alg='PCA', output_dim=output_dim)
        for i in range(2):  # so we also test on trained one
            if isinstance(output_dim, float):
                assert_equal(pcm.node.desired_variance, output_dim)
            else:
                assert_equal(pcm.node.output_dim, output_dim)
            pcm.train(ds)
            # NOTE(review): source was whitespace-mangled; this check is
            # placed right after train() inside the loop per textual order —
            # confirm against upstream if exact nesting matters.
            if isinstance(output_dim, float):
                # after training a concrete (integer) dimensionality must
                # have been chosen, so it can no longer equal the float
                assert_not_equal(pcm.node.output_dim, output_dim)
                # some dimensions are chosen
                assert_true(pcm.node.output_dim > 0)
def test_pcamapper():
    """Exercise PCAMapper training, forward projection, and reverse mapping.

    Uses a degenerate dataset (every feature is the same 0..39 ramp, so there
    is only one direction of variance) to check that training fails where it
    should, and a noise-perturbed copy to check the full
    train/forward/reverse round trip.
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    ndlin = Dataset(np.concatenate([np.arange(40)
                                    for i in range(20)]).reshape(20, -1).T)
    pm = PCAMapper()
    # train PCA -- must fail on the raw (integer, degenerate) data
    assert_raises(mdp.NodeException, pm.train, ndlin)
    ndlin.samples = ndlin.samples.astype('float')
    ndlin_noise = ndlin.copy()
    ndlin_noise.samples += np.random.random(size=ndlin.samples.shape)
    # we have no variance for more than one PCA component, hence just one
    # actual non-zero eigenvalue
    assert_raises(mdp.NodeException, pm.train, ndlin)
    # the noisy copy has full-rank covariance, so training succeeds
    pm.train(ndlin_noise)
    assert_equal(pm.proj.shape, (20, 20))
    # now project data into PCA space
    p = pm.forward(ndlin.samples)
    assert_equal(p.shape, (40, 20))
    # check that the mapped data can be fully recovered by 'reverse()'
    assert_array_almost_equal(pm.reverse(p), ndlin)