Example #1
def do_pca(X, method='svd'):
    n_samples, n_features = X.shape
    pca = ml.features.PrincipalComponentAnalysis(fraction=1, method=method)
    pca.learn(X)

    # eigenvalues must be sorted in decreasing order and the eigenvectors
    # must form an orthonormal basis
    evals, evecs = pca.eigenvalues, pca.eigenvectors
    assert (evals[1:] - evals[:-1] <= 0.).all()
    assert testing.almost_equal(np.array([np.dot(evecs[:, i], evecs[:, i])
                                          for i in range(n_features)]),
                                1., rtol=1.e-6)
    assert testing.almost_equal(np.array([np.dot(evecs[:, i],
                                                 evecs[:, i + 1])
                                          for i in range(n_features - 1)]),
                                0., atol=1.e-6)

    # with all components kept, the transform must be invertible
    Xpc = pca.transform(X)
    assert testing.almost_equal(X, pca.invert(Xpc), rtol=1.e-6, atol=1.e-6)

    # truncate to a fixed number of components
    pca.n_components = 2
    Xpc = pca.transform(X)
    assert tuple(Xpc.shape) == (len(X), 2)

    # truncate by the fraction of variance explained by the first component
    pca.fraction_explained = pca.explained_variance[0]
    Xpc = pca.transform(X)
    assert tuple(Xpc.shape) == (len(X), 1)
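
The identities checked above hold for any correct PCA: eigenvalues sorted in
decreasing order, orthonormal eigenvectors, and lossless inversion when all
components are kept. They can be reproduced with plain NumPy, independently
of the ml library, as a minimal sketch via np.linalg.svd:

import numpy as np

def pca_svd(X):
    """PCA of the centered data via SVD: (eigenvalues, eigenvectors)."""
    Xc = X - X.mean(axis=0)
    _, s, vt = np.linalg.svd(Xc, full_matrices=False)
    evals = s ** 2 / (len(X) - 1)   # variances along the principal axes
    return evals, vt.T              # columns are the eigenvectors

X = np.random.normal(size=(100, 3))
evals, evecs = pca_svd(X)
assert (np.diff(evals) <= 0.).all()                    # sorted descending
assert np.allclose(evecs.T @ evecs, np.eye(3))         # orthonormal basis
Xpc = (X - X.mean(axis=0)) @ evecs                     # transform
assert np.allclose(Xpc @ evecs.T + X.mean(axis=0), X)  # invert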
Example #2
def test_lp_norm():

    # factory returning Lp-norm callables along the given axis
    l1norm = lp_norm(p=1, axis=0, root=True)
    l2norm = lp_norm(p=2, axis=0, root=True)
    l3norm = lp_norm(p=3, axis=0, root=True)
    maxnorm = lp_norm(p='inf', axis=0, root=True)

    # the zero vector has zero norm under every p
    x = np.array((0., 0., 0.))
    assert almost_equal(l1norm(x), 0.)
    assert almost_equal(l2norm(x), 0.)
    assert almost_equal(l3norm(x), 0.)
    assert almost_equal(maxnorm(x), 0.)

    # a vector divided by its norm has unit norm
    x = np.array((-120.5, 32.1, -0.8))
    assert almost_equal(l1norm(x / l1norm(x)), 1.)
    assert almost_equal(l2norm(x / l2norm(x)), 1.)
    assert almost_equal(l3norm(x / l3norm(x)), 1.)
    assert almost_equal(maxnorm(x / maxnorm(x)), 1.)

    # triangle inequality: ||x|| + ||y|| >= ||x + y||
    y = np.array((12.3, 5.1, 592.6))
    assert l1norm(x) + l1norm(y) >= l1norm(x + y) > 0.
    assert l2norm(x) + l2norm(y) >= l2norm(x + y) > 0.
    assert l3norm(x) + l3norm(y) >= l3norm(x + y) > 0.
    assert maxnorm(x) + maxnorm(y) >= maxnorm(x + y) > 0.

    # row-wise L2 norms of a matrix (axis=1)
    norm = lp_norm(p=2, axis=1)
    x = np.eye(3)
    x[0, 1] = -2
    assert almost_equal(norm(x), (np.sqrt(5), 1., 1.))
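
lp_norm is used here as a factory that returns a norm callable. A minimal
NumPy sketch with the same observable behavior (an assumption about the
tested API, not its actual implementation):

import numpy as np

def lp_norm(p=2, axis=0, root=True):
    """Return a callable computing the Lp norm along 'axis'."""
    if p == 'inf':
        return lambda x: np.max(np.abs(x), axis=axis)
    def norm(x):
        s = np.sum(np.abs(x) ** p, axis=axis)
        return s ** (1. / p) if root else s
    return norm

l2norm = lp_norm(p=2)
assert np.isclose(l2norm(np.array((3., 4.))), 5.)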
Example #3
def test_standard():

    std = ml.features.Standard()
    X = make_data()
    std.learn(X)

    # standardized data must have zero mean and unit variance
    X_std = std.transform(X)
    assert testing.almost_equal(np.mean(X_std), 0., atol=1.e-6)
    assert testing.almost_equal(np.std(X_std), 1., rtol=1.e-6)

    # inverting the transform must recover the original statistics
    X_std_inv = std.invert(X_std)
    assert testing.almost_equal(np.mean(X_std_inv), np.mean(X), atol=1.e-6)
    assert testing.almost_equal(np.std(X_std_inv), np.std(X), rtol=1.e-6)
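
Standard evidently performs z-score standardization. A plain NumPy sketch of
the same transform (make_data is not shown, so shapes are assumed to be
(n_samples, n_features)):

import numpy as np

class ZScore:
    """Standardize columns to zero mean and unit variance."""
    def learn(self, X):
        self.mean_, self.std_ = X.mean(axis=0), X.std(axis=0)
    def transform(self, X):
        return (X - self.mean_) / self.std_
    def invert(self, X_std):
        return X_std * self.std_ + self.mean_

std = ZScore()
X = np.random.normal(loc=3., scale=2., size=(200, 4))
std.learn(X)
X_std = std.transform(X)
assert np.allclose(X_std.mean(axis=0), 0.) and np.allclose(X_std.std(axis=0), 1.)
assert np.allclose(std.invert(X_std), X)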
Example #4
def kmeans_clustering(X, k=None, centrinit=None, centroids=None,
                      init='++', atol=0.2, rtol=0.2, title='test'):

    kmeans = ml.KMeansCluster(init=init)
    kmeans.learn(X, k=k, centroids=centrinit)

    err_msg = ("\n{0}\ncentroids = {1}\n{2}\ntraining stats: {3}\ninfo: {4}"
               .format(title, centroids, kmeans, kmeans.training_stats,
                       kmeans._training_info))

    assert kmeans.converged, ("Failed to converge.\n" + err_msg +
                              plot_clusters(X, kmeans, title=title))

    if centroids is not None:
        # match each reference centroid to the nearest learned centroid,
        # then check that the pair agrees within tolerance
        k = len(centroids)
        for i in range(k):
            inearest, dnearest = -1, -1.
            for j in range(k):
                d2 = np.sum((kmeans.centroids[j] - centroids[i]) ** 2)
                if dnearest < 0 or d2 < dnearest:
                    inearest, dnearest = j, d2
            assert almost_equal(centroids[i], kmeans.centroids[inearest],
                                atol=atol, rtol=rtol), \
                ("Failed to find the centroids.\n" + err_msg +
                 plot_clusters(X, kmeans, title=title))
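
k-means returns its centroids in arbitrary order, which is why the test
matches each reference centroid to the nearest learned one before comparing.
The same matching can be vectorized in NumPy (a sketch; like the loop above,
it does not enforce a one-to-one assignment):

import numpy as np

def match_centroids(reference, learned):
    """Index of the nearest learned centroid for each reference centroid."""
    # pairwise squared distances, shape (n_reference, n_learned)
    d2 = np.sum((reference[:, None, :] - learned[None, :, :]) ** 2, axis=-1)
    return np.argmin(d2, axis=1)

reference = np.array([[0., 0.], [5., 5.]])
learned = np.array([[5.1, 4.9], [-0.1, 0.2]])
assert (match_centroids(reference, learned) == (1, 0)).all()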
Example #5
def test_gradient_descent():

    # dummy training set
    X = np.random.normal(size=(4, 2))
    y = np.random.normal(size=4)

    # gradient descent object
    alpha = 0.15
    gd = opt.GradientDescent(alpha=alpha, atol=1.0e-9, rtol=1.0e-4,
                             nstepmax=100, nminibatch=2)

    for method in ("batch", "minibatch", "stochastic"):
        for adapt in (False, True):

            gd.settings(method=method, adapt=adapt)
            theta = np.random.normal(loc=(0, 1, -1), scale=1, size=3)
            obj = ParaboloidObjective(theta)
            theta_guess = np.random.normal(scale=0.1, size=3)
            theta_hat, info = gd.minimize(obj, theta_guess, X, y)
            obj_min = obj.J(theta_hat, X, y)
            assert info["converged"], (
                "Gradient descent has failed to converge."
                + "\ninfo: {0}".format(info)
                + "\n{0}".format(str(gd))
                + "\n{0}".format(obj)
            )
            assert testing.almost_equal(obj_min, 0.0, rtol=1.0e-3, atol=1.0e-4), (
                "Minimum is not 0: {0}".format(obj_min)
                + "\ninfo: {0}".format(info)
                + "\n{0}".format(str(gd))
                + "\n{0}".format(obj)
            )
            assert testing.almost_equal(np.dot(theta, theta_hat),
                                        np.dot(theta, theta),
                                        rtol=1.0e-3, atol=1.0e-4), (
                "Gradient descent returned {0} instead of {1}."
                .format(theta_hat, theta)
                + "\ninfo: {0}".format(info)
                + "\n{0}".format(str(gd))
                + "\n{0}".format(obj)
            )

    try:
        gd.settings(alpha=-1)
    except ValueError:
        assert True
    else:
        assert False, "'settings(alpha=-1)' should have raised a ValueError."
    assert gd.settings()["alpha"] == alpha, \
        "'alpha' ({0}) should be {1}.".format(gd.settings()["alpha"], alpha)
    assert str(gd).find("alpha") > 0, \
        "bad string conversion:\n{0}".format(str(gd))
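
ParaboloidObjective is not defined in this snippet. Judging from its use
(constructed from the known minimizer theta, with J(theta_hat, X, y) == 0 at
the minimum), a plausible sketch is a quadratic bowl centered on theta; the
class and its method names below are guesses, not the test suite's actual
helper:

import numpy as np

class ParaboloidObjective:
    """J(theta) = ||theta - theta_min||^2, minimized at theta_min."""
    def __init__(self, theta_min):
        self.theta_min = np.asarray(theta_min)
    def J(self, theta, X, y):
        # X and y are unused by this toy objective
        return np.sum((theta - self.theta_min) ** 2)
    def gradient(self, theta, X, y):
        return 2. * (theta - self.theta_min)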
Example #6
def test_confusion():

    # with c=1 as the positive class: 2 true positives, 1 false negative,
    # 2 true negatives, 0 false positives, as fractions of the 5 samples
    y     = (0, 1, 1, 1, 0)
    y_hat = (0, 1, 1, 0, 0)
    conf = confusion(y, y_hat, c=1)
    assert (conf['falsepos'] == 0. and conf['truepos'] == 0.4 and
            conf['falseneg'] == 0.2 and conf['trueneg'] == 0.4)
    assert almost_equal(conf['precision'], 1.)
    assert almost_equal(conf['recall'], 0.4 / 0.6)  # TP / (TP + FN)
    assert almost_equal(conf['specificity'], 1.)
    assert almost_equal(conf['accuracy'], 0.8)

    try:
        conf = confusion(y, y_hat, c=2)
    except ValueError:
        assert True
    else:
        assert False, "An error should be raised when 'c' is not in 'y'"
Example #7
def test_kfold_cv():

    # an essentially noise-free linear relation: the CV error should be ~0
    X = np.linspace(-1, 1, 50)
    y = 1 - 2 * X + 1.e-6 * np.random.normal(size=len(X))
    cv = kfold_cross_validation(ml.LinearRegress(), X, y, k=10)
    assert almost_equal(cv, 0., atol=1.e-5), "cv={0}".format(cv)

    try:
        cv = kfold_cross_validation(ml.KMeansCluster(), X, y, k=10)
    except TypeError:
        assert True
    else:
        assert False, ("'kfold_cross_validation' should raise an error when " +
                       "called with an unsupervised predictor.")