def __init__(self):
    """Build the six-term basis fixture used by the tests.

    ``self.X`` is read here but never assigned in this method; presumably
    the base initializer provides it (confirm against the base class).
    The basis is sized from the second dimension of ``self.X`` and then
    filled with a constant root term, four hinge terms and one linear term.
    Every term is also kept as an attribute so tests can refer to it.
    """
    super(Container, self).__init__()
    n_columns = self.X.shape[1]
    self.basis = Basis(n_columns)

    # The constant term is the parent of every term except bf3,
    # which hangs off bf2.
    root = ConstantBasisFunction()
    self.parent = root
    self.bf1 = HingeBasisFunction(root, 1.0, 10, 1, False)
    self.bf2 = HingeBasisFunction(root, 1.0, 4, 2, True)
    self.bf3 = HingeBasisFunction(self.bf2, 1.0, 4, 3, True)
    self.bf4 = LinearBasisFunction(root, 2)
    self.bf5 = HingeBasisFunction(root, 1.5, 8, 2, True)

    # Registration order matters: the root goes first, then bf1..bf5.
    terms = (root, self.bf1, self.bf2, self.bf3, self.bf4, self.bf5)
    for term in terms:
        self.basis.append(term)
Esempio n. 2
0
class TestForwardPasser(object):
    """Tests for ``ForwardPasser`` on a seeded synthetic regression problem."""

    def __init__(self):
        """Create the seeded data set and the ``ForwardPasser`` under test.

        The random generator is seeded with 0 and then consumed in a fixed
        order (``X``, ``beta``, noise), so the fixture is reproducible.
        """
        n_samples, n_features, n_terms = 100, 10, 4
        numpy.random.seed(0)

        # Small hand-built basis: constant root, two hinge terms, and a
        # linear term whose parent is the first hinge.
        self.basis = Basis(n_features)
        root = ConstantBasisFunction()
        self.basis.append(root)
        first_hinge = HingeBasisFunction(root, 0.1, 10, 1, False, 'x1')
        second_hinge = HingeBasisFunction(root, 0.1, 10, 1, True, 'x1')
        linear = LinearBasisFunction(first_hinge, 2, 'x2')
        for term in (first_hinge, second_hinge, linear):
            self.basis.append(term)

        # Response is the transformed design matrix times random
        # coefficients, plus normally distributed noise.
        self.X = numpy.random.normal(size=(n_samples, n_features))
        self.B = numpy.empty(shape=(n_samples, n_terms), dtype=numpy.float64)
        self.basis.transform(self.X, self.B)
        self.beta = numpy.random.normal(size=n_terms)
        signal = numpy.dot(self.B, self.beta)
        noise = numpy.random.normal(size=n_samples)
        self.y = numpy.empty(shape=n_samples, dtype=numpy.float64)
        self.y[:] = signal + noise

        # The third argument is an all-ones vector shaped like y
        # (presumably per-sample weights; confirm against ForwardPasser).
        unit_vector = numpy.ones(self.y.shape)
        self.forwardPasser = ForwardPasser(
            self.X, self.y, unit_vector, max_terms=1000, penalty=1)

    def test_orthonormal_update(self):
        """Check that ``orthonormal_update`` keeps leading columns orthonormal.

        Columns 1..9 of the array returned by ``get_B_orth`` are overwritten
        one at a time. Each new column mixes a fresh random draw with the
        previous draw. After every update, the Gram matrix of the columns
        processed so far must match the identity to within 1e-7.
        """
        numpy.set_printoptions(precision=4)
        n_rows, _ = self.X.shape
        B_orth = self.forwardPasser.get_B_orth()
        previous = numpy.random.normal(size=n_rows)
        for col in range(1, 10):
            current = numpy.random.normal(size=n_rows)
            B_orth[:, col] = 10 * current + previous
            previous = current
            self.forwardPasser.orthonormal_update(col)

            leading = B_orth[:, :col + 1]
            gram = numpy.dot(leading.transpose(), leading)
            worst_deviation = numpy.max(numpy.abs(gram - numpy.eye(col + 1)))
            assert_true(worst_deviation < .0000001)

    def test_run(self):
        """Compare the forward-pass output with the stored regression file.

        The expected text is read from ``forward_regress.txt`` in the same
        directory as this module.
        """
        self.forwardPasser.run()
        basis_text = str(self.forwardPasser.get_basis())
        trace_text = str(self.forwardPasser.trace())
        res = basis_text + '\n' + trace_text

        here = os.path.dirname(__file__)
        expected_path = os.path.join(here, 'forward_regress.txt')
        with open(expected_path, 'r') as fl:
            expected = fl.read()
        assert_equal(res, expected)
Esempio n. 3
0
 def __init__(self):
     """Build the six-term basis fixture used by the tests.

     ``self.X`` is read here but never assigned in this method; presumably
     the base initializer provides it (confirm against the base class).
     The basis is sized from the second dimension of ``self.X`` and then
     filled with a constant root term, four hinge terms and one linear term.
     Every term is also kept as an attribute so tests can refer to it.
     """
     super(Container, self).__init__()
     n_columns = self.X.shape[1]
     self.basis = Basis(n_columns)

     # The constant term is the parent of every term except bf3,
     # which hangs off bf2.
     root = ConstantBasisFunction()
     self.parent = root
     self.bf1 = HingeBasisFunction(root, 1.0, 10, 1, False)
     self.bf2 = HingeBasisFunction(root, 1.0, 4, 2, True)
     self.bf3 = HingeBasisFunction(self.bf2, 1.0, 4, 3, True)
     self.bf4 = LinearBasisFunction(root, 2)
     self.bf5 = HingeBasisFunction(root, 1.5, 8, 2, True)

     # Registration order matters: the root goes first, then bf1..bf5.
     terms = (root, self.bf1, self.bf2, self.bf3, self.bf4, self.bf5)
     for term in terms:
         self.basis.append(term)
Esempio n. 4
0
 def __init__(self):
     """Create the seeded data set and the ``Earth(penalty=1)`` model.

     The random generator is seeded with 0 and then consumed in a fixed
     order (``X``, ``beta``, noise), so the fixture is reproducible.
     """
     n_samples, n_features, n_terms = 100, 10, 4
     numpy.random.seed(0)

     # Small hand-built basis: constant root, two hinge terms, and a
     # linear term whose parent is the first hinge.
     self.basis = Basis(n_features)
     root = ConstantBasisFunction()
     self.basis.append(root)
     first_hinge = HingeBasisFunction(root, 0.1, 10, 1, False, 'x1')
     second_hinge = HingeBasisFunction(root, 0.1, 10, 1, True, 'x1')
     linear = LinearBasisFunction(first_hinge, 2, 'x2')
     for term in (first_hinge, second_hinge, linear):
         self.basis.append(term)

     # Response is the transformed design matrix times random
     # coefficients, plus normally distributed noise.
     self.X = numpy.random.normal(size=(n_samples, n_features))
     self.B = numpy.empty(shape=(n_samples, n_terms), dtype=numpy.float64)
     self.basis.transform(self.X, self.B)
     self.beta = numpy.random.normal(size=n_terms)
     signal = numpy.dot(self.B, self.beta)
     noise = numpy.random.normal(size=n_samples)
     self.y = numpy.empty(shape=n_samples, dtype=numpy.float64)
     self.y[:] = signal + noise

     self.earth = Earth(penalty=1)
Esempio n. 5
0
                            if_platform_not_win_32)
from nose.tools import (assert_equal, assert_true, assert_almost_equal,
                        assert_list_equal, assert_raises, assert_not_equal)
import numpy
from scipy.sparse import csr_matrix
from pyearth._types import BOOL
from pyearth._basis import (Basis, ConstantBasisFunction, HingeBasisFunction,
                            LinearBasisFunction)
from pyearth import Earth
import pyearth
from numpy.testing.utils import assert_array_almost_equal

# Flag left off by default. Nothing in this chunk reads it; presumably it
# toggles rewriting of the stored expected-output files (confirm at use site).
regenerate_target_files = False

# Module-level fixture shared by the tests. The generator is seeded once and
# then consumed in a fixed order: X, beta, noise.
numpy.random.seed(1)

# Hand-built basis: constant root, two hinge terms, and a linear term whose
# parent is bf1.
basis = Basis(10)
constant = ConstantBasisFunction()
basis.append(constant)
bf1 = HingeBasisFunction(constant, 0.1, 10, 1, False, 'x1')
bf2 = HingeBasisFunction(constant, 0.1, 10, 1, True, 'x1')
bf3 = LinearBasisFunction(bf1, 2, 'x2')
for _term in (bf1, bf2, bf3):
    basis.append(_term)
del _term  # keep the module namespace limited to the original names

X = numpy.random.normal(size=(1000, 10))
# Zero-filled mask with the same shape as X, passed to transform below.
missing = numpy.zeros_like(X, dtype=BOOL)
B = numpy.empty(shape=(X.shape[0], 4), dtype=numpy.float64)
basis.transform(X, missing, B)

# Response: design matrix times random coefficients, then additive noise.
beta = numpy.random.normal(size=4)
y = numpy.empty(shape=X.shape[0], dtype=numpy.float64)
y[:] = B.dot(beta)
y += numpy.random.normal(size=1000)
Esempio n. 6
0
import os
from .testing_utils import if_statsmodels, if_pandas, if_patsy, if_environ_has, \
    assert_list_almost_equal_value, assert_list_almost_equal, \
    if_sklearn_version_greater_than_or_equal_to
from nose.tools import assert_equal, assert_true, \
    assert_almost_equal, assert_list_equal, assert_raises, assert_not_equal
import numpy
from scipy.sparse import csr_matrix
from pyearth._types import BOOL
from pyearth._basis import (Basis, ConstantBasisFunction,
                            HingeBasisFunction, LinearBasisFunction)
from pyearth import Earth

# Module-level fixture shared by the tests. The generator is seeded once and
# then consumed in a fixed order: X, beta, noise.
numpy.random.seed(0)

# Hand-built basis: constant root, two hinge terms, and a linear term whose
# parent is bf1.
basis = Basis(10)
constant = ConstantBasisFunction()
basis.append(constant)
bf1 = HingeBasisFunction(constant, 0.1, 10, 1, False, 'x1')
bf2 = HingeBasisFunction(constant, 0.1, 10, 1, True, 'x1')
bf3 = LinearBasisFunction(bf1, 2, 'x2')
for _term in (bf1, bf2, bf3):
    basis.append(_term)
del _term  # keep the module namespace limited to the original names

X = numpy.random.normal(size=(100, 10))
# Zero-filled mask with the same shape as X, passed to transform below.
missing = numpy.zeros_like(X, dtype=BOOL)
B = numpy.empty(shape=(X.shape[0], 4), dtype=numpy.float64)
basis.transform(X, missing, B)

# Response: design matrix times random coefficients, then additive noise.
beta = numpy.random.normal(size=4)
y = numpy.empty(shape=X.shape[0], dtype=numpy.float64)
y[:] = B.dot(beta)
y += numpy.random.normal(size=100)
Esempio n. 7
0
class TestEarth(object):
    """Tests for the ``Earth`` estimator on a seeded synthetic data set."""

    def __init__(self):
        """Create the seeded data set and the ``Earth(penalty=1)`` model.

        The random generator is seeded with 0 and then consumed in a fixed
        order (``X``, ``beta``, noise), so the fixture is reproducible.
        """
        numpy.random.seed(0)
        self.basis = Basis(10)
        constant = ConstantBasisFunction()
        self.basis.append(constant)
        bf1 = HingeBasisFunction(constant, 0.1, 10, 1, False, 'x1')
        bf2 = HingeBasisFunction(constant, 0.1, 10, 1, True, 'x1')
        bf3 = LinearBasisFunction(bf1, 2, 'x2')
        self.basis.append(bf1)
        self.basis.append(bf2)
        self.basis.append(bf3)
        self.X = numpy.random.normal(size=(100, 10))
        self.B = numpy.empty(shape=(100, 4), dtype=numpy.float64)
        self.basis.transform(self.X, self.B)
        self.beta = numpy.random.normal(size=4)
        self.y = numpy.empty(shape=100, dtype=numpy.float64)
        self.y[:] = numpy.dot(
            self.B, self.beta) + numpy.random.normal(size=100)
        self.earth = Earth(penalty=1)

    def test_get_params(self):
        """``get_params`` reports ``None`` for every unset constructor arg."""
        defaults = dict.fromkeys(
            ('penalty', 'min_search_points', 'endspan_alpha', 'check_every',
             'max_terms', 'max_degree', 'minspan_alpha', 'thresh',
             'minspan', 'endspan', 'allow_linear', 'smooth'))
        assert_equal(Earth().get_params(), defaults)
        # Only the explicitly supplied argument may differ from the defaults.
        assert_equal(Earth(max_degree=3).get_params(),
                     dict(defaults, max_degree=3))

    @if_statsmodels
    def test_linear_fit(self):
        """Coefficients from the linear fit match statsmodels OLS and GLS."""
        from statsmodels.regression.linear_model import GLS, OLS
        self.earth.fit(self.X, self.y)
        # Name-mangled access to the private Earth.__linear_fit method.
        self.earth._Earth__linear_fit(self.X, self.y)
        soln = OLS(self.y, self.earth.transform(self.X)).fit().params
        assert_almost_equal(numpy.mean((self.earth.coef_ - soln) ** 2), 0.0)

        sample_weight = 1.0 / (numpy.random.normal(size=self.y.shape) ** 2)
        self.earth.fit(self.X, self.y)
        self.earth._Earth__linear_fit(self.X, self.y, sample_weight)
        # GLS receives the reciprocal of the weights as its third argument.
        soln = GLS(self.y, self.earth.transform(
            self.X), 1.0 / sample_weight).fit().params
        assert_almost_equal(numpy.mean((self.earth.coef_ - soln) ** 2), 0.0)

    def test_sample_weight(self):
        """Heavier-weighted samples are fit better; score agrees with trace."""
        group = numpy.random.binomial(1, .5, size=1000) == 1
        # Samples in ``group`` get weight 1/101, the rest get weight 1.
        sample_weight = 1 / (group * 100 + 1.0)
        x = numpy.random.uniform(-10, 10, size=1000)
        y = numpy.abs(x)
        y[group] = numpy.abs(x[group] - 5)
        y += numpy.random.normal(0, 1, size=1000)
        model = Earth().fit(x, y, sample_weight=sample_weight)

        # Check that the model fits better for the more heavily weighted group
        assert_true(model.score(x[group], y[group]) < model.score(
            x[numpy.logical_not(group)], y[numpy.logical_not(group)]))

        # Make sure that the score function gives the same answer as the trace
        pruning_trace = model.pruning_trace()
        rsq_trace = pruning_trace.rsq(pruning_trace.get_selected())
        assert_almost_equal(model.score(x, y, sample_weight=sample_weight),
                            rsq_trace)

        # Uncomment below to see what this test situation looks like
        # from matplotlib import pyplot
        # print(model.summary())
        # print(model.score(x, y, sample_weight=sample_weight))
        # pyplot.figure()
        # pyplot.plot(x, y, 'b.')
        # pyplot.plot(x, model.predict(x), 'r.')
        # pyplot.show()

    def test_fit(self):
        """Trace and summary after ``fit`` match ``earth_regress.txt``."""
        self.earth.fit(self.X, self.y)
        res = str(self.earth.trace()) + '\n' + self.earth.summary()
        filename = os.path.join(os.path.dirname(__file__),
                                'earth_regress.txt')
        with open(filename, 'r') as fl:
            prev = fl.read()
        assert_equal(res, prev)

    def test_smooth(self):
        """A ``smooth=True`` model matches ``earth_regress_smooth.txt``."""
        model = Earth(penalty=1, smooth=True)
        model.fit(self.X, self.y)
        res = str(model.trace()) + '\n' + model.summary()
        filename = os.path.join(os.path.dirname(__file__),
                                'earth_regress_smooth.txt')
        with open(filename, 'r') as fl:
            prev = fl.read()
        assert_equal(res, prev)

    def test_linvars(self):
        """Fitting with all ten columns in ``linvars`` matches the stored file."""
        self.earth.fit(self.X, self.y, linvars=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        res = str(self.earth.trace()) + '\n' + self.earth.summary()
        filename = os.path.join(os.path.dirname(__file__),
                                'earth_linvars_regress.txt')
        with open(filename, 'r') as fl:
            prev = fl.read()
        assert_equal(res, prev)

    def test_score(self):
        """``score`` equals the R-squared of the selected pruning iteration."""
        model = self.earth.fit(self.X, self.y)
        record = model.pruning_trace()
        rsq = record.rsq(record.get_selected())
        assert_almost_equal(rsq, model.score(self.X, self.y))

    @if_pandas
    @if_environ_has('test_pathological_cases')
    def test_pathological_cases(self):
        """Stored problem data sets still produce their recorded summaries.

        Each case name maps to ``<case>.csv`` (input data with a ``y``
        column) and ``<case>.txt`` (expected summary) in the
        ``pathological_data`` directory next to this module. A case may name
        a ``sample_weight`` CSV file; every other setting is passed to the
        ``Earth`` constructor.
        """
        import pandas
        directory = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'pathological_data')
        cases = {'issue_44': {},
                 'issue_50': {'penalty': 0.5,
                              'minspan': 1,
                              'allow_linear': False,
                              'endspan': 1,
                              'check_every': 1,
                              'sample_weight': 'issue_50_weight.csv'}}
        # items() rather than iteritems(): the latter does not exist on
        # Python 3 dicts, while items() works on both major versions.
        for case, settings in cases.items():
            data = pandas.read_csv(os.path.join(directory, case + '.csv'))
            y = data['y']
            del data['y']
            X = data
            # Work on a copy so the case table itself is never mutated, and
            # strip the weight file name, which is not an Earth() argument.
            earth_kwargs = dict(settings)
            weight_file = earth_kwargs.pop('sample_weight', None)
            if weight_file is not None:
                filename = os.path.join(directory, weight_file)
                sample_weight = pandas.read_csv(filename)['sample_weight']
            else:
                sample_weight = None
            model = Earth(**earth_kwargs)
            model.fit(X, y, sample_weight=sample_weight)
            with open(os.path.join(directory, case + '.txt'), 'r') as infile:
                correct = infile.read()
            assert_equal(model.summary(), correct)

    @if_pandas
    def test_pandas_compatibility(self):
        """DataFrame column names are carried through as ``xlabels``."""
        import pandas
        X = pandas.DataFrame(self.X)
        y = pandas.DataFrame(self.y)
        colnames = ['xx' + str(i) for i in range(X.shape[1])]
        X.columns = colnames
        model = self.earth.fit(X, y)
        assert_list_equal(
            colnames, model.forward_trace()._getstate()['xlabels'])

    @if_patsy
    @if_pandas
    def test_patsy_compatibility(self):
        """Design matrices built by patsy keep their column names."""
        import pandas
        import patsy
        X = pandas.DataFrame(self.X)
        y = pandas.DataFrame(self.y)
        colnames = ['xx' + str(i) for i in range(X.shape[1])]
        X.columns = colnames
        X['y'] = y
        y, X = patsy.dmatrices(
            'y ~ xx0 + xx1 + xx2 + xx3 + xx4 + xx5 + xx6 + xx7 + xx8 + xx9 - 1',
            data=X)
        model = self.earth.fit(X, y)
        assert_list_equal(
            colnames, model.forward_trace()._getstate()['xlabels'])

    def test_pickle_compatibility(self):
        """A pickled and restored model is equal and predicts identically."""
        model = self.earth.fit(self.X, self.y)
        model_copy = pickle.loads(pickle.dumps(model))
        assert_true(model_copy == model)
        assert_true(
            numpy.all(model.predict(self.X) == model_copy.predict(self.X)))
        # The root of term 1 must be the very same object as term 0, in the
        # original and in the restored basis alike.
        assert_true(model.basis_[0] is model.basis_[1]._get_root())
        assert_true(model_copy.basis_[0] is model_copy.basis_[1]._get_root())

    def test_copy_compatibility(self):
        """A ``copy.copy`` of a model is equal and predicts identically."""
        model = self.earth.fit(self.X, self.y)
        model_copy = copy.copy(model)
        assert_true(model_copy == model)
        assert_true(
            numpy.all(model.predict(self.X) == model_copy.predict(self.X)))
        # The root of term 1 must be the very same object as term 0, in the
        # original and in the copied basis alike.
        assert_true(model.basis_[0] is model.basis_[1]._get_root())
        assert_true(model_copy.basis_[0] is model_copy.basis_[1]._get_root())
Esempio n. 8
0
class TestBasis(BaseTestClass):
    """Tests for ``Basis`` using a hand-built six-term basis."""

    def __init__(self):
        """Build the basis: a constant root, four hinges and one linear term.

        ``self.X`` is read here but not assigned in this class; presumably
        ``BaseTestClass.__init__`` provides it (confirm against the base).
        """
        # Name the class explicitly. super(self.__class__, self) resolves to
        # the subclass when this __init__ is inherited, so it would call
        # itself forever.
        super(TestBasis, self).__init__()
        self.basis = Basis(self.X.shape[1])
        self.parent = ConstantBasisFunction()
        self.bf1 = HingeBasisFunction(self.parent, 1.0, 10, 1, False)
        self.bf2 = HingeBasisFunction(self.parent, 1.0, 4, 2, True)
        self.bf3 = HingeBasisFunction(self.bf2, 1.0, 4, 3, True)
        self.bf4 = LinearBasisFunction(self.parent, 2)
        self.bf5 = HingeBasisFunction(self.parent, 1.5, 8, 2, True)
        for term in (self.parent, self.bf1, self.bf2,
                     self.bf3, self.bf4, self.bf5):
            self.basis.append(term)

    def test_anova_decomp(self):
        """``anova_decomp`` groups terms under the expected frozenset keys."""
        anova = self.basis.anova_decomp()
        assert_equal(set(anova[frozenset([1])]), {self.bf1})
        assert_equal(set(anova[frozenset([2])]),
                     {self.bf2, self.bf4, self.bf5})
        assert_equal(set(anova[frozenset([2, 3])]), {self.bf3})
        assert_equal(set(anova[frozenset()]), {self.parent})
        assert_equal(len(anova), 4)

    def test_smooth_knots(self):
        """``smooth_knots`` returns the expected pair for each hinge term."""
        mins = [0.0, -1.0, 0.1, 0.2]
        maxes = [2.5, 3.5, 3.0, 2.0]
        knots = self.basis.smooth_knots(mins, maxes)
        assert_equal(knots[self.bf1], (0.0, 2.25))
        assert_equal(knots[self.bf2], (0.55, 1.25))
        assert_equal(knots[self.bf3], (0.6,  1.5))
        # The linear term gets no entry.
        assert_true(self.bf4 not in knots)
        assert_equal(knots[self.bf5], (1.25, 2.25))

    def test_smooth(self):
        """``smooth`` maps each term to the expected type and keeps knots."""
        X = numpy.random.uniform(-2.0, 4.0, size=(20, 4))
        smooth_basis = self.basis.smooth(X)
        # Exact source type -> exact type required in the smoothed basis.
        expected_types = {
            HingeBasisFunction: SmoothedHingeBasisFunction,
            ConstantBasisFunction: ConstantBasisFunction,
            LinearBasisFunction: LinearBasisFunction,
        }
        for bf, smooth_bf in zip(self.basis, smooth_basis):
            if type(bf) not in expected_types:
                raise AssertionError('Basis function is of an unexpected type.')
            assert_true(type(smooth_bf) is expected_types[type(bf)])
            if bf.has_knot():
                assert_equal(bf.get_knot(), smooth_bf.get_knot())

    def test_add(self):
        """All six appended terms are counted by ``len``."""
        assert_equal(len(self.basis), 6)

    def test_pickle_compat(self):
        """A pickled and restored basis compares equal to the original."""
        basis_copy = pickle.loads(pickle.dumps(self.basis))
        assert_true(self.basis == basis_copy)