Example #1
0
def MODWT_MARS_TRAIN(series, regressors=4, delay=1, N=2000):
    """Fit an AdaBoost-ed MARS model on lagged values of ``series``.

    The last ``N`` observations are kept and the first 500 of those are
    discarded. Each sample consists of ``regressors`` lagged values spaced
    ``delay`` steps apart (oldest first); the target is the value ``delay``
    steps after the newest lag. All rows except the trailing ~30 % are used
    for training.

    Args:
        series: Sequence of observations; expected to hold at least ``N``
            values.
        regressors: Number of lagged inputs per sample.
        delay: Spacing between lags, and the forecast horizon.
        N: Number of trailing observations to use.

    Returns:
        The boosted model's predictions on the training rows.
    """
    D = regressors  # number of regressors
    T = delay  # delay
    burn_in = 500  # leading samples of the N-window that are discarded

    series = np.array(series[len(series) - N:]).reshape(-1, 1)
    series = series[burn_in:]

    n_rows = N - burn_in - T - (D - 1) * T
    data = np.zeros((n_rows, D))
    lbls = np.zeros((n_rows,))

    for t in range((D - 1) * T, N - burn_in - T):
        row = t - (D - 1) * T
        # Build the lags from D (oldest first) instead of a fixed list of
        # four, and index column 0 so plain scalars are stored.
        data[row, :] = [series[t - k * T, 0] for k in range(D - 1, -1, -1)]
        lbls[row] = series[t + T, 0]

    n_train = lbls.size - round(lbls.size * 0.3)
    trnData = data[:n_train, :]
    trnLbls = lbls[:n_train]

    mars = Earth()
    mars.fit(trnData, trnLbls)
    # NOTE(review): newer scikit-learn releases rename ``base_estimator`` to
    # ``estimator`` - confirm against the installed version.
    boosted_mars = AdaBoostRegressor(base_estimator=mars, n_estimators=25, learning_rate=0.01, loss='exponential')
    boosted_mars.fit(trnData, trnLbls)
    preds = boosted_mars.predict(trnData)

    return preds
Example #2
0
def test_pathological_cases():
    """Fit Earth on each stored pathological data set and compare summaries.

    For every case, ``<case>.csv`` in the ``pathological_data`` directory
    next to this file supplies the data (target column ``y``) and
    ``<case>.txt`` holds the expected ``model.summary()`` text.
    """
    import pandas
    directory = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'pathological_data')
    cases = {'issue_44': {},
             'issue_50': {'penalty': 0.5,
                          'minspan': 1,
                          'allow_linear': False,
                          'endspan': 1,
                          'check_every': 1,
                          'sample_weight': 'issue_50_weight.csv'}}
    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for case, settings in cases.items():
        data = pandas.read_csv(os.path.join(directory, case + '.csv'))
        y = data['y']
        del data['y']
        X = data
        # 'sample_weight' names a CSV file rather than an Earth() argument,
        # so it is removed from the settings before the model is built.
        weight_file = settings.pop('sample_weight', None)
        if weight_file is not None:
            filename = os.path.join(directory, weight_file)
            sample_weight = pandas.read_csv(filename)['sample_weight']
        else:
            sample_weight = None
        model = Earth(**settings)
        model.fit(X, y, sample_weight=sample_weight)
        with open(os.path.join(directory, case + '.txt'), 'r') as infile:
            correct = infile.read()
        assert_equal(model.summary(), correct)
Example #3
0
 def fit_mars(self, X_test):
     """Fit a MARS model on ``self.X``/``self.y`` and save test predictions.

     Predictions for ``X_test`` are written to ``results/results_mars.csv``
     in a single ``SalePrice`` column indexed like ``X_test``.
     """
     model = Earth(max_terms=1000, max_degree=1, penalty=3)
     train_features = self.X.copy().values
     train_target = self.y.copy().values.flatten()
     model.fit(train_features, train_target)
     predictions = model.predict(X_test.copy().values)
     output = pd.DataFrame(data=predictions,
                           index=X_test.index,
                           columns=['SalePrice'])
     output.to_csv('results/results_mars.csv', sep=',')
Example #4
0
def test_pathological_cases():
    """Fit Earth on each stored pathological data set and compare summaries.

    For every case, ``<case>.csv`` in the ``pathological_data`` directory
    next to this file supplies the data (target column ``y``) and
    ``<case>.txt`` holds the expected ``model.summary()`` text.
    """
    import pandas
    directory = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'pathological_data')
    cases = {
        'issue_44': {},
        'issue_50': {
            'penalty': 0.5,
            'minspan': 1,
            'allow_linear': False,
            'endspan': 1,
            'check_every': 1,
            'sample_weight': 'issue_50_weight.csv'
        }
    }
    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for case, settings in cases.items():
        data = pandas.read_csv(os.path.join(directory, case + '.csv'))
        y = data['y']
        del data['y']
        X = data
        # 'sample_weight' names a CSV file rather than an Earth() argument,
        # so it is removed from the settings before the model is built.
        weight_file = settings.pop('sample_weight', None)
        if weight_file is not None:
            filename = os.path.join(directory, weight_file)
            sample_weight = pandas.read_csv(filename)['sample_weight']
        else:
            sample_weight = None
        model = Earth(**settings)
        model.fit(X, y, sample_weight=sample_weight)
        with open(os.path.join(directory, case + '.txt'), 'r') as infile:
            correct = infile.read()
        assert_equal(model.summary(), correct)
Example #5
0
class MARS:
    """Wrap an ``Earth`` model and score it as a two-class (+1/-1) predictor."""

    def __init__(self, x_train, y_train, x_test, y_test):
        # Training and evaluation splits, stored as given.
        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_test, y_test
        # Set by fit().
        self.classifier = None

    def fit(self):
        """Create a fresh ``Earth`` model and fit it on the training split."""
        self.classifier = Earth()
        self.classifier.fit(self.x_train, self.y_train)

    def predict(self):
        """Return the fitted model's raw predictions for the test split."""
        return self.classifier.predict(self.x_test)

    def dichotomize(self, predictions):
        """Map each prediction to +1 if it is >= the median, otherwise -1."""
        threshold = np.median(predictions)
        signs = [1 if value >= threshold else -1 for value in predictions]
        return np.array(signs)

    def evaluate(self):
        """Return the fraction of test labels the dichotomized output misses."""
        labels = self.dichotomize(self.predict())
        mismatches = 0.0
        for guessed, actual in zip(labels, self.y_test):
            if guessed != actual:
                mismatches += 1
        return mismatches / len(self.y_test)
Example #6
0
    def marsAccuracy(self):
        """Fit a MARS model on the first 200 rows and return the validation RMSE.

        The frame is re-indexed by its 'Date' column, split at row 200, and
        'Close' is used as the target. Validation predictions are kept in
        ``self.preds``.
        """
        # Index the frame by its date column.
        self.df.index = self.df['Date']

        split_at = 200
        self.train = self.df[:split_at]
        self.valid = self.df[split_at:]

        target = 'Close'
        train_features = self.train.drop(target, axis=1)
        train_target = self.train[target]
        valid_features = self.valid.drop(target, axis=1)
        valid_target = self.valid[target]

        train_features = timeToFloat(train_features)
        valid_features = timeToFloat(valid_features)

        # Fit on the training split, predict the validation split.
        model = Earth()
        model.fit(train_features, train_target)
        self.preds = model.predict(valid_features)

        # Root mean squared error on the validation split.
        return np.sqrt(mean_squared_error(valid_target, self.preds))
Example #7
0
 def estimate_reward(self, z_train, y_train, z):
     """Fit a degree-2 MARS model on the training pairs and predict for ``z``.

     Args:
         z_train: Training inputs passed to ``Earth.fit``.
         y_train: Training targets passed to ``Earth.fit``.
         z: A single input sample; it is wrapped in a list before prediction.

     Returns:
         The value returned by ``Earth.predict`` for the one-sample batch.
     """
     mars_model = Earth(max_degree=2)
     mars_model.fit(z_train, y_train)
     return mars_model.predict([z])
def model_based_divergence(X, y, model_2):
    """Compare GCV feature importances of a fresh Earth fit with ``model_2``.

    An ``Earth`` model with ``feature_importance_type='gcv'`` is fitted on
    ``(X, y)``. The return value is the dot product of its importances with
    ``model_2.feature_importances_`` divided by the product of their norms.
    """
    reference = Earth(feature_importance_type='gcv')
    reference.fit(X, y)
    own_importances = reference.feature_importances_
    other_importances = model_2.feature_importances_
    own_norm = np.linalg.norm(own_importances)
    other_norm = np.linalg.norm(other_importances)
    return np.dot(own_importances, other_importances) / (own_norm * other_norm)
Example #9
0
def marsFit(x,y):
    """Fit a degree-1 MARS model and return several views of the result.

    Returns:
        A 4-tuple ``(fitted, model, indices, f)``: the predictions for ``x``,
        the fitted ``Earth`` model, ``range(len(x))``, and a function that
        predicts for new inputs.
    """
    model = Earth(max_degree=1)
    model.fit(x, y)

    def f(x):
        # Closure over the fitted model.
        return model.predict(x)

    fitted = model.predict(x)
    indices = range(len(x))
    return fitted, model, indices, f
Example #10
0
def test_fit():
    """Check the trace and summary of a default fit against the stored text."""
    model = Earth(**default_params)
    model.fit(X, y)
    actual = '\n'.join([str(model.trace()), model.summary()])
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_regress.txt')
    with open(reference_path, 'r') as handle:
        expected = handle.read()
    assert_equal(actual, expected)
Example #11
0
def test_smooth():
    """Check the trace and summary of a smoothed fit against the stored text."""
    smooth_model = Earth(penalty=1, smooth=True)
    smooth_model.fit(X, y)
    actual = '\n'.join([str(smooth_model.trace()), smooth_model.summary()])
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_regress_smooth.txt')
    with open(reference_path, 'r') as handle:
        expected = handle.read()
    assert_equal(actual, expected)
Example #12
0
def run_pyearth(X, y, **kwargs):
    """Fit an ``Earth`` model built from ``kwargs`` and time the fit.

    Returns:
        ``(predictions, fit_seconds, forward_iterations)`` where the
        predictions are for ``X`` itself and ``forward_iterations`` is the
        length of the forward trace minus one.
    """
    model = Earth(**kwargs)
    started = time.time()
    model.fit(X, y)
    fit_seconds = time.time() - started
    predictions = model.predict(X)
    n_forward = len(model.forward_trace()) - 1
    return predictions, fit_seconds, n_forward
Example #13
0
def test_fit():
    """Check the trace and summary of a default fit against the stored text."""
    model = Earth(**default_params)
    model.fit(X, y)
    actual = '\n'.join([str(model.trace()), model.summary()])
    here = os.path.dirname(__file__)
    with open(os.path.join(here, 'earth_regress.txt'), 'r') as handle:
        expected = handle.read()
    assert_equal(actual, expected)
Example #14
0
def test_smooth():
    """Check the trace and summary of a smoothed fit against the stored text."""
    smooth_model = Earth(penalty=1, smooth=True)
    smooth_model.fit(X, y)
    actual = '\n'.join([str(smooth_model.trace()), smooth_model.summary()])
    here = os.path.dirname(__file__)
    with open(os.path.join(here, 'earth_regress_smooth.txt'), 'r') as handle:
        expected = handle.read()
    assert_equal(actual, expected)
Example #15
0
def run_pyearth(X, y, **kwargs):
    """Fit an ``Earth`` model built from ``kwargs`` and time the fit.

    Returns:
        ``(predictions, fit_seconds, forward_iterations)`` where the
        predictions are for ``X`` itself and ``forward_iterations`` is the
        length of the forward trace minus one.
    """
    estimator = Earth(**kwargs)
    tic = time.time()
    estimator.fit(X, y)
    toc = time.time()
    fitted_values = estimator.predict(X)
    return fitted_values, toc - tic, len(estimator.forward_trace()) - 1
Example #16
0
def HHT_MARS_TEST(series, regressors=4, delay=1, N=2000):
    """Stack boosted and bagged MARS forecasts with an XGBoost regressor.

    The last ``N`` observations are kept and the first 500 of those are
    discarded. Each sample consists of ``regressors`` lagged values spaced
    ``delay`` steps apart (oldest first); the target is the value ``delay``
    steps after the newest lag. All rows except the trailing ~30 % train the
    MARS ensembles; the remaining rows, plus one extra row built from the
    last ``regressors`` labels, are predicted by both ensembles. Those two
    prediction columns feed an XGBoost regressor.

    Args:
        series: Sequence of observations; expected to hold at least ``N``
            values.
        regressors: Number of lagged inputs per sample.
        delay: Spacing between lags, and the forecast horizon.
        N: Number of trailing observations to use.

    Returns:
        XGBoost predictions for every hold-out row, followed by the
        prediction for the extra appended row.
    """
    D = regressors  # number of regressors
    T = delay  # delay
    burn_in = 500  # leading samples of the N-window that are discarded

    # Honour N here; the window length was previously fixed at 2000, which
    # disagreed with the N-based sizes used below for any other N.
    series = np.array(series[len(series) - N:]).reshape(-1, 1)
    series = series[burn_in:]

    n_rows = N - burn_in - T - (D - 1) * T
    data = np.zeros((n_rows, D))
    lbls = np.zeros((n_rows, ))

    for t in range((D - 1) * T, N - burn_in - T):
        row = t - (D - 1) * T
        # Build the lags from D (oldest first) instead of a fixed list of
        # four, and index column 0 so plain scalars are stored.
        data[row, :] = [series[t - k * T, 0] for k in range(D - 1, -1, -1)]
        lbls[row] = series[t + T, 0]

    n_train = lbls.size - round(lbls.size * 0.3)
    trnData = data[:n_train, :]
    trnLbls = lbls[:n_train]
    chkData = data[n_train:, :]
    chkLbls = lbls[n_train:]

    # Extra input row made of the last D labels (was fixed at 4 labels, which
    # only matched the column count when regressors == 4).
    aa = np.array(chkLbls[-D:]).reshape(1, -1)
    chkData = np.append(chkData, aa, axis=0)

    mars = Earth()
    mars.fit(trnData, trnLbls)
    # NOTE(review): newer scikit-learn releases rename ``base_estimator`` to
    # ``estimator`` - confirm against the installed version.
    boosted_mars = AdaBoostRegressor(base_estimator=mars,
                                     n_estimators=25,
                                     learning_rate=0.1,
                                     loss='exponential')
    bag = BaggingRegressor(base_estimator=mars, n_estimators=25)
    bag.fit(trnData, trnLbls)
    boosted_mars.fit(trnData, trnLbls)
    pred2 = bag.predict(chkData)
    oos_preds = boosted_mars.predict(chkData)

    # One column per ensemble; the last row belongs to the extra input row.
    stack_predict = np.vstack([oos_preds, pred2]).T

    # NOTE(review): 'reg:linear' is a deprecated alias of
    # 'reg:squarederror' in recent xgboost releases - confirm before
    # upgrading.
    params_xgd = {
        'max_depth': 7,
        'objective': 'reg:linear',
        'learning_rate': 0.05,
        'n_estimators': 10000
    }
    clf = xgb.XGBRegressor(**params_xgd)
    # The extra row has no label, so it is excluded from training.
    clf.fit(stack_predict[:-1, :],
            chkLbls,
            eval_set=[(stack_predict[:-1, :], chkLbls)],
            eval_metric='rmse',
            early_stopping_rounds=20,
            verbose=False)

    xgb_pred = clf.predict(stack_predict)

    return xgb_pred
Example #17
0
def test_exhaustive_search():
    """With pruning disabled, basis, coefficient and transform sizes agree."""
    settings = dict(max_terms=13,
                    enable_pruning=False,
                    check_every=1,
                    thresh=0,
                    minspan=1,
                    endspan=1)
    model = Earth(**settings)
    model.fit(X, y)
    pruned_length = model.basis_.plen()
    assert_equal(pruned_length, model.coef_.shape[1])
    transformed_columns = model.transform(X).shape[1]
    assert_equal(transformed_columns, len(model.basis_))
Example #18
0
def test_xlabels():
    """``xlabels`` must match the column count; matching labels fit cleanly."""
    # Two labels for five columns must be rejected.
    mismatched = Earth(**default_params)
    assert_raises(ValueError, mismatched.fit, X[:, 0:5], y,
                  xlabels=['var1', 'var2'])

    # Three labels for three columns: fitted twice, each on a fresh model.
    for _ in range(2):
        matched = Earth(**default_params)
        matched.fit(X[:, 0:3], y, xlabels=['var1', 'var2', 'var3'])
Example #19
0
def test_xlabels():
    """``xlabels`` must match the column count; matching labels fit cleanly."""
    # Two labels for five columns must be rejected.
    mismatched = Earth(**default_params)
    assert_raises(ValueError, mismatched.fit, X[:, 0:5], y,
                  xlabels=['var1', 'var2'])

    # Three labels for three columns: fitted twice, each on a fresh model.
    for _ in range(2):
        matched = Earth(**default_params)
        matched.fit(X[:, 0:3], y, xlabels=['var1', 'var2', 'var3'])
Example #20
0
def test_nb_terms():
    """The basis size stays within ``max_terms + 2`` for several settings."""
    for max_terms in (1, 3, 12, 13):
        model = Earth(max_terms=max_terms)
        model.fit(X, y)
        n_basis = len(model.basis_)
        assert_true(n_basis <= max_terms + 2)
        n_coef = len(model.coef_)
        assert_true(n_coef <= n_basis)
        assert_true(n_coef >= 1)
        if max_terms != 1:
            continue
        # With max_terms == 1 every prediction is expected to equal the
        # mean of y.
        assert_list_almost_equal_value(model.predict(X), y.mean())
Example #21
0
def test_exhaustive_search():
    """With pruning disabled, basis, coefficient and transform sizes agree."""
    settings = dict(max_terms=13,
                    enable_pruning=False,
                    check_every=1,
                    thresh=0,
                    minspan=1,
                    endspan=1)
    model = Earth(**settings)
    model.fit(X, y)
    pruned_length = model.basis_.plen()
    assert_equal(pruned_length, model.coef_.shape[1])
    transformed_columns = model.transform(X).shape[1]
    assert_equal(transformed_columns, len(model.basis_))
Example #22
0
def test_fit():
    """The R-squared of a default fit stays within 0.01 of the stored value."""
    model = Earth(**default_params)
    model.fit(X, y)
    observed = str(model.rsq_)
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_regress.txt')
    # To regenerate the reference, write ``observed`` to ``reference_path``.
    with open(reference_path, 'r') as handle:
        stored = handle.read()
    deviation = abs(float(observed) - float(stored))
    assert_true(deviation < .01)
Example #23
0
def test_nb_terms():
    """The basis size never exceeds ``max_terms`` for several settings."""
    for max_terms in (1, 3, 12, 13):
        model = Earth(max_terms=max_terms)
        model.fit(X, y)
        n_basis = len(model.basis_)
        assert_true(n_basis <= max_terms)
        n_coef = len(model.coef_)
        assert_true(n_coef <= n_basis)
        assert_true(n_coef >= 1)
        if max_terms != 1:
            continue
        # With max_terms == 1 every prediction is expected to equal the
        # mean of y.
        assert_list_almost_equal_value(model.predict(X), y.mean())
Example #24
0
def test_smooth():
    """The R-squared of a smoothed fit stays within 0.01 of the stored value."""
    smooth_model = Earth(penalty=1, smooth=True)
    smooth_model.fit(X, y)
    observed = str(smooth_model.rsq_)
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_regress_smooth.txt')
    # To regenerate the reference, write ``observed`` to ``reference_path``.
    with open(reference_path, 'r') as handle:
        stored = handle.read()
    deviation = abs(float(observed) - float(stored))
    assert_true(deviation < .01)
Example #25
0
def test_smooth():
    """The R-squared of a smoothed fit stays within 0.05 of the stored value."""
    smooth_model = Earth(penalty=1, smooth=True)
    smooth_model.fit(X, y)
    observed = str(smooth_model.rsq_)
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_regress_smooth.txt')
    # To regenerate the reference, write ``observed`` to ``reference_path``.
    with open(reference_path, 'r') as handle:
        stored = handle.read()
    deviation = abs(float(observed) - float(stored))
    assert_true(deviation < .05)
Example #26
0
def test_fit():
    """The R-squared of a default fit stays within 0.05 of the stored value."""
    model = Earth(**default_params)
    model.fit(X, y)
    observed = str(model.rsq_)
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_regress.txt')
    # To regenerate the reference, write ``observed`` to ``reference_path``.
    with open(reference_path, 'r') as handle:
        stored = handle.read()
    deviation = abs(float(observed) - float(stored))
    assert_true(deviation < .05)
Example #27
0
 def MARS(self, X=None, Y=None):
     """Fit a Multivariate Adaptive Regression Splines model and pickle it.

     When both ``X`` and ``Y`` are given they replace ``self.sampled_X`` and
     ``self.sampled_Y`` before fitting. The fitted model is written to
     ``./Model/ModelTransfer/MARS_<mode>.sav``.
     """
     from pyearth import Earth
     rgr = Earth()
     if X is not None and Y is not None:
         self.sampled_X, self.sampled_Y = X, Y
     # Train once; the earlier second, identical fit call only repeated the
     # work.
     rgr.fit(self.sampled_X, self.sampled_Y)
     filename = './Model/ModelTransfer/MARS_' + self.mode + '.sav'
     # The context manager closes the file even if pickling fails.
     with open(filename, 'wb') as model_file:
         pickle.dump(rgr, model_file)
Example #28
0
def test_fit():
    """The R-squared of a seeded default fit stays within 0.05 of the stored value."""
    numpy.random.seed(0)
    model = Earth(**default_params)
    model.fit(X, y)
    observed = str(model.rsq_)
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_regress.txt')
    # Optionally refresh the stored reference before comparing.
    if regenerate_target_files:
        with open(reference_path, 'w') as handle:
            handle.write(observed)
    with open(reference_path, 'r') as handle:
        stored = handle.read()
    deviation = abs(float(observed) - float(stored))
    assert_true(deviation < .05)
Example #29
0
def test_linvars():
    """Fit with variables 0-9 forced linear and compare against the stored text."""
    model = Earth(**default_params)
    model.fit(X, y, linvars=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    actual = '\n'.join([str(model.trace()), model.summary()])
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_linvars_regress.txt')
    # To regenerate the reference, write ``actual`` to ``reference_path``.
    with open(reference_path, 'r') as handle:
        expected = handle.read()

    assert_equal(actual, expected)
Example #30
0
def runModel(i,featureCombo):
    """Cross-validate an Earth model on ``featureCombo`` and log the mean MAE.

    For every ``(train, test)`` index pair in the module-level ``kf``, a
    fresh ``Earth`` model is fitted on the training rows of ``trainCleaned``
    and scored with ``getMAE`` on the test rows. ``i`` is only used in the
    log messages.
    """
    mae = np.array([])
    logging.warning('try alpha = %s' % i)
    for ktrain, ktest in kf:
        x = trainCleaned.iloc[ktrain,]
        y = trainCleaned.iloc[ktest,]
        model = Earth()
        model.fit(x[featureCombo], x['Expected'])
        # This line was tab-indented inside a space-indented body, which
        # Python 3 rejects with a TabError; it belongs to the loop body.
        pred = model.predict(y[featureCombo])
        mae = np.append(mae, (getMAE(pred, y['Expected'])))
    logging.warning('average 10-fold MAE for alpha %s feature %s' % (i,featureCombo))
    logging.warning(mae.mean())
Example #31
0
def test_linvars():
    """Fit with variables 0-9 forced linear and compare against the stored text."""
    model = Earth(**default_params)
    model.fit(X, y, linvars=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    actual = '\n'.join([str(model.trace()), model.summary()])
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_linvars_regress.txt')
    # To regenerate the reference, write ``actual`` to ``reference_path``.
    with open(reference_path, 'r') as handle:
        expected = handle.read()

    assert_equal(actual, expected)
Example #32
0
def test_fast():
    """The fast search with ``fast_K=10, fast_h=1`` matches the normal summary."""
    regular = Earth(max_terms=10, max_degree=5, **default_params)
    regular.fit(X, y)
    normal_summary = regular.summary()

    accelerated = Earth(use_fast=True,
                        max_terms=10,
                        max_degree=5,
                        fast_K=10,
                        fast_h=1,
                        **default_params)
    accelerated.fit(X, y)
    assert_equal(normal_summary, accelerated.summary())
Example #33
0
def test_smooth():
    """The R-squared of a seeded smoothed fit stays within 0.05 of the stored value."""
    numpy.random.seed(0)
    smooth_model = Earth(penalty=1, smooth=True)
    smooth_model.fit(X, y)
    observed = str(smooth_model.rsq_)
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_regress_smooth.txt')
    # Optionally refresh the stored reference before comparing.
    if regenerate_target_files:
        with open(reference_path, 'w') as handle:
            handle.write(observed)
    with open(reference_path, 'r') as handle:
        stored = handle.read()
    deviation = abs(float(observed) - float(stored))
    assert_true(deviation < .05)
Example #34
0
def test_nb_degrees():
    """Every basis function's degree lies between 0 and ``max_degree``."""
    for max_degree in (1, 2, 12, 13):
        settings = dict(max_terms=10,
                        max_degree=max_degree,
                        enable_pruning=False,
                        check_every=1,
                        thresh=0,
                        minspan=1,
                        endspan=1)
        model = Earth(**settings)
        model.fit(X, y)
        for term in model.basis_:
            assert_true(term.degree() >= 0)
            assert_true(term.degree() <= max_degree)
Example #35
0
def test_linvars():
    """The R-squared of a fit with variables 0-9 forced linear equals the stored text."""
    model = Earth(**default_params)
    model.fit(X, y, linvars=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    observed = str(model.rsq_)
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_linvars_regress.txt')
    # Optionally refresh the stored reference before comparing.
    if regenerate_target_files:
        with open(reference_path, 'w') as handle:
            handle.write(observed)
    with open(reference_path, 'r') as handle:
        stored = handle.read()

    # Exact string comparison, unlike the tolerance-based sibling tests.
    assert_equal(observed, stored)
Example #36
0
def test_nb_degrees():
    """Every basis function's degree lies between 0 and ``max_degree``."""
    for max_degree in (1, 2, 12, 13):
        settings = dict(max_terms=10,
                        max_degree=max_degree,
                        enable_pruning=False,
                        check_every=1,
                        thresh=0,
                        minspan=1,
                        endspan=1)
        model = Earth(**settings)
        model.fit(X, y)
        for term in model.basis_:
            assert_true(term.degree() >= 0)
            assert_true(term.degree() <= max_degree)
Example #37
0
def mars(p, xLabels, yLabel):
    """Fit a MARS model on a shuffled 80/20 split and plot feature importances.

    Args:
        p: DataFrame holding the feature columns and the target column.
        xLabels: Names of the feature columns.
        yLabel: Name of the target column.

    Returns:
        ``(r2, mse)`` computed on the held-out rows.

    Side effects:
        Prints the model trace and summary, plots residuals, and for each
        importance criterion saves and shows a bar chart. The module-level
        ``image_num`` counter is advanced once per saved chart.
    """
    global image_num
    criteria = ('rss', 'gcv', 'nb_subsets')
    # Randomly shuffle rows
    p = p.sample(frac=1).reset_index(drop=True)
    # Split train and test: the last ~20 % of the shuffled rows are held out
    twentyPercent = -1 * round(p.shape[0] * 0.2)
    n = len(xLabels)
    xCol = p[xLabels].values.reshape(-1, n)
    X_train = xCol[:twentyPercent]
    X_test = xCol[twentyPercent:]
    y_train = p[yLabel][:twentyPercent].values.reshape(-1, 1)
    y_test = p[yLabel][twentyPercent:].values.reshape(-1, 1)
    # Fit MARS model
    model = Earth(feature_importance_type=criteria)
    model.fit(X_train, y_train)
    # Make predictions
    predicted = model.predict(X_test)
    r2 = r2_score(y_test, predicted)
    mse = mean_squared_error(y_test, predicted)
    predicted = predicted.reshape(-1, 1)
    # Plot residuals
    plotResiduals(y_test, predicted)
    # Print summary
    print(model.trace())
    print(model.summary())
    # Plot feature importances, one chart per criterion
    importances = model.feature_importances_
    x = list(range(len(xLabels)))
    for crit in criteria:
        # Highest importance first
        ranked = sorted(zip(importances[crit], xLabels), reverse=True)
        coeff = [abs(score) for score, _ in ranked]
        feature = [featureToLabel[name] for _, name in ranked]
        plt.clf()
        plt.xticks(x, feature, rotation='vertical')
        plt.bar(x, coeff, align='center', alpha=0.5)
        plt.xlabel('Features')
        plt.ylabel("Importance (" + crit + ")")
        plt.tight_layout()
        # Save before showing: once show() returns the figure may already
        # have been closed, which would leave a blank image on disk.
        plt.savefig(image_path.format(image_num), bbox_inches='tight')
        plt.show()
        image_num += 1
    return r2, mse
Example #38
0
def test_missing_data():
    """With ~5 % of entries blanked out, the score stays within 0.03 of the stored value."""
    model = Earth(allow_missing=True, **default_params)
    # Boolean mask selecting roughly 5 % of the entries.
    mask = numpy.random.binomial(1, .05, X.shape).astype(bool)
    X_missing = X.copy()
    X_missing[mask] = None
    model.fit(X_missing, y)
    observed = str(model.score(X_missing, y))
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_regress_missing_data.txt')
    # To regenerate the reference, write ``observed`` to ``reference_path``.
    with open(reference_path, 'r') as handle:
        stored = handle.read()
    deviation = abs(float(observed) - float(stored))
    assert_true(deviation < .03)
Example #39
0
def test_linear_fit():
    """``linear_fit`` coefficients match statsmodels OLS and GLS solutions."""
    from statsmodels.regression.linear_model import GLS, OLS

    model = Earth(**default_params)

    # Unweighted: compare with ordinary least squares on the basis matrix.
    model.fit(X, y)
    model.linear_fit(X, y)
    ols_params = OLS(y, model.transform(X)).fit().params
    ols_gap = numpy.mean((model.coef_ - ols_params)**2)
    assert_almost_equal(ols_gap, 0.0)

    # Weighted: compare with generalized least squares, which is given the
    # reciprocal of the weights.
    sample_weight = 1.0 / (numpy.random.normal(size=y.shape)**2)
    model.fit(X, y)
    model.linear_fit(X, y, sample_weight)
    gls_params = GLS(y, model.transform(X), 1.0 / sample_weight).fit().params
    gls_gap = numpy.mean((model.coef_ - gls_params)**2)
    assert_almost_equal(gls_gap, 0.0)
Example #40
0
def test_missing_data():
    """With ~5 % of entries blanked out, the score stays within 0.03 of the stored value."""
    model = Earth(allow_missing=True, **default_params)
    # Boolean mask selecting roughly 5 % of the entries.
    mask = numpy.random.binomial(1, .05, X.shape).astype(bool)
    X_missing = X.copy()
    X_missing[mask] = None
    model.fit(X_missing, y)
    observed = str(model.score(X_missing, y))
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_regress_missing_data.txt')
    # To regenerate the reference, write ``observed`` to ``reference_path``.
    with open(reference_path, 'r') as handle:
        stored = handle.read()
    deviation = abs(float(observed) - float(stored))
    assert_true(deviation < .03)
Example #41
0
class Diagnostics:
    """Fit an ``EarthModel`` on a feature subset and expose diagnostic views."""

    def __init__(self, env, features, *args, **kwargs):
        """Select the flagged columns of ``env.X`` and fit the model at once.

        ``features`` is cast to a boolean mask over the columns of ``env.X``;
        extra positional and keyword arguments go to ``EarthModel``.
        """
        self.env = env
        self.solution = features
        column_mask = features.astype(bool)
        self.data = env.X.loc[:, column_mask].copy()
        self.y = self.env.y
        self.model = EarthModel(*args, **kwargs)
        # Both are filled in by _fit().
        self.y_pred = None
        self.error = None
        self._fit()

    def _fit(self):
        """Fit the model and store in-sample predictions and residuals."""
        self.model.fit(self.data, self.y)
        self.y_pred = self.model.predict(self.data)
        predicted = self.y_pred.flatten()
        observed = self.env.y.flatten()
        self.error = (predicted - observed)

    def summary(self):
        """Return the model summary table sorted by its ``feature`` column."""
        table = model_summary(self.model, self.data.columns)
        return table.sort_values("feature")

    def plot_thresholds(self):
        """Delegate to the module-level ``plot_thresholds`` helper."""
        return plot_thresholds(self.summary(), self.data)

    def plot_autocorrelations(self):
        """Draw partial and full autocorrelation plots of the residuals."""
        from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
        plot_pacf(self.error)
        plot_acf(self.error)

    def plot_qq(self):
        """Draw a probability plot of the residuals and print its R squared."""
        _, axis = plt.subplots()
        _, (_slope, _intercept, r_norm) = scipy.stats.probplot(self.error,
                                                                 plot=axis,
                                                                 fit=True)
        print("R squared {:.4f}".format(r_norm**2))

    def plot_pred(self):
        """Plot predicted against true values, indexed like the input data."""
        columns = {"predicted": self.y_pred, "True value": self.y}
        frame = pd.DataFrame(columns, index=self.data.index)
        title = "Model prediction for {}".format(self.env.target)
        return frame.hvplot().opts(title=title)

    def score(self):
        """Return a one-line summary of the model's MSE, GCV, RSQ and GRSQ."""
        fitted = self.model
        return "MSE: {:.4f}, GCV: {:.4f}, RSQ:{:.4f}, GRSQ: {:.4f}".format(
            fitted.mse_, fitted.gcv_, fitted.rsq_, fitted.grsq_)
Example #42
0
 def marsmodelorr(self, use_smY=True, slope_trunc=0.00001, savgol_window=151, savgol_order=3, ex_order=51):
     """Fit a piecewise model to (X_, Y_) and derive figures of merit.

     Reads ``self.X_``, ``self.Y_``, ``self.Xf_`` and ``self.Yf_`` and stores
     the resulting dictionary in ``self.fom``; nothing is returned.

     Parameters:
         use_smY: fit the model to the smoothed ``Y`` when true, otherwise
             to the raw ``Y``.
         slope_trunc: minimum change of the normalised derivative between
             neighbouring samples that counts as a segment boundary.
         savgol_window, savgol_order: forwarded to every ``savgol`` call.
         ex_order: ``order`` argument passed to ``argrelextrema``.
     """
     # NOTE(review): Xf/Yf and X/Y look like two related data sets (e.g. a
     # full curve and a sub-range) - confirm against the caller.
     Xf, Yf = self.Xf_, self.Yf_
     X, Y = self.X_, self.Y_
     fom = {}
     # smooth the data
     # (savgol is presumably a Savitzky-Golay filter - TODO confirm)
     smY = savgol(Y, savgol_window, savgol_order)
     # perform mars
     # (MARS is presumably an alias of a MARS/Earth regressor - TODO confirm)
     model = MARS()
     if use_smY:
         model.fit(X, smY)
     else:
         model.fit(X, Y)
     Y_h = model.predict(X)
     '''
     calculate dydx based on mars model to get knots and intercepts as this is 
     complicated to extract from hinge functions
     '''
     # finite-difference derivative of the fitted curve with respect to X
     diff1 = np.diff(Y_h) / np.diff(X)
     # shift and scale the derivative so that it spans 0..1
     tdiff1 = diff1 - np.nanmin(diff1)
     tdiff1 = tdiff1 / np.nanmax(tdiff1)
     #calculate slopes of linear segments
     # ID holds the indices where the normalised derivative jumps by more
     # than slope_trunc, i.e. the segment boundaries
     ID = [i for i in range(1, len(tdiff1)) if np.abs(tdiff1[i] - tdiff1[i - 1]) > slope_trunc]
     ID.insert(0, 0)
     ID.append(np.argmax(X))  # this might cause an error
     # mean derivative between consecutive boundaries; the range stops one
     # short, so the segment ending at the appended last boundary is skipped
     slopes = [np.nanmean(diff1[ID[i - 1]:ID[i]]) for i in range(1, len(ID) - 1)]
     # y-intercept of each segment's line, anchored at the segment start
     a = [Y_h[ID[i]] - slopes[i] * X[ID[i]] for i in range(len(ID) - 2)]
     # IDM: steepest segment; IDm: segment whose slope is closest to zero
     IDM, IDm = np.argmax(slopes), np.argmin(np.abs(slopes))
     # intercept of highest slope and zero as well as highest slope and lowest slope
     fom['zinter'] = -a[IDM] / slopes[IDM]
     fom['lminter'] = (a[IDM] - a[IDm]) / (slopes[IDm] - slopes[IDM])
     fom['max_slope'] = slopes[IDM]
     # value of the steepest line at the intersection point
     fom['curr_lminter_model'] = fom['lminter'] * slopes[IDM] + a[IDM]
     # mean of the raw Y values whose X lies within 0.5 of the intersection
     fom['curr_lminter_data'] = np.mean(Y[np.where(np.abs(X - fom['lminter']) < 0.5)[0]])
     # calculate how the CV curves might look without the 'ORR part'
     # (residuals after subtracting the model prediction)
     srYs = smY - model.predict(X)
     srYf = savgol(Yf - model.predict(Xf), savgol_window, savgol_order)
     # calculate their derivative
     # NOTE(review): dsrYf is not used again in this method
     dsrYf = savgol(np.diff(srYf) / np.diff(Xf), savgol_window, savgol_order)
     # find local minima / maxima for extraction of redox pots
     # NOTE(review): the search runs on srYf itself, not on its derivative
     # dsrYf - confirm which one is intended
     redID_f = argrelextrema(srYf, np.less, order=ex_order)
     oxID_f = argrelextrema(srYf, np.greater, order=ex_order)
     # calc some more foms like position of redox waves
     fom['redpot_f'], fom['redpot_f_var'] = np.nanmean(Xf[redID_f]), np.nanstd(Xf[redID_f])
     fom['oxpot_f'], fom['oxpot_f_var'] = np.nanmean(Xf[oxID_f]), np.nanstd(Xf[oxID_f])
     # keep the inputs and intermediate curves alongside the scalar results
     fom['X'], fom['Xf'] = X, Xf
     fom['srYs'], fom['srYf'], fom['smY'] = srYs, srYf, smY
     fom['Y'], fom['Yf'], fom['Y_h'] = Y, Yf, Y_h
     # sum of squared differences between the fitted and the raw curve
     fom['noise_lvl'] = np.sum((Y_h - Y) ** 2, axis=0)
     self.fom = fom
Example #43
0
def test_fast():
    """The fast search with ``fast_K=10, fast_h=1`` matches the normal summary."""
    regular = Earth(max_terms=10, max_degree=5, **default_params)
    regular.fit(X, y)
    normal_summary = regular.summary()

    accelerated = Earth(use_fast=True,
                        max_terms=10,
                        max_degree=5,
                        fast_K=10,
                        fast_h=1,
                        **default_params)
    accelerated.fit(X, y)
    assert_equal(normal_summary, accelerated.summary())
Example #44
0
def test_linear_fit():
    """The private linear fit matches statsmodels OLS and GLS solutions."""
    from statsmodels.regression.linear_model import GLS, OLS

    model = Earth(**default_params)

    # Unweighted: compare with ordinary least squares on the basis matrix.
    model.fit(X, y)
    model._Earth__linear_fit(X, y)
    ols_params = OLS(y, model.transform(X)).fit().params
    ols_gap = numpy.mean((model.coef_ - ols_params) ** 2)
    assert_almost_equal(ols_gap, 0.0)

    # Weighted: compare with generalized least squares, which is given the
    # reciprocal of the weights.
    sample_weight = 1.0 / (numpy.random.normal(size=y.shape) ** 2)
    model.fit(X, y)
    model._Earth__linear_fit(X, y, sample_weight)
    gls_params = GLS(y, model.transform(X), 1.0 / sample_weight).fit().params
    gls_gap = numpy.mean((model.coef_ - gls_params) ** 2)
    assert_almost_equal(gls_gap, 0.0)
Example #45
0
def calculate_earth_error(X, y, *args, **kwargs):
    """Fit an ``EarthModel`` and return its residuals, the model and features.

    Extra positional and keyword arguments are forwarded to ``EarthModel``.

    Returns:
        ``(error, model, features)`` where ``error`` is the flattened
        in-sample prediction minus the flattened ``y`` and ``features`` comes
        from ``get_signifficant_features``.
    """
    estimator = EarthModel(*args, **kwargs)
    fitted = estimator.fit(X, y)
    residuals = fitted.predict(X).flatten() - y.flatten()
    selected = get_signifficant_features(fitted)
    return residuals, fitted, selected
Example #46
0
def test_sparse():
    """Sparse inputs and sparse sample weights are rejected with TypeError."""
    X_sparse = csr_matrix(X)

    # Fitting on a sparse matrix.
    unfitted = Earth(**default_params)
    assert_raises(TypeError, unfitted.fit, X_sparse, y)

    # Every method of a fitted model that takes X.
    fitted = Earth(**default_params)
    fitted.fit(X, y)
    for method in (fitted.predict, fitted.predict_deriv,
                   fitted.transform, fitted.score):
        assert_raises(TypeError, method, X_sparse)

    # Sparse sample weights.
    weighted = Earth(**default_params)
    sparse_weight = csr_matrix([1.] * X.shape[0])
    assert_raises(TypeError, weighted.fit, X, y, sparse_weight)
Example #47
0
def test_sparse():
    """Sparse inputs and sparse sample weights are rejected with TypeError."""
    X_sparse = csr_matrix(X)

    # Fitting on a sparse matrix.
    unfitted = Earth(**default_params)
    assert_raises(TypeError, unfitted.fit, X_sparse, y)

    # Every method of a fitted model that takes X.
    fitted = Earth(**default_params)
    fitted.fit(X, y)
    for method in (fitted.predict, fitted.predict_deriv,
                   fitted.transform, fitted.score):
        assert_raises(TypeError, method, X_sparse)

    # Sparse sample weights.
    weighted = Earth(**default_params)
    sparse_weight = csr_matrix([1.] * X.shape[0])
    assert_raises(TypeError, weighted.fit, X, y, sparse_weight)
Example #48
0
def test_deriv():
    """``predict_deriv`` returns (samples, selected variables, 1) arrays."""
    model = Earth(**default_params)
    model.fit(X, y)

    # Without a selection, every input column gets a derivative.
    assert_equal(X.shape + (1,), model.predict_deriv(X).shape)

    # (variables argument, expected size of the second axis)
    selections = [
        (0, 1),
        ('x0', 1),
        ([1, 5, 7], 3),
        ([], 0),
        (['x2', 'x7', 'x0', 'x1'], 4),
        (['x0'], 1),
        ([0], 1),
    ]
    n_samples = X.shape[0]
    for variables, n_selected in selections:
        derivative = model.predict_deriv(X, variables=variables)
        assert_equal((n_samples, n_selected, 1), derivative.shape)
Example #49
0
def test_pickle_compatibility():
    """A pickled and restored model equals the original and predicts alike."""
    model = Earth(**default_params).fit(X, y)
    model_copy = pickle.loads(pickle.dumps(model))
    assert_true(model_copy == model)
    same_predictions = numpy.all(model.predict(X) == model_copy.predict(X))
    assert_true(same_predictions)
    # The first basis function must be the root of the second, before and
    # after the round trip.
    for candidate in (model, model_copy):
        root = candidate.basis_[1]._get_root()
        assert_true(candidate.basis_[0] is root)
Example #50
0
def getTrain(trainData, testData):
    """Fit Earth on ``trainData`` and count sign agreements on ``testData``.

    Each record holds the features followed by the target in its last
    position; the record width is taken from ``testData[0]``.

    Returns:
        The number of test records whose target times the prediction is
        non-negative.
    """
    n_train = len(trainData)
    n_test = len(testData)
    target_pos = len(testData[0]) - 1  # index of the target column

    # Copy the training records into a feature matrix and a target column.
    X = numpy.zeros((n_train, target_pos))
    Y = numpy.zeros((n_train, 1))
    for row, record in enumerate(trainData):
        for col in range(target_pos):
            X[row][col] = record[col]
        Y[row][0] = float(record[target_pos])

    # Copy the test features only.
    dX = numpy.zeros((n_test, target_pos))
    for row, record in enumerate(testData):
        for col in range(target_pos):
            dX[row][col] = record[col]

    model = Earth()
    model.fit(X, Y)
    y_hat = model.predict(dX)

    agreements = 0
    for idx in range(n_test):
        if testData[idx][target_pos] * y_hat[idx] >= 0:
            agreements += 1
    return agreements
Example #51
0
def test_pandas_compatibility():
    """DataFrame column names end up as the forward trace's ``xlabels``."""
    import pandas
    colnames = ['xx' + str(i) for i in range(X.shape[1])]
    X_df = pandas.DataFrame(X)
    X_df.columns = colnames
    y_df = pandas.DataFrame(y)

    model = Earth(**default_params).fit(X_df, y_df)
    recorded_labels = model.forward_trace()._getstate()['xlabels']
    assert_list_equal(colnames, recorded_labels)
Example #52
0
def test_feature_importance():
    """Feature importances have one entry per column for every criterion."""
    criteria = ('rss', 'gcv', 'nb_subsets')
    n_features = X.shape[1]

    # A single criterion yields one flat sequence.
    for criterion in criteria:
        model = Earth(feature_importance_type=criterion, **default_params)
        model.fit(X, y)
        assert len(model.feature_importances_) == n_features

    # Several criteria yield a dict keyed by criterion.
    model = Earth(feature_importance_type=criteria, **default_params)
    model.fit(X, y)
    assert type(model.feature_importances_) == dict
    assert set(model.feature_importances_.keys()) == set(criteria)
    for _, values in model.feature_importances_.items():
        assert len(values) == n_features

    # An unknown criterion name is rejected at fit time.
    bad_model = Earth(feature_importance_type='bad_name', **default_params)
    assert_raises(ValueError, bad_model.fit, X, y)

    # A one-element tuple behaves like a single criterion.
    model = Earth(feature_importance_type=('rss',), **default_params)
    model.fit(X, y)
    assert len(model.feature_importances_) == n_features

    # Requesting importances with pruning disabled is rejected at fit time.
    unpruned = Earth(feature_importance_type='rss', enable_pruning=False,
                     **default_params)
    assert_raises(ValueError, unpruned.fit, X, y)
Example #53
0
def test_shape():
    """Check that Earth raises ValueError on inconsistently shaped inputs.

    Four situations are exercised: a fitted model given only a subset of
    the columns, ``fit`` with fewer rows in X than in y, ``fit`` with fewer
    rows in y than in X, and ``fit`` with a sample-weight vector shorter
    than X.
    """
    model = Earth(**default_params)
    model.fit(X, y)

    # Only the first five columns of the data the model was fitted on.
    X_reduced = X[:, 0:5]
    assert_raises(ValueError, model.predict, X_reduced)
    assert_raises(ValueError, model.predict_deriv, X_reduced)
    assert_raises(ValueError, model.transform, X_reduced)
    assert_raises(ValueError, model.score, X_reduced)

    # X truncated to ten rows while y keeps its full length.
    model = Earth(**default_params)
    X_subsampled = X[0:10]
    assert_raises(ValueError, model.fit, X_subsampled, y)

    # y truncated to ten rows while X keeps its full length.  The slice
    # must come from y (not X) so the case really tests a short target.
    model = Earth(**default_params)
    y_subsampled = y[0:10]
    assert_raises(ValueError, model.fit, X, y_subsampled)

    # Sample weights truncated to ten entries.
    model = Earth(**default_params)
    sample_weights = numpy.array([1.] * len(X))
    sample_weights_subsampled = sample_weights[0:10]
    assert_raises(ValueError, model.fit, X, y, sample_weights_subsampled)
"""
=====================================================
Exporting a fitted Earth model as a sympy expression
=====================================================

A simple example returning a sympy expression describing the fit of a sine function computed by Earth.

"""

import numpy
from pyearth import Earth
from pyearth import export

# Build a reproducible synthetic data set: uniform inputs, and a response
# that is a scaled, shifted sine of column 6 plus Gaussian noise.
numpy.random.seed(2)
m, n = 1000, 10
X = 10 * numpy.random.uniform(size=(m, n)) - 40
signal = numpy.sin((X[:, 6])) - 4.0
noise = numpy.random.normal(size=m)
y = 100 * signal + 10 * noise

# Fit the Earth model to the synthetic data
model = Earth(max_degree=2, minspan_alpha=0.5, verbose=False)
model.fit(X, y)

summary_text = model.summary()
print(summary_text)

# Export the fitted model as a sympy expression and show it
print("Resulting sympy expression:")
sympy_expression = export.export_sympy(model)
print(sympy_expression)
Example #55
0
def test_score():
    """Compare ``score(X, y)`` with the pruning trace's rsq.

    The expected value is the ``rsq`` the pruning trace reports for the
    iteration returned by ``get_selected()``.
    """
    fitted = Earth(**default_params).fit(X, y)
    trace = fitted.pruning_trace()
    selected = trace.get_selected()
    expected_rsq = trace.rsq(selected)
    assert_almost_equal(expected_rsq, fitted.score(X, y))
Example #56
0
class MARSInterpolant(Surrogate):
    """Compute and evaluate a MARS interpolant

    MARS builds a model of the form

    .. math::

        \\hat{f}(x) = \\sum_{i=1}^{k} c_i B_i(x).

    The model is a weighted sum of basis functions :math:`B_i(x)`. Each basis
    function :math:`B_i(x)` takes one of the following three forms:

    1. a constant 1.
    2. a hinge function of the form :math:`\\max(0, x - const)` or \
       :math:`\\max(0, const - x)`. MARS automatically selects variables \
       and values of those variables for knots of the hinge functions.
    3. a product of two or more hinge functions. These basis functions can \
       model interaction between two or more variables.

    :param dim: Number of dimensions
    :type dim: int

    :ivar dim: Number of dimensions
    :ivar num_pts: Number of points in surrogate model
    :ivar X: Point incorporated in surrogate model (num_pts x dim)
    :ivar fX: Function values in surrogate model (num_pts x 1)
    :ivar updated: True if model is up-to-date (no refit needed)
    :ivar model: Earth object
    """
    def __init__(self, dim):
        self.num_pts = 0
        self.X = np.empty([0, dim])
        self.fX = np.empty([0, 1])
        self.dim = dim
        self.updated = False

        # pyearth is imported here rather than at module level, so the
        # dependency is only needed when a MARS surrogate is constructed.
        try:
            from pyearth import Earth
        except ImportError:
            print("Failed to import pyearth")
            raise  # re-raise the active exception with its traceback
        self.model = Earth()

    def _fit(self):
        """Compute new coefficients if the MARS interpolant is not updated."""
        if self.updated is False:
            with warnings.catch_warnings():
                # Suppress deprecation warnings emitted while fitting
                warnings.simplefilter("ignore")
                self.model.fit(self.X, self.fX)
            self.updated = True

    def predict(self, xx):
        """Evaluate the MARS interpolant at the points xx

        :param xx: Prediction points, must be of size num_pts x dim or (dim, )
        :type xx: numpy.ndarray

        :return: Prediction of size num_pts x 1
        :rtype: numpy.ndarray
        """
        self._fit()
        xx = np.atleast_2d(xx)
        return np.expand_dims(self.model.predict(xx), axis=1)

    def predict_deriv(self, xx):
        """Evaluate the derivative of the MARS interpolant at points xx

        :param xx: Prediction points, must be of size num_pts x dim or (dim, )
        :type xx: numpy.array

        :return: Derivative of the MARS interpolant at xx. For a single
            point of size (dim, ) this is the first entry of the array
            returned by ``Earth.predict_deriv``; for input with two or more
            dimensions the array returned by ``Earth.predict_deriv`` is
            passed back unchanged.
        :rtype: numpy.array
        """
        self._fit()
        xx = np.asarray(xx)
        if xx.ndim >= 2:
            # Already a batch of points: hand it to the model as is instead
            # of prepending another axis.
            return self.model.predict_deriv(xx, variables=None)
        # Single point: add the leading sample axis, then strip it again.
        dfx = self.model.predict_deriv(np.expand_dims(xx, axis=0),
                                       variables=None)
        return dfx[0]
Example #57
0
from sklearn import preprocessing
from sklearn.feature_extraction import DictVectorizer
from pyearth import Earth
from matplotlib import pyplot

# Load the purchase sheet.  `sheet_name` is the current spelling of the
# keyword; the older `sheetname` spelling is no longer accepted by recent
# pandas releases.
df = pd.read_excel('relay-foods.xlsx', sheet_name='Purchase Data - Full Study')

# Treat the identifier columns as categorical values.
df['OrderId'] = df['OrderId'].astype('category')
df['CommonId'] = df['CommonId'].astype('category')

# Drop the date columns, then split off the target and the feature columns.
col_names = ['OrderDate', 'PickupDate']
df = df.drop(col_names, axis=1)
y = df['TotalCharges']
df_2 = df[['OrderId', 'UserId', 'PupId']]

# One {column: value} dict per row, which is the input DictVectorizer takes.
X = [dict(r.items()) for _, r in df_2.iterrows()]
train_fea = DictVectorizer().fit_transform(X)

#Fit an Earth model
model = Earth()
model.fit(train_fea,y)

#Print the model
print(model.trace())
print(model.summary())

# Predict on the same vectorized features the model was fitted on; the raw
# list of dicts in X is not in the representation the model was trained on.
y_hat = model.predict(train_fea)
Example #58
0
# Stack the two responses column-wise so Earth fits them jointly.
y_mix = np.concatenate((y1[:, np.newaxis], y2[:, np.newaxis]), axis=1)

# Weight given to the first output in each run; the second gets 1 - alpha.
alphas = [1., 0.8, 0.6, 0.4, 0.2, 0.]
n_plots = len(alphas)
k = 1  # index of the left-hand subplot for the current run
fig = plt.figure()
for i, alpha in enumerate(alphas):
    # Fit an Earth model
    model = Earth(max_degree=5,
                  minspan_alpha=.05,
                  endspan_alpha=.05,
                  max_terms=10,
                  check_every=1,
                  thresh=0.)
    output_weight = np.array([alpha, 1 - alpha])
    model.fit(X, y_mix, output_weight=output_weight)
    print(model.summary())

    # Predict once and reuse the result for the error and both plots
    y_hat = model.predict(X)

    # Per-output mean squared error
    mse = ((y_hat - y_mix) ** 2).mean(axis=0)

    # Left column: first output against X[:, 6]
    ax = plt.subplot(n_plots, 2, k)
    ax.set_ylabel("Run {0}".format(i + 1), rotation=0, labelpad=20)
    plt.plot(X[:, 6], y_mix[:, 0], 'r.')
    plt.plot(X[:, 6], y_hat[:, 0], 'b.')
    plt.title("MSE: {0:.3f}, Weight : {1:.1f}".format(mse[0], alpha))

    # Right column: second output against X[:, 5]
    plt.subplot(n_plots, 2, k + 1)
    plt.plot(X[:, 5], y_mix[:, 1], 'r.')
    plt.plot(X[:, 5], y_hat[:, 1], 'b.')
    plt.title("MSE: {0:.3f}, Weight : {1:.1f}".format(mse[1], 1 - alpha))

    # Move on to the next row of the subplot grid; without this every run
    # would be drawn onto subplots 1 and 2.
    k += 2
Example #59
0
@author: jasonrudy
'''
import numpy
from pyearth import Earth
from matplotlib import pyplot
# Synthetic one-dimensional data: uniform inputs in [-10, 10) and a noisy
# response built from the input column.
m = 1000
x = 20*(numpy.random.uniform(size=(m,1)) - .5)
y = x[:,0]*(x[:,0]<0) + x[:,0]*(x[:,0]>0) + 1*numpy.random.normal(size=m)

# Show what is being fed to the model.  Single-argument print(...) behaves
# the same as a statement (Python 2) and as a function call (Python 3).
print(y.shape)
print(y.dtype)
print(y)
print(x.shape)
print(x.dtype)

# Fit the model and predict on the training inputs
model = Earth()
model.fit(x,y)
y_hat = model.predict(x)
print(model.trace())
print(model)

# Plot data (red) against predictions (blue) and save the figure
pyplot.figure(figsize=(10,5))
pyplot.plot(x[:,0],y,'r.')
pyplot.plot(x[:,0],y_hat,'b.')
ax = pyplot.gca()
pyplot.setp(ax, frame_on=False)
pyplot.savefig('demo.pdf',transparent=True)




Example #60
0
def mars_regr(x, y):
    """Fit a default-configured Earth model to ``x`` and ``y``.

    Both arguments are converted with ``np.asarray`` before fitting; the
    value returned by ``Earth.fit`` is passed back to the caller.
    """
    features = np.asarray(x)
    targets = np.asarray(y)
    return Earth().fit(features, targets)