Example #1
0
def test_fast():
    """Check that the fast forward pass gives the same summary as the normal one.

    Two models are fitted on the module-level ``X``/``y``: one with the
    regular settings and one with ``use_fast=True`` (``fast_K=10``,
    ``fast_h=1``). Their ``summary()`` texts must be equal.
    """
    # Reference model: regular forward pass.
    baseline = Earth(max_terms=10, max_degree=5, **default_params)
    baseline.fit(X, y)
    normal_summary = baseline.summary()

    # Same limits, but with the fast search switched on.
    accelerated = Earth(max_terms=10, max_degree=5,
                        use_fast=True, fast_K=10, fast_h=1,
                        **default_params)
    accelerated.fit(X, y)
    fast_summary = accelerated.summary()

    assert_equal(normal_summary, fast_summary)
Example #2
0
def test_fast():
    """Compare the summaries produced with and without ``use_fast``.

    Both models share ``max_terms=10`` and ``max_degree=5`` and are fitted
    on the module-level ``X``/``y``; the test passes when their
    ``summary()`` strings are identical.
    """
    regular_model = Earth(max_terms=10, max_degree=5, **default_params)
    regular_model.fit(X, y)

    fast_model = Earth(use_fast=True, fast_K=10, fast_h=1,
                       max_terms=10, max_degree=5,
                       **default_params)
    # The regular summary is read before the fast model is fitted.
    normal_summary = regular_model.summary()
    fast_model.fit(X, y)

    assert_equal(normal_summary, fast_model.summary())
Example #3
0
def test_pathological_cases():
    """Fit Earth on stored pathological data sets and compare the summaries.

    For each case, ``pathological_data/<case>.csv`` provides the data (its
    ``y`` column is the response), an optional ``sample_weight`` setting
    names a CSV file with a ``sample_weight`` column, and
    ``pathological_data/<case>.txt`` holds the expected ``summary()`` text.
    """
    import pandas
    directory = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'pathological_data')
    cases = {'issue_44': {},
             'issue_50': {'penalty': 0.5,
                          'minspan': 1,
                          'allow_linear': False,
                          'endspan': 1,
                          'check_every': 1,
                          'sample_weight': 'issue_50_weight.csv'}}
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for case, settings in cases.items():
        data = pandas.read_csv(os.path.join(directory, case + '.csv'))
        y = data['y']
        del data['y']
        X = data
        # 'sample_weight' names a file rather than an Earth parameter, so it
        # is removed from the settings before they reach the constructor.
        weight_file = settings.pop('sample_weight', None)
        if weight_file is None:
            sample_weight = None
        else:
            filename = os.path.join(directory, weight_file)
            sample_weight = pandas.read_csv(filename)['sample_weight']
        model = Earth(**settings)
        model.fit(X, y, sample_weight=sample_weight)
        with open(os.path.join(directory, case + '.txt'), 'r') as infile:
            correct = infile.read()
        assert_equal(model.summary(), correct)
Example #4
0
def test_pathological_cases():
    """Regression test over the data sets stored in ``pathological_data``.

    Every case is read from ``<case>.csv`` (column ``y`` is the response),
    fitted with the case's settings and, when given, the weights from the
    ``sample_weight`` column of the named CSV file. The resulting
    ``summary()`` must equal the contents of ``<case>.txt``.
    """
    import pandas
    directory = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'pathological_data')
    cases = {
        'issue_44': {},
        'issue_50': {
            'penalty': 0.5,
            'minspan': 1,
            'allow_linear': False,
            'endspan': 1,
            'check_every': 1,
            'sample_weight': 'issue_50_weight.csv'
        }
    }
    # Use items() so the test also runs on Python 3 (iteritems() is gone there).
    for case, settings in cases.items():
        data = pandas.read_csv(os.path.join(directory, case + '.csv'))
        y = data['y']
        del data['y']
        X = data
        # The weight file name is not a constructor argument; take it out of
        # the settings before building the model.
        weight_file = settings.pop('sample_weight', None)
        sample_weight = None
        if weight_file is not None:
            filename = os.path.join(directory, weight_file)
            sample_weight = pandas.read_csv(filename)['sample_weight']
        model = Earth(**settings)
        model.fit(X, y, sample_weight=sample_weight)
        with open(os.path.join(directory, case + '.txt'), 'r') as infile:
            correct = infile.read()
        assert_equal(model.summary(), correct)
def mars(x_train, x_test, y_train, y_test, timestamp):
    """Fit a MARS (``Earth``) model and build its regression report.

    :param x_train: training predictors passed to ``Earth.fit``
    :param x_test: predictors used for the prediction
    :param y_train: training response passed to ``Earth.fit``
    :param y_test: observed response compared against the prediction
    :param timestamp: forwarded to ``outputReport.regression_extanded_results``
    :return: tuple ``(score, fileName, MSE, MAD, MAPE, result)`` where
        ``score`` is the squared correlation between ``y_test`` and the
        prediction and ``result`` is the report text with the model
        parameters and summary appended
    """
    # set model
    model = Earth(max_degree=1, penalty=1.0, endspan=5)

    # fit and predict
    model = model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    # score: squared Pearson correlation between observed and predicted values
    correlation_matrix = np.corrcoef(y_test, y_pred)
    correlation_xy = correlation_matrix[0, 1]
    score = correlation_xy**2

    MSE, MAD, MAPE = outputReport.regression_basic_results(y_test, y_pred)
    fileName, result = outputReport.regression_extanded_results(
        timestamp, y_test, y_pred, "mars")
    try:
        model_summary = str(model.summary())
        model_summary_final = model_summary.replace("\n", "<br>")
        result += "<br>Model Parameters:<br>" + str(model.get_params(
        )) + "<br>Model Summary:<br>" + model_summary_final
    except Exception:
        # The summary is best effort, but KeyboardInterrupt/SystemExit must
        # still propagate, hence no bare ``except``.
        result += "<br>Model Summary is not available for MARS"
    return score, fileName, MSE, MAD, MAPE, result
Example #6
0
def test_fit():
    """Compare the fitted model's trace and summary with ``earth_regress.txt``."""
    model = Earth(**default_params)
    model.fit(X, y)
    # Trace first, then summary, separated by a single newline.
    actual = '\n'.join([str(model.trace()), model.summary()])
    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_regress.txt')
    with open(reference_path, 'r') as reference_file:
        expected = reference_file.read()
    assert_equal(actual, expected)
Example #7
0
def test_smooth():
    """Check a ``smooth=True`` fit against ``earth_regress_smooth.txt``."""
    smooth_model = Earth(penalty=1, smooth=True)
    smooth_model.fit(X, y)
    # Trace first, then summary, separated by a single newline.
    actual = '\n'.join([str(smooth_model.trace()), smooth_model.summary()])
    here = os.path.dirname(__file__)
    with open(os.path.join(here, 'earth_regress_smooth.txt'), 'r') as handle:
        expected = handle.read()
    assert_equal(actual, expected)
Example #8
0
def test_fit():
    """Fit with the default parameters and compare against the stored output.

    The expected text (trace, newline, summary) is read from
    ``earth_regress.txt`` located next to this file.
    """
    fitted = Earth(**default_params)
    fitted.fit(X, y)
    trace_text = str(fitted.trace())
    summary_text = fitted.summary()
    res = trace_text + '\n' + summary_text

    here = os.path.dirname(__file__)
    with open(os.path.join(here, 'earth_regress.txt'), 'r') as stored:
        prev = stored.read()
    assert_equal(res, prev)
Example #9
0
def test_smooth():
    """Fit a smoothed model and compare it with the stored reference text.

    The expected trace and summary are read from
    ``earth_regress_smooth.txt`` in this file's directory.
    """
    fitted = Earth(penalty=1, smooth=True)
    fitted.fit(X, y)
    trace_text = str(fitted.trace())
    summary_text = fitted.summary()
    res = trace_text + '\n' + summary_text

    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_regress_smooth.txt')
    with open(reference_path, 'r') as stored:
        prev = stored.read()
    assert_equal(res, prev)
Example #10
0
def get_cpu_model_mars(all_cpu_stats_csv):
    """Fit an Earth model predicting the ``cpu`` column and print its summary.

    :param all_cpu_stats_csv: path to a ``;``-separated CSV file containing
        a ``cpu`` column plus the predictor columns
    :return: None; the model summary is written to stdout
    """
    # Get the data frame
    df = pd.read_csv(all_cpu_stats_csv, sep=';')

    # Drop the target feature (cpu); every remaining column is a predictor
    X = df.drop('cpu', axis=1)

    # The model is fitted on the full data set; no validation split is made.
    model = Earth()
    model.fit(X, df['cpu'])

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('####### Earth Mars Regression Model ########')
    print(model.summary())
Example #11
0
def test_untrained():
    """Check how an unfitted ``Earth`` object responds to its public methods."""
    unfitted = Earth(**default_params)

    # Methods that need a fitted model must raise NotFittedError.
    for method_name in ('predict', 'transform', 'predict_deriv', 'score'):
        assert_raises(NotFittedError, getattr(unfitted, method_name), X)

    # the following should be changed to raise NotFittedError
    for trace_name in ('forward_trace', 'pruning_trace'):
        assert_equal(getattr(unfitted, trace_name)(), None)
    assert_equal(unfitted.summary(), "Untrained Earth Model")
Example #12
0
def test_untrained():
    """Exercise an ``Earth`` instance that has never been fitted.

    ``predict``, ``transform``, ``predict_deriv`` and ``score`` must raise
    ``NotFittedError``; the traces currently return ``None`` and
    ``summary()`` returns a fixed placeholder string.
    """
    model = Earth(**default_params)
    requires_fit = ('predict', 'transform', 'predict_deriv', 'score')
    for name in requires_fit:
        assert_raises(NotFittedError, getattr(model, name), X)

    # the following should be changed to raise NotFittedError
    forward = model.forward_trace()
    assert_equal(forward, None)
    pruning = model.pruning_trace()
    assert_equal(pruning, None)
    assert_equal(model.summary(), "Untrained Earth Model")
Example #13
0
def test_linvars():
    """Fit with columns 0-9 passed as ``linvars`` and compare the output.

    The trace and summary must equal the text stored in
    ``earth_linvars_regress.txt`` next to this file.
    """
    model = Earth(**default_params)
    model.fit(X, y, linvars=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    actual = '\n'.join([str(model.trace()), model.summary()])

    reference_path = os.path.join(os.path.dirname(__file__),
                                  'earth_linvars_regress.txt')
    # To regenerate the reference file, write ``actual`` to ``reference_path``.
    with open(reference_path, 'r') as reference_file:
        expected = reference_file.read()

    assert_equal(actual, expected)
Example #14
0
def test_linvars():
    """Regression test for fitting with the ``linvars`` argument.

    All ten column indices are passed as ``linvars``; the trace and
    summary are compared with ``earth_linvars_regress.txt``.
    """
    linear_columns = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    fitted = Earth(**default_params)
    fitted.fit(X, y, linvars=linear_columns)
    trace_text = str(fitted.trace())
    res = trace_text + '\n' + fitted.summary()

    here = os.path.dirname(__file__)
    filename = os.path.join(here, 'earth_linvars_regress.txt')
    # Writing ``res`` to ``filename`` refreshes the stored reference output.
    with open(filename, 'r') as stored:
        prev = stored.read()

    assert_equal(res, prev)
Example #15
0
def mars(p, xLabels, yLabel):
    """Fit a MARS model on a shuffled split of ``p`` and plot importances.

    The rows of ``p`` are shuffled, the last ~20% are held out as the test
    set and an ``Earth`` model is fitted on the rest. Residuals are plotted
    with ``plotResiduals``, the model trace and summary are printed, and one
    bar chart per importance criterion is saved to
    ``image_path.format(image_num)`` and then shown. The module-level
    ``image_num`` is incremented once per saved chart.

    :param p: DataFrame holding the ``xLabels`` and ``yLabel`` columns
    :param xLabels: list of predictor column names
    :param yLabel: name of the response column
    :return: tuple ``(r2, mse)`` computed on the held-out rows
    """
    global image_num
    criteria = ('rss', 'gcv', 'nb_subsets')
    # Randomly shuffle rows
    p = p.sample(frac=1).reset_index(drop=True)
    # Split train and test. int() keeps the slice bound an integer on
    # Python 2, where round() returns a float.
    twentyPercent = -int(round(p.shape[0] * 0.2))
    n = len(xLabels)
    xCol = p[xLabels].values.reshape(-1, n)
    X_train = xCol[:twentyPercent]
    X_test = xCol[twentyPercent:]
    y_train = p[yLabel][:twentyPercent].values.reshape(-1, 1)
    y_test = p[yLabel][twentyPercent:].values.reshape(-1, 1)
    # Fit MARS model
    model = Earth(feature_importance_type=criteria)
    model.fit(X_train, y_train)
    # Make predictions
    predicted = model.predict(X_test)
    r2 = r2_score(y_test, predicted)
    mse = mean_squared_error(y_test, predicted)
    predicted = predicted.reshape(-1, 1)
    # Plot residuals
    plotResiduals(y_test, predicted)
    # Print summary
    print(model.trace())
    print(model.summary())
    # Plot feature importances
    importances = model.feature_importances_
    # Bar positions do not depend on the criterion, so build them once.
    positions = list(range(len(xLabels)))
    for crit in criteria:
        # Most important feature first.
        ranked = sorted(zip(importances[crit], xLabels), reverse=True)
        coeff = [abs(value) for value, _ in ranked]
        feature = [featureToLabel[name] for _, name in ranked]
        plt.clf()
        plt.xticks(positions, feature, rotation='vertical')
        plt.bar(positions, coeff, align='center', alpha=0.5)
        plt.xlabel('Features')
        plt.ylabel("Importance (" + crit + ")")
        plt.tight_layout()
        # Save before show(): once the window shown by show() is closed the
        # figure may be empty, which would write a blank image.
        plt.savefig(image_path.format(image_num), bbox_inches='tight')
        image_num += 1
        plt.show()
    return r2, mse
def get_cpu_model_mars(cpu_stats_csv):
    """Fit one Earth model per CPU target and print both summaries.

    :param cpu_stats_csv: path to a ``;``-separated CSV file containing the
        ``ssd_tot_cpu`` and ``hdd_tot_cpu`` target columns plus predictors
    :return: None; the HDD model summary is printed first, then the SSD one
    """
    # Get the data frame
    df = pd.read_csv(cpu_stats_csv, sep=';')

    # Drop the target features (ssd_tot_cpu, hdd_tot_cpu)
    X = df.drop('ssd_tot_cpu', axis=1).drop('hdd_tot_cpu', axis=1)

    # Split data into training and validation sets. Only the training parts
    # are used below; the validation parts are discarded.
    x_train_h, _, y_train_h, _ = train_test_split(X,
                                                  df['hdd_tot_cpu'],
                                                  test_size=0.33,
                                                  random_state=5)
    x_train_s, _, y_train_s, _ = train_test_split(X,
                                                  df['ssd_tot_cpu'],
                                                  test_size=0.33,
                                                  random_state=5)
    # Fit both models before printing anything
    mdl_h = Earth().fit(x_train_h, y_train_h)
    mdl_s = Earth().fit(x_train_s, y_train_s)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(mdl_h.summary())
    print(mdl_s.summary())
Example #17
0
def test_untrained():
    """Check that an unfitted ``Earth`` object rejects fit-dependent calls."""
    # NotFittedError moved from utils.validation to exceptions
    # some time after 0.17.1
    try:
        from sklearn.exceptions import NotFittedError
    except ImportError:
        from sklearn.utils.validation import NotFittedError

    unfitted = Earth(**default_params)

    # Every method that needs a fitted model has to raise NotFittedError
    # when called on a fresh instance.
    for method_name in ('predict', 'transform', 'predict_deriv', 'score'):
        assert_raises(NotFittedError, getattr(unfitted, method_name), X)

    # the following should be changed to raise NotFittedError
    for trace_name in ('forward_trace', 'pruning_trace'):
        assert_equal(getattr(unfitted, trace_name)(), None)
    assert_equal(unfitted.summary(), "Untrained Earth Model")
Example #18
0
def test_untrained():
    """Exercise the public API of an ``Earth`` object before it is fitted.

    ``predict``, ``transform``, ``predict_deriv`` and ``score`` must raise
    ``NotFittedError``; the traces currently return ``None`` and
    ``summary()`` returns a placeholder string.
    """
    # NotFittedError moved from utils.validation to exceptions
    # some time after 0.17.1
    try:
        from sklearn.exceptions import NotFittedError
    except ImportError:
        from sklearn.utils.validation import NotFittedError

    model = Earth(**default_params)
    requires_fit = ('predict', 'transform', 'predict_deriv', 'score')
    for name in requires_fit:
        assert_raises(NotFittedError, getattr(model, name), X)

    # the following should be changed to raise NotFittedError
    forward = model.forward_trace()
    assert_equal(forward, None)
    pruning = model.pruning_trace()
    assert_equal(pruning, None)
    assert_equal(model.summary(), "Untrained Earth Model")
def mars_forecast(x_train, x_test, y_train, timestamp):
    """Fit a MARS (``Earth``) model and build a forecast report.

    :param x_train: training predictors passed to ``Earth.fit``
    :param x_test: predictors to forecast for
    :param y_train: training response passed to ``Earth.fit``
    :param timestamp: forwarded to
        ``outputReport.regression_extanded_results_forecast``
    :return: tuple ``(filename, result)`` where ``result`` is the model
        description followed by the forecast rendered as an HTML table
    """
    # set model
    model = Earth(max_degree=1, penalty=1.0, endspan=5)

    # fit and predict
    model = model.fit(x_train, y_train)
    y_pred = pd.DataFrame(model.predict(x_test), columns=["Forecasted Values"])

    filename = outputReport.regression_extanded_results_forecast(
        timestamp, y_pred, "mars forecast")

    try:
        model_summary = str(model.summary())
        model_summary_final = model_summary.replace("\n", "<br>")
        result = "<br>Model Parameters:<br>" + str(model.get_params(
        )) + "<br>Model Summary:<br>" + model_summary_final
    except Exception:
        # The summary is best effort, but KeyboardInterrupt/SystemExit must
        # still propagate, hence no bare ``except``.
        result = "Model Summary is not available for MARS"

    result += str(
        y_pred.to_html(formatters={'Name': lambda x: '<b>' + x + '</b>'}))

    return filename, result
Example #20
0
from sklearn import preprocessing
from sklearn.feature_extraction import DictVectorizer
from pyearth import Earth
from matplotlib import pyplot

# Load the purchase data.
# NOTE(review): newer pandas releases spell this keyword ``sheet_name`` -
# confirm against the installed pandas version.
df = pd.read_excel('relay-foods.xlsx', sheetname='Purchase Data - Full Study')
df['OrderId'] = df['OrderId'].astype('category')
df['CommonId'] = df['CommonId'].astype('category')

# Drop the date columns before building the features
col_names = ['OrderDate', 'PickupDate']
df = df.drop(col_names, axis=1)
y = df['TotalCharges']
df_2 = df[['OrderId', 'UserId', 'PupId']]

# One {column: value} dict per row, then vectorise them into a feature matrix.
# Series.to_dict() replaces dict(r.iteritems()), which is unavailable on
# Python 3 / recent pandas.
X = [r.to_dict() for _, r in df_2.iterrows()]
train_fea = DictVectorizer().fit_transform(X)

#Fit an Earth model
model = Earth()
model.fit(train_fea, y)

#Print the model
print(model.trace())
print(model.summary())

#Predict on the vectorised features the model was fitted on; the raw list
#of dicts in X is not a valid input for predict
y_hat = model.predict(train_fea)
Example #21
0
# X_test = np.array(test.iloc[:, 0:13])
# X_test_id = test.iloc[:, 0]

# Reproducible synthetic data: m samples with n features each.
np.random.seed(0)
m, n = 1000, 10
X = np.random.uniform(size=(m, n)) * 80 - 40
# The response depends on column 6 only, plus standard normal noise.
y = np.abs(X[:, 6] - 4.0) + np.random.normal(size=m)

# Fit an Earth model
model = Earth().fit(X, y)

# Print the model
print(model.trace())
print(model.summary())

X, y = load_boston(return_X_y=True)
model_rsq_dic = {}

# # % lower status of the population
# Collect column 12 of X with a plain comprehension (rather than one used
# only for its append side effect) and shape it as an (n_samples, 1) array.
lstat_x = np.array([row[12] for row in X]).reshape(-1, 1)

#lstat_x = X
print(lstat_x.shape)
y = y.reshape(-1, 1)
print(y.shape)
# V-Function Example
#=========================================================================
# Create some fake data
# Seed the generator so the fake data is reproducible.
numpy.random.seed(0)
m, n = 1000, 10
X = numpy.random.uniform(size=(m, n)) * 80 - 40
# V-shaped response in column 6 plus standard normal noise.
y = numpy.abs(X[:, 6] - 4.0) + numpy.random.normal(size=m)

# Fit an Earth model
model = Earth()
model.fit(X, y)

# Print the model
print(model.trace())
print(model.summary())

# Plot the model: observed values in red, predictions in blue
y_hat = model.predict(X)
x_6 = X[:, 6]
pyplot.figure()
pyplot.plot(x_6, y, 'r.')
pyplot.plot(x_6, y_hat, 'b.')
pyplot.xlabel('x_6')
pyplot.ylabel('y')
pyplot.title('Simple Earth Example')
pyplot.savefig('simple_earth_example.png')

#=========================================================================
# Hinge plot
#=========================================================================
from xkcdify import XKCDify
        'n_estimators': 30,
        'subsample': 0.7,
        'colsample_bytree': 0.7,
        'objective': 'multi:softprob',
        'random_state': SEED}
    }

# Logistic regression is trained with fixed parameters.
train_model(LogisticRegression,
            x_train, y_train, x_val, y_val, x_test, y_test,
            model_param=estimators_params['LogisticRegression'])

# The remaining estimators are trained with the parameter grid registered
# under their class name.
for estimator in (DecisionTreeClassifier, RandomForestClassifier, XGBClassifier):
    train_model(estimator,
                x_train, y_train, x_val, y_val, x_test, y_test,
                grid_param=estimators_params_grid[estimator.__name__])

from pyearth import Earth
# Fit an Earth model with max_degree=2.
model = Earth(max_degree=2).fit(x_train, y_train)
# The summary text is evaluated but not printed here.
model.summary()

#
# 'Earth Model\n------------------------------------------------------------------------------\n
# Basis Function                                           Pruned  Coefficient  \n
# ------------------------------------------------------------------------------\n
# (Intercept)                                              No      -370.848     \n
# affiliate_provider_email                                 No      1.44863      \n
# affiliate_channel_other                                  No      0.959827     \n
# date_first_active_year                                   No      0.187482     \n
# first_os_Android*affiliate_provider_email                No      -4.49792     \n
# first_browser_-unknown-*date_first_active_year           No      0.000224682  \n
# date_account_created_month_9*affiliate_provider_email    No      -2.88116     \n
# date_first_active_dayofyear                              No      -0.00206883  \n
# language_de                                              No      -1.68519     \n
# age_bkt_25-29*date_first_active_year                     No      0.000122373  \n
Example #24
0
def csc(df, hamming_string_dict, outdir, filename):
    """CRISPR Specificity Correction

    Builds a table of gRNA, Hamming metrics and the original value, fits an
    Earth (MARS) model of the original value on the Hamming metrics and, if
    the model's RMSE on the held-out 10% is at most 1.0, writes the corrected
    values and the pickled model to ``outdir``. Model metrics are always
    written once the model has been fitted.

    :param df: pandas dataframe with first column as gRNA and second column as logFC/metric
    :param hamming_string_dict: CSC onboard dictionary object with key as gRNA and value as Hamming metrics
    :param outdir: absolute filepath to output directory
    :param filename: name of input file to be used as part of output filename
    :return: 'T' if the correction was computed and written, 'F' if the
        model could not be trained or its RMSE exceeded 1.0

    """
    # MARS compatible file
    df_mars_lst = []
    for row in np.asarray(df):
        grna, metric = row[0], row[1]
        try:
            metric = float(metric)
        except (TypeError, ValueError):
            # TypeError covers values such as None, which float() rejects
            # with a different exception than unparsable strings.
            sys.stdout.write(
                'WARNING: encountered %s which is not float compatible, skipping\n'
                % metric)
            continue
        try:
            hamming_metrics = hamming_string_dict[grna]
        except KeyError:
            sys.stdout.write('\n%s not found in selected library: passing\n' %
                             grna)
            continue
        # Row layout: gRNA, the Hamming metrics, then the original value.
        df_mars_lst.append([grna] + list(hamming_metrics) + [metric])

    df = pd.DataFrame(df_mars_lst,
                      columns=[
                          'gRNA', 'specificity', 'h0', 'h1', 'h2', 'h3',
                          'original_value'
                      ])

    # exclude infinite specificity non-target gRNAs
    df = df[df['h0'] != 0]

    # isolate pertinent confounder variables
    df_confounders = df[['specificity', 'h0', 'h1', 'h2', 'h3']]

    # training and testing data
    train_x, test_x, train_y, test_y = train_test_split(df_confounders,
                                                        df['original_value'],
                                                        test_size=0.10,
                                                        random_state=1)

    # Fit an Earth model
    model = Earth(feature_importance_type='gcv')
    try:
        model.fit(train_x, train_y)
    except ValueError:
        sys.stdout.write(
            '\nValue Error encountered. Model unable to be trained. Exiting CSC Novo\n'
        )
        sys.stdout.write(
            'training input x data\n %s\ntraining input y data\n %s\n' %
            (train_x, train_y))
        return 'F'

    # Print the model
    print(model.trace())
    print(model.summary())
    print(model.summary_feature_importances())

    # Predict on the held-out data
    y_hat = model.predict(test_x)

    # calculating RMSE values
    rms1 = sqrt(mean_squared_error(test_y, y_hat))
    print('\n\nRMSE on Predictions\n\n')
    print(rms1)

    # calculating R^2 for training
    print('\n\nR^2 on Training Data\n\n')
    print(model.score(train_x, train_y))

    # calculating R^2 for testing
    print('\n\nR^2 on Testing Data\n\n')
    print(model.score(test_x, test_y))

    # write out model metrics
    with open('%s/csc_model_metrics_%s.txt' % (outdir, filename),
              'w') as outfile:
        outfile.write('%s\n%s\n%s\nRMSE on Predictions\n%s' %
                      (model.trace(), model.summary(),
                       model.summary_feature_importances(), rms1))

    if rms1 > 1.0:
        sys.stdout.write(
            '\nCSC adjustment not computed as model residual mean squared error exceeds 1.0\n'
        )
        return 'F'

    # full data prediction
    df['earth_adjustment'] = model.predict(df_confounders)

    # CSC correction
    df['earth_corrected'] = df['original_value'] - df['earth_adjustment']

    # main write out
    df.to_csv('%s/csc_output_%s_earth_patched.csv' % (outdir, filename))

    # pickle write out; the context manager closes the file even if the
    # dump raises
    with open('%s/csc_output_%s_earth_model.pl' % (outdir, filename),
              'wb') as model_file:
        pl.dump(model, model_file)

    sys.stdout.write('\nCSC adjustment complete\n')
    sys.stdout.write('\nCSC output files written to %s\n' % outdir)
    return 'T'
Example #25
0
import numpy
from pyearth import Earth
from matplotlib import pyplot

# Create some fake data
# Seed the generator so the fake data is reproducible
numpy.random.seed(2)
m = 1000
n = 10
X = 80 * numpy.random.uniform(size=(m, n)) - 40
y = numpy.abs(X[:, 6] - 4.0) + 1 * numpy.random.normal(size=m)

# Fit an Earth model
model = Earth(max_degree=1)
model.fit(X, y)

# Print the model; single-argument print(...) runs on Python 2 and 3 alike
print(model.trace())
print(model.summary())

# Plot the model: observed values in red, predictions in blue
y_hat = model.predict(X)
pyplot.figure()
pyplot.plot(X[:, 6], y, 'r.')
pyplot.plot(X[:, 6], y_hat, 'b.')
pyplot.show()
Example #26
0
# Remove the <HIGH>, <LOW>, <CLOSE> and <VOL> columns from x in place.
for dropped_column in ('<HIGH>', '<LOW>', '<CLOSE>', '<VOL>'):
    del x[dropped_column]

# Names of the non-object columns of x.
numeric_feats = x.dtypes[x.dtypes != "object"].index
# Only the index of the per-column skew result is kept, i.e. the column names.
skewed_feats = train[numeric_feats].apply(lambda col: skew(col.dropna())).index

# log(1 + x) transform of those columns.
x[skewed_feats] = np.log1p(x[skewed_feats])

# Fit MARS
mars = Earth(allow_missing=True)
mars.fit(x, y)
print(mars.trace())
print(mars.summary())

def inverse(x):
    """Return ``exp(x) - 1``, the inverse of ``np.log1p``.

    ``np.expm1`` is used instead of ``np.exp(x) - 1`` because the
    subtraction loses all precision when ``x`` is close to zero.

    :param x: scalar or array-like of log1p-transformed values
    :return: the values mapped back to the original scale
    """
    return np.expm1(x)

def graph(x, y, y2, a, b, Title):
    """Plot ``y`` and ``y2`` against ``x`` over the slice ``[a:b]``.

    ``y`` is drawn in red and labelled 'Actual', ``y2`` in blue and
    labelled 'Predicted'. The figure is shown and then returned.
    """
    window = slice(a, b)
    fig = plt.figure()
    for series, colour, legend_name in ((y, 'r', 'Actual'),
                                        (y2, 'b', 'Predicted')):
        plt.plot(x[window], series[window], colour, label=legend_name)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title(Title)
    plt.legend(loc='upper left')
    plt.show()
    return fig
# Seed the generator so the synthetic data is reproducible
np.random.seed(1)
m = 1000
n = 5

X = np.random.normal(size=(m,n))

# Make X[:,1] binary
X[:,1] = np.random.binomial(1,.5,size=m)

# The response is a linear function of the inputs
y = 2 * X[:,0] + 3 * X[:,1] + np.random.normal(size=m)

# Fit the earth model
model = Earth().fit(X, y)

# Print the model summary, showing linear terms
# (single-argument print(...) runs on Python 2 and 3 alike)
print(model.summary())

# Plot for both values of X[:,1]
y_hat = model.predict(X)
plt.figure()
plt.plot(X[:,0], y, 'k.')
plt.plot(X[X[:,1] == 0, 0], y_hat[X[:,1] == 0], 'r.', label='$x_1 = 0$')
plt.plot(X[X[:,1] == 1, 0], y_hat[X[:,1] == 1], 'b.', label='$x_1 = 1$')
plt.legend(loc='best')
plt.xlabel('$x_0$')
plt.show()


Example #28
0
class TestEarth(object):
    """Regression and compatibility tests for the ``Earth`` estimator.

    ``__init__`` builds a small synthetic problem (``self.X``, ``self.y``)
    from a hand-made basis and a shared ``Earth(penalty=1)`` instance that
    the test methods fit and inspect.
    """

    def __init__(self):
        # Fixed seed so the generated data is reproducible.
        numpy.random.seed(0)
        self.basis = Basis(10)
        constant = ConstantBasisFunction()
        self.basis.append(constant)
        bf1 = HingeBasisFunction(constant, 0.1, 10, 1, False, 'x1')
        bf2 = HingeBasisFunction(constant, 0.1, 10, 1, True, 'x1')
        bf3 = LinearBasisFunction(bf1, 2, 'x2')
        self.basis.append(bf1)
        self.basis.append(bf2)
        self.basis.append(bf3)
        self.X = numpy.random.normal(size=(100, 10))
        # B receives the basis expansion of X (one column per basis function).
        self.B = numpy.empty(shape=(100, 4), dtype=numpy.float64)
        self.basis.transform(self.X, self.B)
        self.beta = numpy.random.normal(size=4)
        # Response: linear combination of the basis columns plus noise.
        self.y = numpy.empty(shape=100, dtype=numpy.float64)
        self.y[:] = numpy.dot(
            self.B, self.beta) + numpy.random.normal(size=100)
        self.earth = Earth(penalty=1)

    def test_get_params(self):
        """``get_params`` reports ``None`` for every unset parameter."""
        assert_equal(
            Earth().get_params(), {'penalty': None, 'min_search_points': None,
                                   'endspan_alpha': None, 'check_every': None,
                                   'max_terms': None, 'max_degree': None,
                                   'minspan_alpha': None, 'thresh': None,
                                   'minspan': None, 'endspan': None,
                                   'allow_linear': None, 'smooth': None})
        assert_equal(
            Earth(
                max_degree=3).get_params(), {'penalty': None,
                                             'min_search_points': None,
                                             'endspan_alpha': None,
                                             'check_every': None,
                                             'max_terms': None, 'max_degree': 3,
                                             'minspan_alpha': None,
                                             'thresh': None, 'minspan': None,
                                             'endspan': None,
                                             'allow_linear': None,
                                             'smooth': None})

    @if_statsmodels
    def test_linear_fit(self):
        """Coefficients agree with statsmodels OLS, and GLS when weighted."""
        from statsmodels.regression.linear_model import GLS, OLS
        self.earth.fit(self.X, self.y)
        self.earth._Earth__linear_fit(self.X, self.y)
        soln = OLS(self.y, self.earth.transform(self.X)).fit().params
        assert_almost_equal(numpy.mean((self.earth.coef_ - soln) ** 2), 0.0)

        sample_weight = 1.0 / (numpy.random.normal(size=self.y.shape) ** 2)
        self.earth.fit(self.X, self.y)
        self.earth._Earth__linear_fit(self.X, self.y, sample_weight)
        soln = GLS(self.y, self.earth.transform(
            self.X), 1.0 / sample_weight).fit().params
        assert_almost_equal(numpy.mean((self.earth.coef_ - soln) ** 2), 0.0)

    def test_sample_weight(self):
        """Down-weighted samples are fitted worse than the remaining ones."""
        group = numpy.random.binomial(1, .5, size=1000) == 1
        sample_weight = 1 / (group * 100 + 1.0)
        x = numpy.random.uniform(-10, 10, size=1000)
        y = numpy.abs(x)
        y[group] = numpy.abs(x[group] - 5)
        y += numpy.random.normal(0, 1, size=1000)
        model = Earth().fit(x, y, sample_weight=sample_weight)

        # Check that the model fits better for the more heavily weighted group
        assert_true(model.score(x[group], y[group]) < model.score(
            x[numpy.logical_not(group)], y[numpy.logical_not(group)]))

        # Make sure that the score function gives the same answer as the trace
        pruning_trace = model.pruning_trace()
        rsq_trace = pruning_trace.rsq(model.pruning_trace().get_selected())
        assert_almost_equal(model.score(x, y, sample_weight=sample_weight),
                            rsq_trace)

        # Uncomment below to see what this test situation looks like
#        from matplotlib import pyplot
#        print model.summary()
#        print model.score(x,y,sample_weight = sample_weight)
#        pyplot.figure()
#        pyplot.plot(x,y,'b.')
#        pyplot.plot(x,model.predict(x),'r.')
#        pyplot.show()

    def test_fit(self):
        """Trace and summary of a fit must equal ``earth_regress.txt``."""
        self.earth.fit(self.X, self.y)
        res = str(self.earth.trace()) + '\n' + self.earth.summary()
        filename = os.path.join(os.path.dirname(__file__),
                                'earth_regress.txt')
        with open(filename, 'r') as fl:
            prev = fl.read()
        assert_equal(res, prev)

    def test_smooth(self):
        """A ``smooth=True`` fit must equal ``earth_regress_smooth.txt``."""
        model = Earth(penalty=1, smooth=True)
        model.fit(self.X, self.y)
        res = str(model.trace()) + '\n' + model.summary()
        filename = os.path.join(os.path.dirname(__file__),
                                'earth_regress_smooth.txt')
        with open(filename, 'r') as fl:
            prev = fl.read()
        assert_equal(res, prev)

    def test_linvars(self):
        """A fit with all columns as ``linvars`` must equal the stored text."""
        self.earth.fit(self.X, self.y, linvars=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        res = str(self.earth.trace()) + '\n' + self.earth.summary()
        filename = os.path.join(os.path.dirname(__file__),
                                'earth_linvars_regress.txt')
        with open(filename, 'r') as fl:
            prev = fl.read()
        assert_equal(res, prev)

    def test_score(self):
        """``score`` agrees with the R^2 of the selected pruning step."""
        model = self.earth.fit(self.X, self.y)
        record = model.pruning_trace()
        rsq = record.rsq(record.get_selected())
        assert_almost_equal(rsq, model.score(self.X, self.y))

    @if_pandas
    @if_environ_has('test_pathological_cases')
    def test_pathological_cases(self):
        """Fit the data sets in ``pathological_data`` and compare summaries.

        Each case reads ``<case>.csv`` (column ``y`` is the response),
        optionally the ``sample_weight`` column of the named weight file,
        and compares ``summary()`` with the contents of ``<case>.txt``.
        """
        import pandas
        directory = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'pathological_data')
        cases = {'issue_44': {},
                 'issue_50': {'penalty': 0.5,
                              'minspan': 1,
                              'allow_linear': False,
                              'endspan': 1,
                              'check_every': 1,
                              'sample_weight': 'issue_50_weight.csv'}}
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for case, settings in cases.items():
            data = pandas.read_csv(os.path.join(directory, case + '.csv'))
            y = data['y']
            del data['y']
            X = data
            # The weight file name is not an Earth parameter; remove it
            # from the settings before constructing the model.
            weight_file = settings.pop('sample_weight', None)
            if weight_file is None:
                sample_weight = None
            else:
                filename = os.path.join(directory, weight_file)
                sample_weight = pandas.read_csv(filename)['sample_weight']
            model = Earth(**settings)
            model.fit(X, y, sample_weight=sample_weight)
            with open(os.path.join(directory, case + '.txt'), 'r') as infile:
                correct = infile.read()
            assert_equal(model.summary(), correct)

    @if_pandas
    def test_pandas_compatibility(self):
        """DataFrame column names become the model's ``xlabels``."""
        import pandas
        X = pandas.DataFrame(self.X)
        y = pandas.DataFrame(self.y)
        colnames = ['xx' + str(i) for i in range(X.shape[1])]
        X.columns = colnames
        model = self.earth.fit(X, y)
        assert_list_equal(
            colnames, model.forward_trace()._getstate()['xlabels'])

    @if_patsy
    @if_pandas
    def test_patsy_compatibility(self):
        """Design matrices built by patsy keep their column names."""
        import pandas
        import patsy
        X = pandas.DataFrame(self.X)
        y = pandas.DataFrame(self.y)
        colnames = ['xx' + str(i) for i in range(X.shape[1])]
        X.columns = colnames
        X['y'] = y
        y, X = patsy.dmatrices(
            'y ~ xx0 + xx1 + xx2 + xx3 + xx4 + xx5 + xx6 + xx7 + xx8 + xx9 - 1',
            data=X)
        model = self.earth.fit(X, y)
        assert_list_equal(
            colnames, model.forward_trace()._getstate()['xlabels'])

    def test_pickle_compatibility(self):
        """A pickled and restored model equals the original."""
        model = self.earth.fit(self.X, self.y)
        model_copy = pickle.loads(pickle.dumps(model))
        assert_true(model_copy == model)
        assert_true(
            numpy.all(model.predict(self.X) == model_copy.predict(self.X)))
        # The root of basis function 1 must be the very same object as
        # basis function 0, in the original and in the copy.
        assert_true(model.basis_[0] is model.basis_[1]._get_root())
        assert_true(model_copy.basis_[0] is model_copy.basis_[1]._get_root())

    def test_copy_compatibility(self):
        """A ``copy.copy`` of a fitted model equals the original."""
        model = self.earth.fit(self.X, self.y)
        model_copy = copy.copy(model)
        assert_true(model_copy == model)
        assert_true(
            numpy.all(model.predict(self.X) == model_copy.predict(self.X)))
        # The root of basis function 1 must be the very same object as
        # basis function 0, in the original and in the copy.
        assert_true(model.basis_[0] is model.basis_[1]._get_root())
        assert_true(model_copy.basis_[0] is model_copy.basis_[1]._get_root())