Example #1
0
 def marsmodelorr(self, use_smY=True, slope_trunc=0.00001, savgol_window=151, savgol_order=3, ex_order=51):
     """Fit a MARS model to ``self.X_``/``self.Y_`` and store figures of merit in ``self.fom``.

     The Y data is smoothed with ``savgol``, a ``MARS`` model is fitted, and the
     piecewise-linear segments of the prediction are recovered from its
     numerical derivative. Slopes/intercepts of those segments and the
     positions of extrema in the model residual of the ``Xf_``/``Yf_`` data
     are written into a dict that replaces ``self.fom``.

     :param use_smY: fit the model to the smoothed Y when true, otherwise to the raw Y
     :param slope_trunc: minimum change of the normalised slope between
         neighbouring samples that starts a new linear segment
     :param savgol_window: window argument forwarded to ``savgol``
     :param savgol_order: order argument forwarded to ``savgol``
     :param ex_order: ``order`` argument forwarded to ``argrelextrema``
     :return: None; results are stored on ``self.fom``
     """
     Xf, Yf = self.Xf_, self.Yf_
     X, Y = self.X_, self.Y_
     fom = {}
     # smooth the data
     # NOTE(review): ``savgol`` is presumably scipy.signal.savgol_filter - confirm at the import site
     smY = savgol(Y, savgol_window, savgol_order)
     # perform mars
     model = MARS()
     if use_smY:
         model.fit(X, smY)
     else:
         model.fit(X, Y)
     Y_h = model.predict(X)
     '''
     calculate dydx based on mars model to get knots and intercepts as this is 
     complicated to extract from hinge functions
     '''
     diff1 = np.diff(Y_h) / np.diff(X)
     # rescale the derivative to [0, 1] so that slope_trunc is a relative threshold
     tdiff1 = diff1 - np.nanmin(diff1)
     tdiff1 = tdiff1 / np.nanmax(tdiff1)
     #calculate slopes of linear segments
     # ID holds the sample indices at which the normalised slope jumps, i.e. segment boundaries
     ID = [i for i in range(1, len(tdiff1)) if np.abs(tdiff1[i] - tdiff1[i - 1]) > slope_trunc]
     ID.insert(0, 0)
     ID.append(np.argmax(X))  # this might cause an error
     # NOTE(review): the appended index is only used as a sentinel; the loops below stop
     # at len(ID) - 1 / len(ID) - 2, so the last boundary pair is never evaluated - confirm intent
     slopes = [np.nanmean(diff1[ID[i - 1]:ID[i]]) for i in range(1, len(ID) - 1)]
     # intercepts of the lines y = slope * x + a through the first point of each segment
     a = [Y_h[ID[i]] - slopes[i] * X[ID[i]] for i in range(len(ID) - 2)]
     # IDM: steepest (most positive) segment, IDm: flattest segment
     IDM, IDm = np.argmax(slopes), np.argmin(np.abs(slopes))
     # intercept of highest slope and zero as well as highest slope and lowest slope
     fom['zinter'] = -a[IDM] / slopes[IDM]
     fom['lminter'] = (a[IDM] - a[IDm]) / (slopes[IDm] - slopes[IDM])
     fom['max_slope'] = slopes[IDM]
     fom['curr_lminter_model'] = fom['lminter'] * slopes[IDM] + a[IDM]
     # mean of the raw Y values whose X lies within 0.5 of the intersection
     fom['curr_lminter_data'] = np.mean(Y[np.where(np.abs(X - fom['lminter']) < 0.5)[0]])
     # calculate how the CV curves might look like without the 'ORR part'
     srYs = smY - model.predict(X)
     srYf = savgol(Yf - model.predict(Xf), savgol_window, savgol_order)
     # calculate their derivative
     # NOTE(review): dsrYf is computed but never used below; the extrema are taken
     # from srYf itself - confirm whether dsrYf was meant to be used
     dsrYf = savgol(np.diff(srYf) / np.diff(Xf), savgol_window, savgol_order)
     # find the extrema of the residual for extraction of redox pots
     redID_f = argrelextrema(srYf, np.less, order=ex_order)
     oxID_f = argrelextrema(srYf, np.greater, order=ex_order)
     # calc some more foms like position of redox waves
     fom['redpot_f'], fom['redpot_f_var'] = np.nanmean(Xf[redID_f]), np.nanstd(Xf[redID_f])
     fom['oxpot_f'], fom['oxpot_f_var'] = np.nanmean(Xf[oxID_f]), np.nanstd(Xf[oxID_f])
     # keep the raw and derived traces alongside the scalar figures of merit
     fom['X'], fom['Xf'] = X, Xf
     fom['srYs'], fom['srYf'], fom['smY'] = srYs, srYf, smY
     fom['Y'], fom['Yf'], fom['Y_h'] = Y, Yf, Y_h
     # sum of squared residuals between the model prediction and the raw Y
     fom['noise_lvl'] = np.sum((Y_h - Y) ** 2, axis=0)
     self.fom = fom
Example #2
0
 def fit_mars(self, X_test):
     """Fit an Earth model on ``self.X``/``self.y`` and write test predictions to CSV.

     :param X_test: frame of test features; its index labels the output rows
     :return: None; predictions are written to ``results/results_mars.csv``
     """
     train_features = self.X.copy().values
     train_target = self.y.copy().values.flatten()

     model = Earth(max_terms=1000, max_degree=1, penalty=3)
     model.fit(train_features, train_target)

     predictions = model.predict(X_test.copy().values)
     output = pd.DataFrame(data=predictions,
                           index=X_test.index,
                           columns=['SalePrice'])
     output.to_csv('results/results_mars.csv', sep=',')
def mars(x_train, x_test, y_train, y_test, timestamp):
    """Fit an Earth model, score it on the test split and build an HTML report.

    :param x_train: training features
    :param x_test: test features
    :param y_train: training target
    :param y_test: test target
    :param timestamp: forwarded to ``outputReport.regression_extanded_results``
    :return: tuple ``(score, fileName, MSE, MAD, MAPE, result)`` where ``score``
        is the squared correlation between ``y_test`` and the predictions and
        ``result`` is the report string extended with the model summary
    """
    model = Earth(max_degree=1, penalty=1.0, endspan=5)
    model = model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    # Squared Pearson correlation between observed and predicted values.
    correlation_matrix = np.corrcoef(y_test, y_pred)
    score = correlation_matrix[0, 1] ** 2

    MSE, MAD, MAPE = outputReport.regression_basic_results(y_test, y_pred)
    fileName, result = outputReport.regression_extanded_results(
        timestamp, y_test, y_pred, "mars")

    # The summary is best-effort, but only ordinary errors are swallowed:
    # a bare ``except`` would also hide KeyboardInterrupt / SystemExit.
    try:
        model_summary_final = str(model.summary()).replace("\n", "<br>")
        result += ("<br>Model Parameters:<br>" + str(model.get_params())
                   + "<br>Model Summary:<br>" + model_summary_final)
    except Exception:
        result += "<br>Model Summary is not available for MARS"
    return score, fileName, MSE, MAD, MAPE, result
Example #4
0
 def estimate_reward(self, z_train, y_train, z):
     """Predict the reward for a single point ``z`` with a degree-2 Earth model.

     :param z_train: training inputs the model is fitted on
     :param y_train: training targets
     :param z: single input point; wrapped in a list to form a one-row batch
     :return: whatever ``Earth.predict`` returns for the one-row batch
     """
     mars_model = Earth(max_degree=2)
     mars_model.fit(z_train, y_train)
     return mars_model.predict([z])
Example #5
0
def test_export_python_string():
    """The exported Python source must reproduce the model's predictions."""
    for smooth in (True, False):
        fitted = Earth(penalty=1, smooth=smooth, max_degree=2).fit(X, y)
        source = export_python_string(fitted, 'my_test_model')
        # Executing the source defines ``my_test_model`` in this module.
        six.exec_(source, globals())
        expected = fitted.predict(X)
        exported = my_test_model(X)
        for exp_pred, model_pred in zip(expected, exported):
            assert_almost_equal(exp_pred, model_pred)
Example #6
0
def test_copy_compatibility():
    """A shallow copy compares equal, predicts identically and keeps its basis root."""
    original = Earth(**default_params).fit(X, y)
    duplicate = copy.copy(original)

    assert_true(duplicate == original)
    assert_true(numpy.all(original.predict(X) == duplicate.predict(X)))
    # The first basis function must be the root of the second one in both models.
    for fitted in (original, duplicate):
        assert_true(fitted.basis_[0] is fitted.basis_[1]._get_root())
Example #7
0
    def marsAccuracy(self):
        """Fit an Earth model on the first 200 rows and return the validation RMSE.

        Side effects: re-indexes ``self.df`` by its ``Date`` column and stores
        ``self.train``, ``self.valid`` and ``self.preds``.

        :return: root mean squared error of the predictions for ``Close``
            on the rows from position 200 onwards
        """
        # Use the dates as index, then split by position.
        self.df.index = self.df['Date']
        self.train, self.valid = self.df[:200], self.df[200:]

        # Features are every column except the target; timeToFloat converts them.
        x_train = timeToFloat(self.train.drop('Close', axis=1))
        y_train = self.train['Close']
        x_valid = timeToFloat(self.valid.drop('Close', axis=1))
        y_valid = self.valid['Close']

        model = Earth()
        model.fit(x_train, y_train)
        self.preds = model.predict(x_valid)

        return np.sqrt(mean_squared_error(y_valid, self.preds))
Example #8
0
def mars(df_train, df_test, exogenous_features, scale_list=None, max_degree=2):
    """Fit an Earth model on ``df_train`` and return a rounded one-step forecast.

    :param df_train: training frame containing ``exogenous_features`` and ``y``
    :param df_test: frame with the features of the row to forecast; the
        result is extracted with ``.item()``, so exactly one prediction is expected
    :param exogenous_features: column names used as model inputs
    :param scale_list: optional columns that are clipped at zero and
        transformed with ``log(x + 1)`` before fitting
    :param max_degree: forwarded to ``Earth``
    :return: the forecast, floored at zero and rounded to zero decimals
    """
    if scale_list is None:
        scale_list = []
    if len(scale_list) > 0:
        # Transform copies: the original wrote into the caller's frames, so a
        # second call with the same frames log-transformed the data twice.
        df_train = df_train.copy()
        df_test = df_test.copy()
        for col in scale_list:
            df_train.loc[df_train[col] < 0, col] = 0
            df_test.loc[df_test[col] < 0, col] = 0
            df_train[col] = np.log(df_train[col] + 1)
            df_test[col] = np.log(df_test[col] + 1)

    X_train = df_train[exogenous_features]
    X_test = df_test[exogenous_features]
    y_train = df_train['y']
    model = Earth(max_degree=max_degree,
                  allow_missing=True,
                  enable_pruning=True,
                  minspan_alpha=.5,
                  thresh=.001,
                  smooth=False,
                  verbose=False)
    model = model.fit(X_train, y_train)

    forecast = model.predict(X_test)
    if forecast[0] < 0:
        forecast[0] = 0

    if len(scale_list) > 0:
        # NOTE(review): the forward transform is log(x + 1), whose exact inverse
        # is exp(x) - 1; exp(x) is kept because callers may rely on it - confirm.
        forecast[0] = np.exp(forecast[0])
    return np.round(forecast.item(), 0)
def test_export_python_string():
    """Check the generated Python source against the fitted model, smooth and not."""
    for smooth in (True, False):
        earth_model = Earth(penalty=1, smooth=smooth, max_degree=2).fit(X, y)
        generated = export_python_string(earth_model, 'my_test_model')
        # exec'ing the generated code binds ``my_test_model`` as a module global
        six.exec_(generated, globals())
        pairs = zip(earth_model.predict(X), my_test_model(X))
        for exp_pred, model_pred in pairs:
            assert_almost_equal(exp_pred, model_pred)
Example #10
0
class MARS:
    """Earth regressor turned into a +1/-1 classifier by thresholding at the median."""

    def __init__(self, x_train, y_train, x_test, y_test):
        """Store the train/test splits; the estimator is created by ``fit``."""
        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_test, y_test
        self.classifier = None

    def fit(self):
        """Create a default ``Earth`` estimator and fit it on the training split."""
        self.classifier = Earth()
        self.classifier.fit(self.x_train, self.y_train)

    def predict(self):
        """Return the estimator's raw predictions for the test split."""
        return self.classifier.predict(self.x_test)

    def dichotomize(self, predictions):
        """Map each prediction to 1 when it is >= the median, otherwise to -1."""
        threshold = np.median(predictions)
        labels = []
        for value in predictions:
            if value >= threshold:
                labels.append(1)
            else:
                labels.append(-1)
        return np.array(labels)

    def evaluate(self):
        """Return the fraction of test labels that differ from the dichotomized predictions."""
        labels = self.dichotomize(self.predict())
        mismatches = sum(
            (1.0 for predicted, actual in zip(labels, self.y_test)
             if predicted != actual),
            0.0)
        return mismatches / len(self.y_test)
Example #11
0
def marsFit(x, y):
    """Fit a degree-1 Earth model and return its fit together with helpers.

    :return: tuple of (predictions for ``x``, the fitted model,
        ``range(len(x))``, a callable that predicts with the fitted model)
    """
    fitted = Earth(max_degree=1)
    fitted.fit(x, y)

    def f(x):
        # Closure over the fitted model so callers can evaluate new inputs.
        return fitted.predict(x)

    fitted_values = fitted.predict(x)
    positions = range(len(x))
    return fitted_values, fitted, positions, f
Example #12
0
def test_copy_compatibility():
    """copy.copy of a fitted model must equal it and predict the same values."""
    fitted = Earth(**default_params).fit(X, y)
    clone = copy.copy(fitted)

    assert_true(clone == fitted)
    same_predictions = numpy.all(fitted.predict(X) == clone.predict(X))
    assert_true(same_predictions)
    # basis_[0] has to be the root of basis_[1] for the source and the clone alike
    for candidate in (fitted, clone):
        assert_true(candidate.basis_[0] is candidate.basis_[1]._get_root())
Example #13
0
def test_copy_compatibility():
    """A shallow copy of a fitted model equals it and predicts (almost) the same."""
    numpy.random.seed(0)
    source_model = Earth(**default_params).fit(X, y)
    copied_model = copy.copy(source_model)

    assert_true(copied_model == source_model)
    assert_array_almost_equal(source_model.predict(X), copied_model.predict(X))
    # The root basis function must stay shared within each model.
    for candidate in (source_model, copied_model):
        assert_true(candidate.basis_[0] is candidate.basis_[1]._get_root())
Example #14
0
def run_pyearth(X, y, **kwargs):
    '''Fit pyearth on (X, y).

    Returns the in-sample predictions, the wall-clock fit time in seconds and
    the number of forward pass iterations (length of the forward trace minus one).
    '''
    estimator = Earth(**kwargs)

    started = time.time()
    estimator.fit(X, y)
    finished = time.time()

    predictions = estimator.predict(X)
    n_forward = len(estimator.forward_trace()) - 1
    return predictions, finished - started, n_forward
Example #15
0
def run_pyearth(X, y, **kwargs):
    '''Train an Earth model and report (predictions, training time, forward iterations).

    ``kwargs`` are passed straight to the ``Earth`` constructor; the iteration
    count is the length of the forward trace minus one.
    '''
    earth_model = Earth(**kwargs)

    fit_start = time.time()
    earth_model.fit(X, y)
    fit_end = time.time()

    fitted_values = earth_model.predict(X)
    iterations = len(earth_model.forward_trace()) - 1
    return fitted_values, fit_end - fit_start, iterations
Example #16
0
def test_nb_terms():
    """The number of basis functions must respect ``max_terms`` (plus a slack of two)."""
    for max_terms in (1, 3, 12, 13):
        model = Earth(max_terms=max_terms)
        model.fit(X, y)

        n_basis = len(model.basis_)
        assert_true(n_basis <= max_terms + 2)
        n_coef = len(model.coef_)
        assert_true(n_coef <= n_basis)
        assert_true(n_coef >= 1)
        # With a single term the model can only predict the mean of y.
        if max_terms == 1:
            assert_list_almost_equal_value(model.predict(X), y.mean())
Example #17
0
def test_nb_terms():
    """The number of basis functions must never exceed ``max_terms``."""
    for max_terms in (1, 3, 12, 13):
        model = Earth(max_terms=max_terms)
        model.fit(X, y)

        n_basis = len(model.basis_)
        assert_true(n_basis <= max_terms)
        n_coef = len(model.coef_)
        assert_true(n_coef <= n_basis)
        assert_true(n_coef >= 1)
        # An intercept-only model predicts the mean of y everywhere.
        if max_terms == 1:
            assert_list_almost_equal_value(model.predict(X), y.mean())
Example #18
0
def test_export_sympy():
    """The sympy export must evaluate to the same values as ``model.predict``.

    Runs over every combination of smooth / non-smooth basis functions, one or
    two output columns and with / without missing values. The exported
    expressions are lambdified with a custom numpy printer and compared with
    the model's own predictions.
    """
    import pandas as pd
    from sympy.utilities.lambdify import lambdify
    from sympy.printing.lambdarepr import NumPyPrinter

    class PyEarthNumpyPrinter(NumPyPrinter):
        # Printers for the functions that appear in exported py-earth expressions.
        def _print_Max(self, expr):
            return 'maximum(' + ','.join(self._print(i) for i in expr.args) + ')'

        def _print_NaNProtect(self, expr):
            return 'where(isnan(' + ','.join(self._print(a) for a in expr.args) + '), 0, ' \
                + ','.join(self._print(a) for a in expr.args) + ')'

        def _print_Missing(self, expr):
            return 'isnan(' + ','.join(self._print(a) for a in expr.args) + ').astype(float)'

    for smooth, n_cols, allow_missing in product((True, False), (1, 2), (True, False)):
        X_df = pd.DataFrame(X.copy(), columns=['x_%d' % i for i in range(X.shape[1])])
        y_df = pd.DataFrame(Y[:, :n_cols])
        if allow_missing:
            # Randomly remove some values so that the fitted model contains MissingnessBasisFunctions.
            # A single .loc assignment is used: the chained form X_df['x_1'][mask] = nan
            # writes to a temporary under pandas copy-on-write and leaves X_df unchanged.
            missing_mask = numpy.random.binomial(n=1, p=.1, size=X_df.shape[0]).astype(bool)
            X_df.loc[missing_mask, 'x_1'] = numpy.nan

        model = Earth(allow_missing=allow_missing, smooth=smooth, max_degree=2).fit(X_df, y_df)
        # export_sympy is wrapped in a list for single-output models so both cases iterate alike
        expressions = export_sympy(model) if n_cols > 1 else [export_sympy(model)]
        module_dict = {'select': numpy.select, 'less_equal': numpy.less_equal, 'isnan': numpy.isnan,
                       'greater_equal':numpy.greater_equal, 'logical_and': numpy.logical_and, 'less': numpy.less,
                       'logical_not':numpy.logical_not, "greater": numpy.greater, 'maximum':numpy.maximum,
                       'Missing': lambda x: numpy.isnan(x).astype(float),
                       'NaNProtect': lambda x: numpy.where(numpy.isnan(x), 0, x), 'nan': numpy.nan,
                       'float': float, 'where': numpy.where
                       }

        for i, expression in enumerate(expressions):
            # The lambdified functions for smoothed basis functions only work with modules='numpy' and
            # for regular basis functions with modules={'Max':numpy.maximum}.  This is a confusing situation
            func = lambdify(X_df.columns, expression, printer=PyEarthNumpyPrinter, modules=module_dict)
            y_pred_sympy = func(*[X_df.loc[:,var] for var in X_df.columns])

            y_pred = model.predict(X_df)[:,i] if n_cols > 1 else model.predict(X_df)
            assert_array_almost_equal(y_pred, y_pred_sympy)
Example #19
0
def runModel(i, featureCombo):
    """Cross-validate a default Earth model on ``featureCombo`` and log the mean MAE.

    Iterates over the module-level fold iterator ``kf``, fitting on the train
    rows of ``trainCleaned`` and scoring the ``Expected`` column on the held-out
    rows with ``getMAE``.

    :param i: value that is only written to the log messages
    :param featureCombo: column selection used as model input
    :return: None; results are reported through ``logging.warning``
    """
    mae = np.array([])
    logging.warning('try alpha = %s' % i)
    for ktrain, ktest in kf:
        x = trainCleaned.iloc[ktrain,]
        y = trainCleaned.iloc[ktest,]
        model = Earth()
        model.fit(x[featureCombo], x['Expected'])
        # This line was tab-indented in a space-indented body (TabError on Python 3).
        pred = model.predict(y[featureCombo])
        mae = np.append(mae, (getMAE(pred, y['Expected'])))
    logging.warning('average 10-fold MAE for alpha %s feature %s' % (i,featureCombo))
    logging.warning(mae.mean())
Example #20
0
def mars(p, xLabels, yLabel):
    """Fit an Earth model on a shuffled 80/20 split and plot its feature importances.

    Prints the model trace and summary, plots the residuals, and for each
    importance criterion shows and saves a bar chart. The module-level
    ``image_num`` counter is advanced once per saved image.

    :param p: frame holding the feature and target columns
    :param xLabels: names of the feature columns
    :param yLabel: name of the target column
    :return: tuple ``(r2, mse)`` computed on the test split
    """
    global image_num
    criteria = ('rss', 'gcv', 'nb_subsets')
    # Randomly shuffle rows
    p = p.sample(frac=1).reset_index(drop=True)
    # Split train and test: the last 20% of the shuffled rows are the test set
    twentyPercent = -1 * round(p.shape[0] * 0.2)
    n = len(xLabels)
    xCol = p[xLabels].values.reshape(-1, n)
    X_train = xCol[:twentyPercent]
    X_test = xCol[twentyPercent:]
    y_train = p[yLabel][:twentyPercent].values.reshape(-1, 1)
    y_test = p[yLabel][twentyPercent:].values.reshape(-1, 1)
    # Fit MARS model
    model = Earth(feature_importance_type=criteria)
    model.fit(X_train, y_train)
    # Make predictions
    predicted = model.predict(X_test)
    r2 = r2_score(y_test, predicted)
    mse = mean_squared_error(y_test, predicted)
    predicted = predicted.reshape(-1, 1)
    # Plot residuals
    plotResiduals(y_test, predicted)
    # Print summary
    print(model.trace())
    print(model.summary())
    # Plot feature importances
    importances = model.feature_importances_
    # Bar positions do not depend on the criterion, so build them once.
    x = list(range(0, len(xLabels)))
    for crit in criteria:
        # Rank the features by importance, highest first.
        ranked = sorted(zip(importances[crit], xLabels), reverse=True)
        coeff = [abs(importance) for importance, _ in ranked]
        feature = [featureToLabel[name] for _, name in ranked]
        plt.clf()
        plt.xticks(x, feature, rotation='vertical')
        plt.bar(x, coeff, align='center', alpha=0.5)
        plt.xlabel('Features')
        plt.ylabel("Importance (" + crit + ")")
        plt.tight_layout()
        # Save before showing: once show() returns, the figure may already be
        # closed, and saving afterwards writes an empty image.
        plt.savefig(image_path.format(image_num), bbox_inches='tight')
        plt.show()
        image_num += 1
    return r2, mse
Example #21
0
class Diagnostics:
    """Fit an ``EarthModel`` on the selected features of ``env`` and expose diagnostics."""

    def __init__(self, env, features, *args, **kwargs):
        """Select the columns flagged in ``features``, build the model and fit it.

        Extra positional and keyword arguments are forwarded to ``EarthModel``.
        """
        self.env = env
        self.solution = features
        selected = features.astype(bool)
        self.data = env.X.loc[:, selected].copy()
        self.y = self.env.y
        self.model = EarthModel(*args, **kwargs)
        self.y_pred = None
        self.error = None
        self._fit()

    def _fit(self):
        """Fit the model and cache the in-sample predictions and residuals."""
        self.model.fit(self.data, self.y)
        self.y_pred = self.model.predict(self.data)
        predicted = self.y_pred.flatten()
        observed = self.env.y.flatten()
        self.error = predicted - observed

    def summary(self):
        """Return the model summary for the selected columns, sorted by feature."""
        table = model_summary(self.model, self.data.columns)
        return table.sort_values("feature")

    def plot_thresholds(self):
        """Delegate to the module-level ``plot_thresholds`` helper."""
        return plot_thresholds(self.summary(), self.data)

    def plot_autocorrelations(self):
        """Draw the partial and the full autocorrelation plot of the residuals."""
        from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
        plot_pacf(self.error)
        plot_acf(self.error)

    def plot_qq(self):
        """Draw a probability plot of the residuals and print the squared fit correlation."""
        _fig, ax = plt.subplots()
        _, fit_result = scipy.stats.probplot(self.error, plot=ax, fit=True)
        _slope, _intercept, r_norm = fit_result
        print("R squared {:.4f}".format(r_norm**2))

    def plot_pred(self):
        """Plot predicted against true values, indexed like the feature frame."""
        columns = {"predicted": self.y_pred, "True value": self.y}
        df = pd.DataFrame(columns, index=self.data.index)
        title = "Model prediction for {}".format(self.env.target)
        return df.hvplot().opts(title=title)

    def score(self):
        """Return the model's MSE, GCV, RSQ and GRSQ as one formatted string."""
        fitted = self.model
        metrics = (fitted.mse_, fitted.gcv_, fitted.rsq_, fitted.grsq_)
        return "MSE: {:.4f}, GCV: {:.4f}, RSQ:{:.4f}, GRSQ: {:.4f}".format(
            *metrics)
Example #22
0
def test_output_weight():
    """Outputs with a larger ``output_weight`` must not be fitted worse.

    Ten standardised output columns are generated from one input; the last
    five get weight 2, the first five weight 1. The mean squared error of the
    lightly weighted group has to be at least that of the heavily weighted one.
    """
    x = numpy.random.uniform(-1, 1, size=(1000, 1))
    y = (numpy.dot(x, numpy.random.normal(0, 1, size=(1, 10)))) ** 5 + 1
    y = (y - y.mean(axis=0)) / y.std(axis=0)
    # Boolean mask of the first (lightly weighted) group. As an int array,
    # ``mse[group]`` was integer indexing and picked columns 1 and 0 repeatedly
    # instead of selecting the first five columns.
    group = numpy.array([1] * 5 + [0] * 5, dtype=bool)
    output_weight = numpy.array([1] * 5 + [2] * 5, dtype=float)
    model = Earth().fit(x, y, output_weight=output_weight)

    # Check that the model fits at least better
    # the more heavily weighted group
    mse = ((model.predict(x) - y)**2).mean(axis=0)
    group1_mean = mse[group].mean()
    group2_mean = mse[numpy.logical_not(group)].mean()
    assert_true(group1_mean > group2_mean or
                round(abs(group1_mean - group2_mean), 7) == 0)
Example #23
0
def test_output_weight():
    """Outputs with a larger ``output_weight`` must not be fitted worse.

    Ten standardised output columns are generated from one input; the last
    five get weight 2, the first five weight 1. The mean squared error of the
    lightly weighted group has to be at least that of the heavily weighted one.
    """
    x = numpy.random.uniform(-1, 1, size=(1000, 1))
    y = (numpy.dot(x, numpy.random.normal(0, 1, size=(1, 10))))**5 + 1
    y = (y - y.mean(axis=0)) / y.std(axis=0)
    # Boolean mask of the first (lightly weighted) group. As an int array,
    # ``mse[group]`` was integer indexing and picked columns 1 and 0 repeatedly
    # instead of selecting the first five columns.
    group = numpy.array([1] * 5 + [0] * 5, dtype=bool)
    output_weight = numpy.array([1] * 5 + [2] * 5, dtype=float)
    model = Earth().fit(x, y, output_weight=output_weight)

    # Check that the model fits at least better
    # the more heavily weighted group
    mse = ((model.predict(x) - y)**2).mean(axis=0)
    group1_mean = mse[group].mean()
    group2_mean = mse[numpy.logical_not(group)].mean()
    assert_true(group1_mean > group2_mean
                or round(abs(group1_mean - group2_mean), 7) == 0)
Example #24
0
    def MARS(self, X=None, Y=None):
        """Fit Multivariate Adaptive Regression Splines and score them on ``self.X``/``self.Y``.

        When both ``X`` and ``Y`` are given they replace the stored training
        sample (``self.sampled_X`` / ``self.sampled_Y``) before fitting.

        :return: tuple ``(normalized mean square error, mean absolute percentage error)``
        """
        from pyearth import Earth

        if X is not None and Y is not None:
            self.sampled_X = X
            self.sampled_Y = Y

        regressor = Earth()
        regressor.fit(self.sampled_X, self.sampled_Y)
        predictions = regressor.predict(self.X)

        nmse = self.metric.normalized_mean_square_error(predictions, self.Y)
        mape = self.metric.mean_absolute_percentage_error(predictions, self.Y)
        return (nmse, mape)
def Mars_detrend(x, y):
    """Return the in-sample predictions of a default Earth model fitted on (x, y).

    The fitted values serve as the trend estimate; printing the model
    trace/summary and plotting y against the fit were debugging aids only.
    """
    trend_model = Earth()
    trend_model.fit(x, y)
    return trend_model.predict(x)
Example #26
0
    def mars(self, max_degree=2):
        """Fit Earth on the stored training split and return the rounded forecast.

        A negative forecast is replaced by zero, and when ``self.scale_list``
        is non-empty the forecast is exponentiated before rounding.

        :param max_degree: forwarded to ``Earth``
        :return: the single forecast value rounded to zero decimals
        """
        earth_options = dict(max_degree=max_degree,
                             allow_missing=True,
                             enable_pruning=True,
                             minspan_alpha=.5,
                             thresh=.001,
                             smooth=False,
                             verbose=False)
        fitted = Earth(**earth_options).fit(self.X_train, self.y_train)

        forecast = fitted.predict(self.X_test)
        if forecast < 0:
            forecast[0] = 0
        if len(self.scale_list) > 0:
            forecast[0] = np.exp(forecast[0])
        return np.round(forecast.item(), 0)
Example #27
0
def getTrain(trainData, testData):
    """Fit Earth on ``trainData`` and count sign agreements on ``testData``.

    Every record holds its features first and the target in the last position;
    the record width is taken from the first test record.

    :return: number of test records for which target * prediction >= 0
    """
    n_train = len(trainData)
    n_test = len(testData)
    target_col = len(testData[0]) - 1

    # Copy the training records into a feature matrix and a target column.
    X = numpy.zeros((n_train, target_col))
    Y = numpy.zeros((n_train, 1))
    for row, record in enumerate(trainData):
        for col in range(target_col):
            X[row][col] = record[col]
        Y[row][0] = float(record[target_col])

    # Same layout for the test features.
    dX = numpy.zeros((n_test, target_col))
    for row, record in enumerate(testData):
        for col in range(target_col):
            dX[row][col] = record[col]

    model = Earth()
    model.fit(X, Y)
    y_hat = model.predict(dX)

    # A non-negative product means the prediction has the target's sign (or one of them is zero).
    return sum(1 for i in range(n_test)
               if testData[i][target_col] * y_hat[i] >= 0)
Example #28
0
def mars(df_train, df_test, exogenous_features, max_degree=2):
    """Forecast one value with Earth, short-circuiting to 0 when IP and CON are both zero.

    :param df_train: training frame containing ``exogenous_features`` and ``y``
    :param df_test: frame describing the row to forecast (columns ``IP``, ``CON``
        and the exogenous features)
    :param exogenous_features: column names used as model inputs
    :param max_degree: forwarded to ``Earth``
    :return: 0 for the short-circuit case, otherwise the forecast floored at
        zero and rounded to zero decimals
    """
    no_ip = df_test['IP'].values == 0
    no_con = df_test['CON'].values == 0
    if no_ip & no_con:
        return 0

    earth_options = dict(max_degree=max_degree,
                         allow_missing=True,
                         enable_pruning=True,
                         minspan_alpha=.5,
                         thresh=.001,
                         smooth=False,
                         verbose=False)
    fitted = Earth(**earth_options).fit(df_train[exogenous_features],
                                        df_train['y'])

    forecast = fitted.predict(df_test[exogenous_features])
    if forecast < 0:
        forecast[0] = 0
    return np.round(forecast.item(), 0)
def mars_forecast(x_train, x_test, y_train, timestamp):
    """Fit an Earth model, forecast ``x_test`` and build an HTML report.

    :param x_train: training features
    :param x_test: features to forecast
    :param y_train: training target
    :param timestamp: forwarded to ``outputReport.regression_extanded_results_forecast``
    :return: tuple ``(filename, result)`` where ``result`` is the model
        summary (when available) followed by the forecast table as HTML
    """
    model = Earth(max_degree=1, penalty=1.0, endspan=5)
    model = model.fit(x_train, y_train)

    y_pred = pd.DataFrame(model.predict(x_test), columns=["Forecasted Values"])

    filename = outputReport.regression_extanded_results_forecast(
        timestamp, y_pred, "mars forecast")

    # The summary is best-effort, but only ordinary errors are swallowed:
    # a bare ``except`` would also hide KeyboardInterrupt / SystemExit.
    try:
        model_summary_final = str(model.summary()).replace("\n", "<br>")
        result = ("<br>Model Parameters:<br>" + str(model.get_params())
                  + "<br>Model Summary:<br>" + model_summary_final)
    except Exception:
        result = "Model Summary is not available for MARS"

    result += str(
        y_pred.to_html(formatters={'Name': lambda x: '<b>' + x + '</b>'}))

    return filename, result
Example #30
0
class MARSInterpolant(Earth):
    """Compute and evaluate a MARS interpolant

    Points are collected in pre-allocated buffers and the model is refitted
    lazily, on the first evaluation after a point has been added.

    :ivar nump: Current number of points
    :ivar maxp: Initial maximum number of points (can grow)
    :ivar x: Interpolation points
    :ivar fx: Function evaluations of interpolation points
    :ivar dim: Number of dimensions
    :ivar model: MARS interpolation model
    :ivar updated: True when the model reflects all points added so far
    """

    def __init__(self, maxp=100):
        # NOTE(review): the base-class initialiser is not called and all work is
        # delegated to ``self.model``; confirm whether subclassing Earth is needed.
        self.nump = 0
        self.maxp = maxp
        self.x = None     # pylint: disable=invalid-name
        self.fx = None
        self.dim = None
        self.model = Earth()
        self.updated = False

    def reset(self):
        """Reset the interpolation."""
        self.nump = 0
        self.x = None
        self.fx = None
        self.updated = False

    def _alloc(self, dim):
        """Allocate storage for x and fx.

        :param dim: Number of dimensions
        """
        maxp = self.maxp
        self.dim = dim
        self.x = np.zeros((maxp, dim))
        self.fx = np.zeros((maxp, 1))

    def _realloc(self, dim, extra=1):
        """Expand allocation to accommodate more points (if needed)

        :param dim: Number of dimensions
        :param extra: Number of additional points to accommodate
        """
        if self.nump == 0:
            self._alloc(dim)
        elif self.nump + extra > self.maxp:
            self.maxp = max(self.maxp * 2, self.maxp + extra)
            # Grow into fresh zero-filled arrays. An in-place ndarray.resize
            # raises ValueError as soon as a view handed out by get_x()/get_fx()
            # is still referenced by a caller.
            grown_x = np.zeros((self.maxp, dim))
            grown_x[:self.nump, :] = self.x[:self.nump, :]
            grown_fx = np.zeros((self.maxp, 1))
            grown_fx[:self.nump, :] = self.fx[:self.nump, :]
            self.x, self.fx = grown_x, grown_fx

    def _ensure_fitted(self):
        """Refit the model when points were added since the last fit."""
        if self.updated is False:
            # Fit on the points actually added; the unused tail of the buffers
            # is zero padding and must not enter the model as data.
            self.model.fit(self.get_x(), self.get_fx())
        self.updated = True

    def get_x(self):
        """Get the list of data points

        :return: List of data points
        """
        return self.x[:self.nump, :]

    def get_fx(self):
        """Get the list of function values for the data points.

        :return: List of function values
        """
        return self.fx[:self.nump, :]

    def add_point(self, xx, fx):
        """Add a new function evaluation

        :param xx: Point to add
        :param fx: The function value of the point to add
        """
        dim = len(xx)
        self._realloc(dim)
        self.x[self.nump, :] = xx
        self.fx[self.nump, :] = fx
        self.nump += 1
        self.updated = False

    def eval(self, xx, d=None):
        """Evaluate the MARS interpolant at the point xx

        :param xx: Point where to evaluate
        :param d: Unused by this implementation
        :return: Value of the MARS interpolant at x
        """
        self._ensure_fitted()

        xx = np.expand_dims(xx, axis=0)
        fx = self.model.predict(xx)
        return fx[0]

    def evals(self, xx, d=None):
        """Evaluate the MARS interpolant at the points xx

        :param xx: Points where to evaluate
        :param d: Unused by this implementation
        :return: Values of the MARS interpolant at x, as a column
        """
        self._ensure_fitted()

        fx = np.zeros(shape=(xx.shape[0], 1))
        fx[:, 0] = self.model.predict(xx)
        return fx

    def deriv(self, x, d=None):
        """Evaluate the derivative of the MARS interpolant at x

        :param x: Data point
        :param d: Unused by this implementation
        :return: Derivative of the MARS interpolant at x
        """
        self._ensure_fitted()

        x = np.expand_dims(x, axis=0)
        dfx = self.model.predict_deriv(x, variables=None)
        return dfx[0]
Example #31
0
def earth(x, y):
    """Fit an Earth model (at most 30 terms) on (x, y) and return its predictions for x."""
    features = np.array(x)
    targets = np.array(y)
    fitted = Earth(max_terms=30, endspan=2, thresh=0.00001)
    fitted.fit(features, targets)
    # Prediction is made on the input as passed in, not on the array copy.
    return fitted.predict(x)
Example #32
0
# One row of two subplots per weight setting: the left panel shows the first
# output against X[:, 6], the right panel the second output against X[:, 5].
k = 1
fig = plt.figure()
for i, alpha in enumerate(alphas):
    # Fit an Earth model
    model = Earth(max_degree=5,
                  minspan_alpha=.05,
                  endspan_alpha=.05,
                  max_terms=10,
                  check_every=1,
                  thresh=0.)
    # The two outputs are weighted alpha and 1 - alpha.
    output_weight = np.array([alpha, 1 - alpha])
    model.fit(X, y_mix, output_weight=output_weight)
    print(model.summary())

    # Plot the model; the predictions are computed once and reused for the
    # error and for both panels instead of calling predict() three times.
    y_hat = model.predict(X)

    mse = ((y_hat - y_mix) ** 2).mean(axis=0)
    ax = plt.subplot(n_plots, 2, k)
    ax.set_ylabel("Run {0}".format(i + 1), rotation=0, labelpad=20)
    plt.plot(X[:, 6], y_mix[:, 0], 'r.')
    plt.plot(X[:, 6], y_hat[:, 0], 'b.')
    plt.title("MSE: {0:.3f}, Weight : {1:.1f}".format(mse[0], alpha))
    plt.subplot(n_plots, 2, k + 1)
    plt.plot(X[:, 5], y_mix[:, 1], 'r.')
    plt.plot(X[:, 5], y_hat[:, 1], 'b.')
    plt.title("MSE: {0:.3f}, Weight : {1:.1f}".format(mse[1], 1 - alpha))
    k += 2
plt.tight_layout()
plt.show()
Example #33
0
def mars_method(energy,
                absorption_coefficient,
                bg_type='direct',
                show_graph=True):
    """Estimate the x-intercept of the dominant linear segment of a Tauc plot.

    The absorption values are normalised by their maximum and fitted with an
    Earth model. The knots of the fit split the energy range into linear
    segments; each rising segment with a positive x-intercept and more than
    ten samples is scored, and the x-intercept of the best one is returned.

    :param energy: sequence of energies (x values)
    :param absorption_coefficient: sequence of absorption values (y values)
    :param bg_type: accepted for interface compatibility; not used here
    :param show_graph: when truthy, plot the data, the fit and the winning line
    :return: x-intercept of the winning segment, or the string
        ``'Problem in MARS fitting parameters!'`` when the fit raises ValueError
    :raises ValueError: when no segment satisfies the selection criteria
    """
    # Normalise by the peak value; the maximum is computed once instead of
    # once per sample.
    raw_values = list(absorption_coefficient)
    if raw_values:
        peak = max(raw_values)
        direct_abs_corrected = [k / peak for k in raw_values]
    else:
        direct_abs_corrected = []

    model = Earth()
    try:
        model.fit(np.array(energy), np.array(direct_abs_corrected))
    except ValueError:
        return 'Problem in MARS fitting parameters!'

    # Knot positions are read from the textual form of each basis function
    # (first decimal number found); the intercept term is skipped.
    knot_pattern = re.compile(r"\d+\.\d+")
    energy_elbows = [min(energy), max(energy)]
    for coeff in list(model.basis_)[1:]:
        found = knot_pattern.findall(str(coeff))
        if not found:
            # Basis function without a decimal knot: report and skip it.
            print(coeff)
            continue
        knot = float(found[0])
        if knot not in energy_elbows:
            energy_elbows.append(knot)

    y_hat = model.predict(energy)
    if show_graph:
        plt.figure()
        plt.scatter(energy, direct_abs_corrected, color='k')
        plt.plot(energy, y_hat, 'b.')
        plt.xlabel('Energy (eV)', fontsize=14)
        plt.ylabel('(E' + u"\u03B1" + ')' + u"\u00B2", fontsize=14)
        plt.title('Tauc Plot for Direct Transitions', fontsize=20)
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
    function = export.export_sympy(model)

    # Evaluate the exported expression at every elbow and order the points by energy.
    elbows_list = sorted(
        (elbow, function.evalf(subs={'x0': elbow})) for elbow in energy_elbows)

    line_segs = []
    for (x_start, y_start), (x_end, y_end) in zip(elbows_list, elbows_list[1:]):
        # Number of samples strictly inside the segment.
        num_pts = sum(1 for w in energy if w > x_start and w < x_end)
        x_length = x_end - x_start
        length = (x_length**2 + (y_end - y_start)**2)**.5
        slope = (y_end - y_start) / x_length
        y_intercept = y_end - slope * x_end
        x_intercept = (-1 * y_intercept) / slope
        weighting_factor = slope**2 * x_length * 2 * abs(length)**.5 * num_pts
        try:
            if x_intercept > 0 and slope > 0 and num_pts > 10:
                line_segs.append((x_length, length, slope, y_intercept,
                                  x_intercept, weighting_factor))
        except TypeError:
            # Symbolic values that cannot be ordered (e.g. a zero slope giving a
            # complex infinity) end up here; the segment is ignored.
            print('Weird complex zoo error..')

    if not line_segs:
        raise ValueError(
            'no linear segment with positive slope, positive x-intercept '
            'and more than 10 points was found')
    winner = max(line_segs, key=lambda item: item[5])

    adj_energy = np.linspace(min(energy), max(energy), num=1000)
    winner_slope, winner_offset = float(winner[2]), float(winner[3])
    adj_winner = [t * winner_slope + winner_offset for t in adj_energy]
    if show_graph:
        plt.scatter(adj_energy, adj_winner, color='r')
        plt.axis([min(energy), max(energy), 0, 1])
        plt.show()
    return winner[4]
Example #34
0
# Two noisy targets that both depend on column 6 of X.
y1 = 100 * \
    numpy.abs(numpy.sin((X[:, 6]) / 10) - 4.0) + \
    10 * numpy.random.normal(size=m)

y2 = 100 * \
    numpy.abs(numpy.sin((X[:, 6]) / 2) - 8.0) + \
    5 * numpy.random.normal(size=m)

# Fit an Earth model on both targets at once (one output column each)
model = Earth(max_degree=3, minspan_alpha=.5)
y_mix = numpy.concatenate((y1[:, numpy.newaxis], y2[:, numpy.newaxis]), axis=1)
model.fit(X, y_mix)

# Print the model
print(model.trace())
print(model.summary())

# Plot the model; predictions are computed once and reused for both panels
# instead of calling predict() again for each plot.
y_hat = model.predict(X)

fig = plt.figure()

ax = fig.add_subplot(1, 2, 1)
ax.plot(X[:, 6], y_mix[:, 0], 'r.')
ax.plot(X[:, 6], y_hat[:, 0], 'b.')

ax = fig.add_subplot(1, 2, 2)
ax.plot(X[:, 6], y_mix[:, 1], 'r.')
ax.plot(X[:, 6], y_hat[:, 1], 'b.')
plt.show()
def test_export_python_function():
    """Exported plain-Python predictor must match ``Earth.predict``.

    Runs once with smoothing enabled and once without, comparing the two
    sets of predictions on ``X`` element by element.
    """
    for smooth in (True, False):
        fitted = Earth(penalty=1, smooth=smooth, max_degree=2).fit(X, y)
        exported = export_python_function(fitted)
        from_model = fitted.predict(X)
        from_export = exported(X)
        for model_value, export_value in zip(from_model, from_export):
            assert_almost_equal(model_value, export_value)
# array([3.21838587, 3.16720653, 3.25737585, 3.2542665 , 3.24746355])

# 3. Lasso Regression
# Score a Lasso model through the project helper kFoldValidation (called
# here with 5 as its first argument; presumably the number of folds --
# TODO confirm against the helper's definition).
lasso = Lasso(alpha=0.01)
lassoMSE = kFoldValidation(5, lasso, array_train, array_y)
# Bare expression: only echoes the value in an interactive session.
lassoMSE
# (presumably the output recorded from a previous run)
# array([3.23388954, 3.18301436, 3.27518402, 3.27289743, 3.26569614])

# 4. Spline
# Since it is too slow to do the k cross validation for spline,
# just use validation set to test the performance.
spline = Earth()
spline.fit(array_train, array_y)
array_val = np.array(x_val)
# The validation target is the log1p of the first column of y_val.
array_y_val = np.array(np.log1p(y_val.iloc[:, 0]))
preds_val = spline.predict(array_val)
# Mean squared error on the validation set.
splineMSE = np.mean((preds_val - array_y_val)**2)
# Bare expression: only echoes the value in an interactive session.
splineMSE
# (presumably the output recorded from a previous run)
# 3.5848521191901126

# 5. Random Forest
rf = RandomForestRegressor(max_depth=20, random_state=42, n_estimators=100)
rf.fit(array_train, array_y)

# Feature importance
# Very interesting. The top 14 important features are not consistent
# with the top 14 correlated features.

# Map each column name of x_train to the importance reported by the forest.
dic = {}
for feature, importance in zip(x_train.columns, rf.feature_importances_):
    dic[feature] = importance
Example #37
0
    x = np.exp(x) - 1
    return x

def graph(x, y, y2, a, b, Title):
    """Plot actual (red) against predicted (blue) values and show the figure.

    Only the window ``[a:b]`` of ``x``, ``y`` and ``y2`` is drawn.

    :param x: values for the horizontal axis
    :param y: actual values
    :param y2: predicted values
    :param a: start index of the plotted window
    :param b: end index (exclusive) of the plotted window
    :param Title: title of the plot
    :return: the figure that was created
    """
    figure = plt.figure()
    window = slice(a, b)
    curves = ((y, 'r', 'Actual'), (y2, 'b', 'Predicted'))
    for series, fmt, name in curves:
        plt.plot(x[window], series[window], fmt, label=name)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title(Title)
    plt.legend(loc='upper left')
    plt.show()
    return figure

# Predict training series
y_hat = mars.predict(x)
x_train = list(range(0,len(y)))

# Process test data
# Keep only the model columns, as strings, so that the thousands separators
# can be stripped before converting to float.
test = test[cols].astype(str)
for i in cols:
    # Strip the commas column-wise.  The previous element-wise
    # `test[i][j] = ...` was a chained assignment (it may write to a
    # temporary instead of `test`) and required the index to be 0..n-1.
    test[i] = test[i].str.replace(",", "")

test = test.astype(float)

# Technical indicators computed on the '<OPEN>' column.
HT = talib.HT_DCPERIOD(test['<OPEN>'])
std = talib.STDDEV(test['<OPEN>'], timeperiod=7, nbdev=1)

# Wrap each indicator in a single-column DataFrame.
HT = pd.DataFrame(data={'HT_DCPERIOD':HT})
std = pd.DataFrame(data={'STDDEV':std})
Example #38
0
def generate_MARS(
    training_data,
    modelname="4.01-MARS",
    responseColumn="log(q30)",
    predictorColumns="default",  #default => all non-response columns in training data
    max_degree=2,
    minspan_alpha=0.5,
    smooth=False,
    trainingSplitRatio=0.8,
    # NOTE(review): this default is evaluated once, when the function is
    # defined, so every call relying on it shares the same generator object.
    trainingSplitRandom=random.RandomState(),
    persist=True,
    returnTestSetResults=False,  #True ==> will return predictions, the actual, and the predictors
    verbose=True  #True ==> print a summary of the run
):
    """Fit a py-earth MARS model on a train split and score it on the rest.

    The data is split with ``train_test_split``, an ``Earth`` model is fitted
    on the training part and R^2 is computed on the held-out part.  When
    ``persist`` is true the model is dumped with joblib to
    ``models/<modelname>/<today_string>/model.pkl`` and a two-panel
    diagnostics figure (predicted vs. actual, residuals) is saved to the
    ``0-images/`` subdirectory.

    :param training_data: data handed to ``replace_nans_infs`` and ``splitXy``
    :param modelname: directory name used under ``models/``
    :param responseColumn: forwarded to ``splitXy``
    :param predictorColumns: forwarded to ``splitXy``
    :param max_degree: forwarded to ``Earth``
    :param minspan_alpha: forwarded to ``Earth``
    :param smooth: forwarded to ``Earth``
    :param trainingSplitRatio: ``train_size`` for ``train_test_split``
    :param trainingSplitRandom: ``random_state`` for ``train_test_split``
    :param persist: save the model and the diagnostics figure to disk
    :param returnTestSetResults: return the test-set predictions and data
    :param verbose: print a summary of the run
    :return: ``(yhat, ytest, Xtest)`` if ``returnTestSetResults`` is true,
        otherwise ``None``
    """

    from pyearth import Earth

    model = Earth(max_degree=max_degree,
                  minspan_alpha=minspan_alpha,
                  smooth=smooth)

    # The return value is ignored, so this presumably cleans training_data
    # in place -- TODO confirm against the helper.
    replace_nans_infs(training_data)
    X, y = splitXy(training_data, responseColumn, predictorColumns)
    Xtrain, Xtest, ytrain, ytest = train_test_split(
        X, y, train_size=trainingSplitRatio, random_state=trainingSplitRandom)
    model.fit(Xtrain, ytrain)

    ##Model evaluation:

    yhat = model.predict(Xtest)
    # r2_score takes (y_true, y_pred); R^2 is not symmetric, so the order
    # of the two arguments matters.
    R2 = r2_score(ytest, yhat)  #imported above via sklearn.metrics

    ##If model is successfully generated, output results##

    modelDir = "models/%s/%s/" % (modelname, today_string)
    imageDir = modelDir + "0-images/"

    if persist:

        # imageDir lives inside modelDir, so creating it creates both.  This
        # also covers the case where modelDir exists but imageDir does not.
        if not os.path.isdir(imageDir):
            os.makedirs(imageDir)

        joblib.dump(model, modelDir + "model.pkl")

        plt.rcParams['figure.figsize'] = [9, 4]
        plt.subplot(121)
        plt.scatter(ytest, yhat, alpha=.1)
        plt.plot([0, 10], [0, 10])
        #plt.xlim(0,10); plt.ylim(0,10)
        plt.xlabel("actual")
        plt.ylabel("predicted")
        plt.title("Model = MARS(%i)\t\t $R^2$=%.2f" % (max_degree, R2))
        plt.subplot(122)
        sns.set(style="whitegrid")
        # Keyword arguments: recent seaborn releases no longer accept the
        # data vectors positionally.
        sns.residplot(x=ytest, y=yhat)  #, lowess=True)

        plt.savefig(imageDir + "diagnostics.png")
        plt.close()

    if verbose:
        # Printed before the optional early return so the summary is not
        # skipped when the test-set results are requested.
        message = "MARS(%i) model successfully generated! \t\t\t\t\t(Train: %i, Test: %i)\n" % (
            max_degree, len(ytrain), len(ytest))
        if persist:
            # Only mention the output locations when something was written.
            message += "\tModel file saved in:\t\t%s\n\tDiagnostics plots saved in:\t%s\n" % (
                modelDir, imageDir)
        print(message)

    if returnTestSetResults:
        return yhat, ytest, Xtest
Example #39
0
def test_export_python_function():
    """Check the exported Python function against the fitted Earth model.

    For both the smoothed and the unsmoothed model, every prediction of the
    exported function on ``X`` must be almost equal to ``model.predict(X)``.
    """
    for smooth in (True, False):
        earth_model = Earth(penalty=1, smooth=smooth, max_degree=2).fit(X, y)
        exported_fn = export_python_function(earth_model)
        reference = earth_model.predict(X)
        candidate = exported_fn(X)
        for ref_value, cand_value in zip(reference, candidate):
            assert_almost_equal(ref_value, cand_value)
# Build a reproducible synthetic data set: m samples, n features drawn
# uniformly from [-40, 40).
numpy.random.seed(0)
m = 1000
n = 10
X = 80 * numpy.random.uniform(size=(m, n)) - 40
# The response depends only on column 6 (a V shape with its kink at 4)
# plus standard normal noise.
y = numpy.abs(X[:, 6] - 4.0) + 1 * numpy.random.normal(size=m)

# Fit an Earth model
model = Earth()
model.fit(X, y)

# Print the model
print(model.trace())
print(model.summary())

# Plot the model
# Data in red, model predictions in blue, both against column 6 of X;
# the figure is written to disk rather than shown.
y_hat = model.predict(X)
pyplot.figure()
pyplot.plot(X[:, 6], y, 'r.')
pyplot.plot(X[:, 6], y_hat, 'b.')
pyplot.xlabel('x_6')
pyplot.ylabel('y')
pyplot.title('Simple Earth Example')
pyplot.savefig('simple_earth_example.png')

#=========================================================================
# Hinge plot
#=========================================================================
from xkcdify import XKCDify
# NOTE: x and y are rebound here; y becomes the hinge max(0, x) on a
# grid from -10 to 10 in steps of 0.1.
x = numpy.arange(-10, 10, .1)
y = x * (x > 0)
Example #41
0
folder_path = join(DATA_DIR, 'models', folder_name)
# Raises if the folder already exists, so an earlier run is not overwritten.
os.makedirs(folder_path)
# Dump the hyperparameter dictionary
with open(join(folder_path, 'hyperparameters.pkl'), 'wb') as f:
    pickle.dump(hp, f, -1)

# Train one Earth model per column of y, timing training and prediction.
training_times = []
for a in range(y.shape[1]):
    start = time.time()
    y_train = y_train_mat[:, a:(a + 1)].ravel()
    model = Earth(**hp)
    model.fit(X_train_scaled, y_train)
    end = time.time()
    print('Fast MARS t-7, a{0} took {1} to train'.format(a, end - start))
    training_times.append(end - start)

    # Store the model next to the hyperparameters.
    with open(join(folder_path, 'MARS_a{0}.pkl'.format(a)), 'wb') as f:
        pickle.dump(model, f, -1)

    start = time.time()
    # NOTE(review): the model is fitted on X_train_scaled but predicts on
    # X_test -- confirm that X_test has been scaled the same way.
    y_pred_mat[:, a] = model.predict(X_test)
    end = time.time()
    print('Fast MARS t-7, a{0} took {1} to predict'.format(a, end - start))
    sys.stdout.flush()
RMSE = mean_squared_error(y_test_mat, y_pred_mat) ** 0.5
print(RMSE)
# Text mode: the statistics are written as a str, not bytes.
with open(join(folder_path, 'stats.txt'), 'w') as f:
    f.write('{0}\n{1}\n'.format(str(RMSE), sum(training_times)))
def test_export_sympy():
    """Check that exported sympy expressions reproduce ``Earth.predict``.

    Every combination of smoothing on/off, one or two response columns and
    missing values allowed/disallowed is fitted.  Each exported expression
    is lambdified and its output compared with the model's prediction for
    the matching response column.
    """
    import pandas as pd
    from sympy.utilities.lambdify import lambdify
    from sympy.printing.lambdarepr import NumPyPrinter

    class PyEarthNumpyPrinter(NumPyPrinter):
        """Printer that emits the function names listed in ``module_dict``."""

        def _print_Max(self, expr):
            return 'maximum(' + ','.join(self._print(i)
                                         for i in expr.args) + ')'

        def _print_NaNProtect(self, expr):
            return 'where(isnan(' + ','.join(self._print(a) for a in expr.args) + '), 0, ' \
                + ','.join(self._print(a) for a in expr.args) + ')'

        def _print_Missing(self, expr):
            return 'isnan(' + ','.join(self._print(a)
                                       for a in expr.args) + ').astype(float)'

    # Names made available to the lambdified functions.  The table does not
    # depend on the loop variables, so it is built once.
    module_dict = {
        'select': numpy.select,
        'less_equal': numpy.less_equal,
        'isnan': numpy.isnan,
        'greater_equal': numpy.greater_equal,
        'logical_and': numpy.logical_and,
        'less': numpy.less,
        'logical_not': numpy.logical_not,
        "greater": numpy.greater,
        'maximum': numpy.maximum,
        'Missing': lambda x: numpy.isnan(x).astype(float),
        'NaNProtect': lambda x: numpy.where(numpy.isnan(x), 0, x),
        'nan': numpy.nan,
        'float': float,
        'where': numpy.where
    }

    for smooth, n_cols, allow_missing in product((True, False), (1, 2),
                                                 (True, False)):
        # Fresh copy per combination because missing values are injected.
        X_df = pd.DataFrame(X.copy(),
                            columns=['x_%d' % i for i in range(X.shape[1])])
        y_df = pd.DataFrame(Y[:, :n_cols])
        if allow_missing:
            # Randomly remove some values so that the fitted model contains MissingnessBasisFunctions
            missing_rows = numpy.random.binomial(
                n=1, p=.1, size=X_df.shape[0]).astype(bool)
            # A single .loc assignment writes into X_df itself; the chained
            # form X_df['x_1'][mask] = ... may write to a temporary copy.
            X_df.loc[missing_rows, 'x_1'] = numpy.nan

        model = Earth(allow_missing=allow_missing, smooth=smooth,
                      max_degree=2).fit(X_df, y_df)
        # Wrap the single-response result so both cases can be iterated.
        expressions = export_sympy(model) if n_cols > 1 else [
            export_sympy(model)
        ]

        for i, expression in enumerate(expressions):
            # The lambdified functions for smoothed basis functions only work with modules='numpy' and
            # for regular basis functions with modules={'Max':numpy.maximum}.  This is a confusing situation
            func = lambdify(X_df.columns,
                            expression,
                            printer=PyEarthNumpyPrinter,
                            modules=module_dict)
            y_pred_sympy = func(*[X_df.loc[:, var] for var in X_df.columns])

            y_pred = model.predict(
                X_df)[:, i] if n_cols > 1 else model.predict(X_df)
            assert_array_almost_equal(y_pred, y_pred_sympy)
Example #43
0
class MARSInterpolant(Surrogate):
    """Surrogate model backed by Multivariate Adaptive Regression Splines.

    The fitted model has the form

    .. math::

        \\hat{f}(x) = \\sum_{i=1}^{k} c_i B_i(x),

    a weighted sum of basis functions :math:`B_i(x)`, each of which is one
    of:

    1. the constant 1;
    2. a hinge function :math:`\\max(0, x - const)` or
       :math:`\\max(0, const - x)`, with the variables and knot values
       selected automatically by MARS;
    3. a product of two or more hinge functions, which can model
       interaction between two or more variables.

    :param dim: Number of dimensions
    :type dim: int

    :ivar dim: Number of dimensions
    :ivar num_pts: Number of points in surrogate model
    :ivar X: Point incorporated in surrogate model (num_pts x dim)
    :ivar fX: Function values in surrogate model (num_pts x 1)
    :ivar updated: True if model is up-to-date (no refit needed)
    :ivar model: Earth object
    """
    def __init__(self, dim):
        self.num_pts = 0
        self.X = np.empty([0, dim])
        self.fX = np.empty([0, 1])
        self.dim = dim
        self.updated = False

        # pyearth is imported lazily so the failure is reported at the point
        # where a MARS surrogate is actually requested.
        try:
            from pyearth import Earth
            self.model = Earth()
        except ImportError as err:
            print("Failed to import pyearth")
            raise err

    def _fit(self):
        """Refit the Earth model unless it is already up to date."""
        if self.updated is not False:
            return
        # Warnings raised while fitting are muted; the filter is restored
        # when the context manager exits.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.model.fit(self.X, self.fX)
        self.updated = True

    def predict(self, xx):
        """Evaluate the MARS interpolant at the points xx

        :param xx: Prediction points, must be of size num_pts x dim or (dim, )
        :type xx: numpy.ndarray

        :return: Prediction of size num_pts x 1
        :rtype: numpy.ndarray
        """
        self._fit()
        points = np.atleast_2d(xx)
        values = self.model.predict(points)
        return np.expand_dims(values, axis=1)

    def predict_deriv(self, xx):
        """Evaluate the derivative of the MARS interpolant at xx

        A leading axis is added to ``xx`` before it is handed to
        ``Earth.predict_deriv``, and the first entry of the result is
        returned.

        :param xx: Prediction point
        :type xx: numpy.array

        :return: Derivative of the MARS interpolant at xx
        :rtype: numpy.array
        """
        self._fit()
        batch = np.expand_dims(xx, axis=0)
        return self.model.predict_deriv(batch, variables=None)[0]
Example #44
0
class MARSInterpolant(Surrogate):
    """Compute and evaluate a MARS interpolant

    MARS builds a model of the form

    .. math::

        \\hat{f}(x) = \\sum_{i=1}^{k} c_i B_i(x).

    The model is a weighted sum of basis functions :math:`B_i(x)`. Each basis
    function :math:`B_i(x)` takes one of the following three forms:

    1. a constant 1.
    2. a hinge function of the form :math:`\\max(0, x - const)` or
       :math:`\\max(0, const - x)`. MARS automatically selects variables
       and values of those variables for knots of the hinge functions.
    3. a product of two or more hinge functions. These basis functions
       can model interaction between two or more variables.

    :param dim: Number of dimensions
    :type dim: int

    :ivar dim: Number of dimensions
    :ivar num_pts: Number of points in surrogate model
    :ivar X: Point incorporated in surrogate model (num_pts x dim)
    :ivar fX: Function values in surrogate model (num_pts x 1)
    :ivar updated: True if model is up-to-date (no refit needed)
    :ivar model: Earth object
    """
    def __init__(self, dim):
        self.num_pts = 0
        self.X = np.empty([0, dim])
        self.fX = np.empty([0, 1])
        self.dim = dim
        self.updated = False

        # pyearth is imported here rather than at module level, so a missing
        # installation only fails when a MARS surrogate is constructed.
        try:
            from pyearth import Earth
            self.model = Earth()
        except ImportError as err:
            print("Failed to import pyearth")
            raise err

    def _fit(self):
        """Compute new coefficients if the MARS interpolant is not updated."""
        # Scope the "ignore" filter to this fit.  Calling simplefilter()
        # outside catch_warnings() would silence every warning in the whole
        # process for the rest of its lifetime.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # Suppress deprecation warnings
            if self.updated is False:
                self.model.fit(self.X, self.fX)
                self.updated = True

    def predict(self, xx):
        """Evaluate the MARS interpolant at the points xx

        :param xx: Prediction points, must be of size num_pts x dim or (dim, )
        :type xx: numpy.ndarray

        :return: Prediction of size num_pts x 1
        :rtype: numpy.ndarray
        """
        self._fit()
        # Promote a single point of shape (dim, ) to a 1 x dim array.
        xx = np.atleast_2d(xx)
        return np.expand_dims(self.model.predict(xx), axis=1)

    def predict_deriv(self, xx):
        """Evaluate the derivative of the MARS interpolant at xx

        A leading axis is added to ``xx`` before it is handed to
        ``Earth.predict_deriv``, and the first entry of the result is
        returned.

        :param xx: Prediction point
        :type xx: numpy.array

        :return: Derivative of the MARS interpolant at xx
        :rtype: numpy.array
        """
        self._fit()
        xx = np.expand_dims(xx, axis=0)
        dfx = self.model.predict_deriv(xx, variables=None)
        return dfx[0]
Example #45
0
class MARSInterpolant(Earth):
    r"""Compute and evaluate a MARS interpolant

    MARS builds a model of the form

    .. math::

        \hat{f}(x) = \sum_{i=1}^{k} c_i B_i(x).

    The model is a weighted sum of basis functions :math:`B_i(x)`. Each basis
    function :math:`B_i(x)` takes one of the following three forms:

    1. a constant 1.
    2. a hinge function of the form :math:`\max(0, x - const)` or
       :math:`\max(0, const - x)`. MARS automatically selects variables
       and values of those variables for knots of the hinge functions.
    3. a product of two or more hinge functions. These basis functions
       can model interaction between two or more variables.

    :param maxp: Initial capacity
    :type maxp: int

    :ivar nump: Current number of points
    :ivar maxp: Initial maximum number of points (can grow)
    :ivar x: Interpolation points
    :ivar fx: Function evaluations of interpolation points
    :ivar dim: Number of dimensions
    :ivar model: MARS interpolation model
    :ivar updated: True if the model has been fitted to the current points
    """
    def __init__(self, maxp=100):
        # NOTE(review): Earth.__init__ is not called; fitting and prediction
        # are delegated to the separate Earth instance stored in self.model.
        self.nump = 0
        self.maxp = maxp
        self.x = None  # pylint: disable=invalid-name
        self.fx = None
        self.dim = None
        self.model = Earth()
        self.updated = False

    def reset(self):
        """Reset the interpolation."""

        self.nump = 0
        self.x = None
        self.fx = None
        self.updated = False

    def _alloc(self, dim):
        """Allocate zero-filled storage for x and fx.

        :param dim: Number of dimensions
        :type dim: int
        """

        maxp = self.maxp
        self.dim = dim
        self.x = np.zeros((maxp, dim))
        self.fx = np.zeros((maxp, 1))

    def _realloc(self, dim, extra=1):
        """Expand allocation to accommodate more points (if needed)

        :param dim: Number of dimensions
        :type dim: int
        :param extra: Number of additional points to accommodate
        :type extra: int
        """

        if self.nump == 0:
            # The first allocation must be able to hold `extra` points even
            # when the requested initial capacity is smaller (e.g. maxp=0).
            self.maxp = max(self.maxp, extra)
            self._alloc(dim)
        elif self.nump + extra > self.maxp:
            self.maxp = max(self.maxp * 2, self.maxp + extra)
            # Grow by copying into new arrays.  An in-place ndarray.resize()
            # raises ValueError while a view handed out by get_x()/get_fx()
            # is still referenced.
            grown_x = np.zeros((self.maxp, dim))
            grown_x[:self.nump, :] = self.x[:self.nump, :]
            grown_fx = np.zeros((self.maxp, 1))
            grown_fx[:self.nump, :] = self.fx[:self.nump, :]
            self.x = grown_x
            self.fx = grown_fx

    def _ensure_fitted(self):
        """Fit the model to the stored points unless it is already up to date."""
        if self.updated is False:
            self.model.fit(self.get_x(), self.get_fx())
        self.updated = True

    def get_x(self):
        """Get the list of data points

        :return: List of data points
        :rtype: numpy.array
        """

        return self.x[:self.nump, :]

    def get_fx(self):
        """Get the list of function values for the data points.

        :return: List of function values
        :rtype: numpy.array
        """

        return self.fx[:self.nump, :]

    def add_point(self, xx, fx):
        """Add a new function evaluation

        :param xx: Point to add
        :type xx: numpy.array
        :param fx: The function value of the point to add
        :type fx: float
        """

        dim = len(xx)
        self._realloc(dim)
        self.x[self.nump, :] = xx
        self.fx[self.nump, :] = fx
        self.nump += 1
        # The model no longer reflects all points; refit on next evaluation.
        self.updated = False

    def eval(self, x, ds=None):
        """Evaluate the MARS interpolant at the point x

        :param x: Point where to evaluate
        :type x: numpy.array
        :param ds: Not used
        :type ds: None
        :return: Value of the MARS interpolant at x
        :rtype: float
        """

        self._ensure_fitted()

        # Earth.predict is given a batch of one point.
        x = np.expand_dims(x, axis=0)
        fx = self.model.predict(x)
        return fx[0]

    def evals(self, x, ds=None):
        """Evaluate the MARS interpolant at the points x

        :param x: Points where to evaluate, of size npts x dim
        :type x: numpy.array
        :param ds: Not used
        :type ds: None
        :return: Values of the MARS interpolant at x, of size npts x 1
        :rtype: numpy.array
        """

        self._ensure_fitted()

        fx = np.zeros(shape=(x.shape[0], 1))
        fx[:, 0] = self.model.predict(x)
        return fx

    def deriv(self, x, ds=None):
        """Evaluate the derivative of the MARS interpolant at a point x

        :param x: Point for which we want to compute the MARS gradient
        :type x: numpy.array
        :param ds: Not used
        :type ds: None
        :return: Derivative of the MARS interpolant at x
        :rtype: numpy.array
        """

        self._ensure_fitted()

        x = np.expand_dims(x, axis=0)
        dfx = self.model.predict_deriv(x, variables=None)
        return dfx[0]
Example #46
0
#drawCumulativeHist(y,'PM2.5','Frequency','Curve cumulative of PM2.5')
## Box plot
#drawBox(y,'PM2.5','BOX of PM2.5')
##print y.shape
## Reshape y to make the later computations easier; from here on y is the reshaped y
y=y.reshape(-1,1)# does not affect the result

# Fitting
#1)Fit an Earth model
model = Earth()
model.fit(X,y) # the standardized data is used here
#2)Print the model results
print(model.trace())
print(model.summary())
#3)Predicted y
y_hat = model.predict(X)
#print y_hat
#print'RMSE',numpy.sqrt(metrics.mean_squared_error(y, y_hat))
#print'MSE',metrics.mean_squared_error(y, y_hat)

# Plot the results
pyplot.figure(figsize=(12,6)) 
pyplot.plot(X,y,'m+',label='original values')
pyplot.plot(X,y_hat,'b.',label='polyfit values')
pyplot.legend(loc=4) # place the legend in the lower-right corner

# Set the axis ticks
my_x_ticks = numpy.arange(0,3.5,0.5)
my_y_ticks = numpy.arange(0,600,50)
pyplot.xticks(my_x_ticks)
pyplot.yticks(my_y_ticks)
# Reproducible synthetic data: m samples with n standard normal features.
np.random.seed(1)
m = 1000
n = 5

X = np.random.normal(size=(m,n))

# Make X[:,1] binary
X[:,1] = np.random.binomial(1,.5,size=m)

# The response is a linear function of the inputs
y = 2 * X[:,0] + 3 * X[:,1] + np.random.normal(size=m)

# Fit the earth model
model = Earth().fit(X, y)

# Print the model summary, showing linear terms
# (the call form of print works on both Python 2 and Python 3)
print(model.summary())

# Plot for both values of X[:,1]
y_hat = model.predict(X)
# Row masks for the two levels of the binary feature, computed once.
is_zero = X[:,1] == 0
is_one = X[:,1] == 1
plt.figure()
plt.plot(X[:,0], y, 'k.')
plt.plot(X[is_zero, 0], y_hat[is_zero], 'r.', label='$x_1 = 0$')
plt.plot(X[is_one, 0], y_hat[is_one], 'b.', label='$x_1 = 1$')
plt.legend(loc='best')
plt.xlabel('$x_0$')
plt.show()


Example #48
0
         'Old Qual_Score',
         'Old Avg_Position',
         'Old Impressions',
         'New Impressions',
         #'New Avg_CPC',
         'Old Avg_CPC',
         'New Keyword Density', 
         'Old Keyword Density',
         'New Value_Click',
         'Old Value_Click']]

#Print the model
# (the call form of print works on both Python 2 and Python 3)
print(model.trace())
print(model.summary())

y_cpc_hat = model.predict(X_cpc_hat)

#Plot the model
# Observed values in red, model predictions in blue.  No x values are
# given, so both series are drawn against their positions.

pyplot.figure()
pyplot.plot(y_cpc,'r.')
pyplot.plot(y_cpc_hat,'b.')
pyplot.xlabel('x')
pyplot.ylabel('y')
pyplot.title('MARS Regression')
pyplot.show()


'''
#Build Conv_Rate Model
#Build conv table