def marsmodelorr(self, use_smY=True, slope_trunc=0.00001, savgol_window=151, savgol_order=3, ex_order=51):
    """Fit a MARS model to the stored curve and collect figures of merit.

    Reads ``self.X_``/``self.Y_`` and ``self.Xf_``/``self.Yf_`` and stores the
    resulting dictionary in ``self.fom``.

    :param use_smY: fit the model on the ``savgol``-smoothed ``Y`` when true,
        on the raw ``Y`` otherwise
    :param slope_trunc: minimum change of the normalised slope between two
        neighbouring samples for a new linear segment to start
    :param savgol_window: window argument forwarded to ``savgol``
    :param savgol_order: order argument forwarded to ``savgol``
    :param ex_order: ``order`` argument forwarded to ``argrelextrema``
    """
    Xf, Yf = self.Xf_, self.Yf_
    X, Y = self.X_, self.Y_

    # smooth the data
    smY = savgol(Y, savgol_window, savgol_order)

    # perform mars on either the smoothed or the raw signal
    model = MARS()
    model.fit(X, smY if use_smY else Y)
    Y_h = model.predict(X)

    # dy/dx of the prediction is used to get knots and intercepts, as these
    # are complicated to extract from the hinge functions directly.
    dydx = np.diff(Y_h) / np.diff(X)
    shifted = dydx - np.nanmin(dydx)
    norm_dydx = shifted / np.nanmax(shifted)

    # calculate slopes of linear segments: a segment starts wherever the
    # normalised slope jumps by more than slope_trunc
    breaks = [0]
    for idx in range(1, len(norm_dydx)):
        if np.abs(norm_dydx[idx] - norm_dydx[idx - 1]) > slope_trunc:
            breaks.append(idx)
    breaks.append(np.argmax(X))  # this might cause an error

    # The final entry of ``breaks`` only closes the list; the last interval is
    # not turned into a segment.
    n_segments = len(breaks) - 2
    slopes = [np.nanmean(dydx[breaks[j]:breaks[j + 1]]) for j in range(n_segments)]
    intercepts = [Y_h[breaks[j]] - slopes[j] * X[breaks[j]] for j in range(n_segments)]
    steepest = np.argmax(slopes)
    flattest = np.argmin(np.abs(slopes))
    top_slope = slopes[steepest]
    top_intercept = intercepts[steepest]

    # intercept of highest slope and zero as well as highest slope and lowest slope
    fom = {}
    fom['zinter'] = -top_intercept / top_slope
    fom['lminter'] = (top_intercept - intercepts[flattest]) / (slopes[flattest] - top_slope)
    fom['max_slope'] = top_slope
    fom['curr_lminter_model'] = fom['lminter'] * top_slope + top_intercept
    nearby = np.where(np.abs(X - fom['lminter']) < 0.5)[0]
    fom['curr_lminter_data'] = np.mean(Y[nearby])

    # calculate how the CV curves might look like without the 'ORR part'
    srYs = smY - model.predict(X)
    srYf = savgol(Yf - model.predict(Xf), savgol_window, savgol_order)
    # calculate their derivative (currently not used further down)
    dsrYf = savgol(np.diff(srYf) / np.diff(Xf), savgol_window, savgol_order)

    # find the extrema for extraction of redox pots
    minima = argrelextrema(srYf, np.less, order=ex_order)
    maxima = argrelextrema(srYf, np.greater, order=ex_order)

    # calc some more foms like position of redox waves
    fom['redpot_f'] = np.nanmean(Xf[minima])
    fom['redpot_f_var'] = np.nanstd(Xf[minima])
    fom['oxpot_f'] = np.nanmean(Xf[maxima])
    fom['oxpot_f_var'] = np.nanstd(Xf[maxima])

    # keep the raw and derived curves alongside the scalar figures of merit
    fom['X'] = X
    fom['Xf'] = Xf
    fom['srYs'] = srYs
    fom['srYf'] = srYf
    fom['smY'] = smY
    fom['Y'] = Y
    fom['Yf'] = Yf
    fom['Y_h'] = Y_h
    fom['noise_lvl'] = np.sum((Y_h - Y) ** 2, axis=0)
    self.fom = fom
def fit_mars(self, X_test):
    """Fit an Earth model on ``self.X``/``self.y`` and write test predictions to CSV.

    The predictions for ``X_test`` are written to ``results/results_mars.csv``
    in a single ``SalePrice`` column, indexed like ``X_test``.
    """
    regressor = Earth(max_terms=1000, max_degree=1, penalty=3)
    train_features = self.X.copy().values
    train_target = self.y.copy().values.flatten()
    regressor.fit(train_features, train_target)

    predictions = regressor.predict(X_test.copy().values)
    output = pd.DataFrame(data=predictions, index=X_test.index, columns=['SalePrice'])
    output.to_csv('results/results_mars.csv', sep=',')
def mars(x_train, x_test, y_train, y_test, timestamp):
    """Fit an Earth model, score it on the test split and build the report text.

    Returns ``(score, fileName, MSE, MAD, MAPE, result)`` where ``score`` is the
    squared Pearson correlation between ``y_test`` and the predictions and the
    remaining values come from ``outputReport``.
    """
    # set model
    model = Earth(max_degree=1, penalty=1.0, endspan=5)

    # predict
    model = model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    # score: squared correlation between observed and predicted values
    correlation_matrix = np.corrcoef(y_test, y_pred)
    correlation_xy = correlation_matrix[0, 1]
    score = correlation_xy**2

    MSE, MAD, MAPE = outputReport.regression_basic_results(y_test, y_pred)
    fileName, result = outputReport.regression_extanded_results(
        timestamp, y_test, y_pred, "mars")

    try:
        model_summary_final = str(model.summary()).replace("\n", "<br>")
        result += ("<br>Model Parameters:<br>" + str(model.get_params())
                   + "<br>Model Summary:<br>" + model_summary_final)
    except Exception:
        # Best effort only: the report is still returned without the summary.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt and
        # SystemExit are no longer swallowed here.
        result += "<br>Model Summary is not available for MARS"

    return score, fileName, MSE, MAD, MAPE, result
def estimate_reward(self, z_train, y_train, z):
    """Fit a degree-2 Earth model on ``(z_train, y_train)`` and predict at ``z``.

    ``z`` is wrapped in a list before prediction, so the return value is
    whatever ``Earth.predict`` yields for that single-row input.
    """
    mars_model = Earth(max_degree=2)
    mars_model.fit(z_train, y_train)
    return mars_model.predict([z])
def test_export_python_string():
    """The exported Python source must reproduce the model's predictions."""
    for use_smoothing in (True, False):
        fitted = Earth(penalty=1, smooth=use_smoothing, max_degree=2).fit(X, y)
        generated_source = export_python_string(fitted, 'my_test_model')
        # Executing the source in the module namespace defines ``my_test_model``.
        six.exec_(generated_source, globals())
        reference = fitted.predict(X)
        exported = my_test_model(X)
        for expected_value, exported_value in zip(reference, exported):
            assert_almost_equal(expected_value, exported_value)
def test_copy_compatibility():
    """A shallow copy compares equal, predicts identically and keeps its basis root."""
    original = Earth(**default_params).fit(X, y)
    duplicate = copy.copy(original)

    assert_true(duplicate == original)

    same_predictions = numpy.all(original.predict(X) == duplicate.predict(X))
    assert_true(same_predictions)

    # The first basis function must be the root of the second one, both in the
    # original and in the copy.
    for fitted in (original, duplicate):
        assert_true(fitted.basis_[0] is fitted.basis_[1]._get_root())
def marsAccuracy(self):
    """Fit an Earth model on the first 200 rows and return the validation RMSE.

    Side effects: re-indexes ``self.df`` by its ``Date`` column and sets
    ``self.train``, ``self.valid`` and ``self.preds``.
    """
    # setting index as date values
    frame = self.df
    frame.index = frame['Date']

    # first 200 rows train the model, the remainder validates it
    self.train = frame[:200]
    self.valid = frame[200:]

    # Split data: 'Close' is the target, every other column is a feature
    train_features = self.train.drop('Close', axis=1)
    train_target = self.train['Close']
    valid_features = self.valid.drop('Close', axis=1)
    valid_target = self.valid['Close']

    train_features = timeToFloat(train_features)
    valid_features = timeToFloat(valid_features)

    # define the model and fit it on the training dataset
    regressor = Earth()
    regressor.fit(train_features, train_target)
    self.preds = regressor.predict(valid_features)

    # Result: root mean squared error on the validation rows
    return np.sqrt(mean_squared_error(valid_target, self.preds))
def mars(df_train, df_test, exogenous_features, scale_list=None, max_degree=2):
    """Fit an Earth model on ``df_train`` and return the rounded forecast for ``df_test``.

    :param df_train: training frame; must contain ``exogenous_features`` and ``'y'``
    :param df_test: frame to forecast; the prediction must hold exactly one
        value, because ``forecast.item()`` is used for the return value
    :param exogenous_features: column names used as predictors
    :param scale_list: columns that are floored at 0 and replaced by
        ``log(x + 1)`` before fitting; ``None`` means no scaling
    :param max_degree: forwarded to ``Earth``
    :return: the forecast rounded to zero decimals

    The input frames are not modified: scaling is applied to copies, so calling
    this function repeatedly with the same frames no longer re-applies the
    log transform to already transformed columns.
    """
    if scale_list is None:
        scale_list = []

    if len(scale_list) > 0:
        # Work on copies so the caller's frames keep their original values.
        df_train = df_train.copy()
        df_test = df_test.copy()
        for col in scale_list:
            df_train[col] = np.log(df_train[col].clip(lower=0) + 1)
            df_test[col] = np.log(df_test[col].clip(lower=0) + 1)

    X_train = df_train[exogenous_features]
    X_test = df_test[exogenous_features]
    y_train = df_train['y']

    model = Earth(max_degree=max_degree,
                  allow_missing=True,
                  enable_pruning=True,
                  minspan_alpha=.5,
                  thresh=.001,
                  smooth=False,
                  verbose=False)
    model = model.fit(X_train, y_train)

    # Predict
    forecast = model.predict(X_test)
    if forecast < 0:
        forecast[0] = 0

    if len(scale_list) > 0:
        # NOTE(review): the forward transform is log(x + 1), whose inverse is
        # exp(y) - 1; the "- 1" is absent here. Left unchanged because it is
        # not visible whether 'y' is part of scale_list -- confirm with callers.
        forecast[0] = np.exp(forecast[0])

    return np.round(forecast.item(), 0)
class MARS:
    """Earth regressor whose predictions are split at their median into +1 / -1."""

    def __init__(self, x_train, y_train, x_test, y_test):
        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_test, y_test
        self.classifier = None  # set by fit()

    def fit(self):
        """Create a fresh Earth model and fit it on the training data."""
        self.classifier = Earth()
        self.classifier.fit(self.x_train, self.y_train)

    def predict(self):
        """Return the raw model output for the test data."""
        return self.classifier.predict(self.x_test)

    def dichotomize(self, predictions):
        """Map each prediction to 1 if it is at least the median, else to -1."""
        cutoff = np.median(predictions)
        labels = [1 if value >= cutoff else -1 for value in predictions]
        return np.array(labels)

    def evaluate(self):
        """Return the fraction of dichotomized predictions that differ from ``y_test``."""
        labels = self.dichotomize(self.predict())
        wrong = 0.0
        for predicted, expected in zip(labels, self.y_test):
            if predicted != expected:
                wrong += 1
        return wrong / len(self.y_test)
def marsFit(x,y):
    """Fit a degree-1 Earth model.

    Returns a 4-tuple: the in-sample predictions, the fitted model,
    ``range(len(x))`` and a callable that predicts with the fitted model.
    """
    fitted = Earth(max_degree=1)
    fitted.fit(x,y)

    def f(x):
        # Thin wrapper so callers can evaluate the fit without the model object.
        return fitted.predict(x)

    in_sample = fitted.predict(x)
    positions = range(len(x))
    return in_sample, fitted, positions, f
def test_copy_compatibility():
    """``copy.copy`` of a fitted model must behave like the model it was copied from."""
    fitted = Earth(**default_params).fit(X, y)
    shallow = copy.copy(fitted)

    # equality as defined by the estimator itself
    assert_true(shallow == fitted)

    # element-wise identical predictions
    predictions_agree = numpy.all(fitted.predict(X) == shallow.predict(X))
    assert_true(predictions_agree)

    # basis function 0 is the root of basis function 1, before and after copying
    root_of_second = fitted.basis_[1]._get_root()
    assert_true(fitted.basis_[0] is root_of_second)
    copied_root_of_second = shallow.basis_[1]._get_root()
    assert_true(shallow.basis_[0] is copied_root_of_second)
def test_copy_compatibility():
    """A shallow copy equals the original, predicts (almost) the same and keeps its root."""
    numpy.random.seed(0)
    source_model = Earth(**default_params).fit(X, y)
    copied_model = copy.copy(source_model)

    assert_true(copied_model == source_model)

    expected = source_model.predict(X)
    actual = copied_model.predict(X)
    assert_array_almost_equal(expected, actual)

    # In both objects the first basis function is the root of the second.
    for candidate in (source_model, copied_model):
        first, second = candidate.basis_[0], candidate.basis_[1]
        assert_true(first is second._get_root())
def run_pyearth(X, y, **kwargs):
    '''Fit an Earth model built from ``kwargs`` and report on the run.

    Returns the in-sample predictions, the wall-clock fit time in seconds and
    the length of the forward trace minus one.'''
    estimator = Earth(**kwargs)

    started = time.time()
    estimator.fit(X, y)
    finished = time.time()

    predictions = estimator.predict(X)
    n_forward = len(estimator.forward_trace()) - 1
    return predictions, finished - started, n_forward
def test_nb_terms():
    """``max_terms`` bounds the number of basis functions (with a slack of two)."""
    for limit in (1, 3, 12, 13):
        fitted = Earth(max_terms=limit)
        fitted.fit(X, y)

        n_basis = len(fitted.basis_)
        assert_true(n_basis <= limit + 2)

        n_coef = len(fitted.coef_)
        assert_true(n_coef <= n_basis)
        assert_true(n_coef >= 1)

        if limit == 1:
            # With one term, every prediction is expected to equal the mean of y.
            assert_list_almost_equal_value(fitted.predict(X), y.mean())
def test_nb_terms():
    """``max_terms`` is a hard upper bound on the number of basis functions."""
    for term_cap in (1, 3, 12, 13):
        estimator = Earth(max_terms=term_cap)
        estimator.fit(X, y)

        basis_count = len(estimator.basis_)
        assert_true(basis_count <= term_cap)

        coef_count = len(estimator.coef_)
        assert_true(coef_count <= basis_count)
        assert_true(coef_count >= 1)

        if term_cap == 1:
            # With one term, every prediction is expected to equal the mean of y.
            assert_list_almost_equal_value(estimator.predict(X), y.mean())
def test_export_sympy():
    """The sympy export, once lambdified, must reproduce ``model.predict``.

    Every combination of smoothing, one or two output columns and missing-value
    support is checked.
    """
    import pandas as pd
    from sympy.utilities.lambdify import lambdify
    from sympy.printing.lambdarepr import NumPyPrinter

    class PyEarthNumpyPrinter(NumPyPrinter):
        """Printer that renders the py-earth specific sympy functions as numpy calls."""

        def _print_Max(self, expr):
            return 'maximum(' + ','.join(self._print(i) for i in expr.args) + ')'

        def _print_NaNProtect(self, expr):
            return 'where(isnan(' + ','.join(self._print(a) for a in expr.args) + '), 0, ' \
                + ','.join(self._print(a) for a in expr.args) + ')'

        def _print_Missing(self, expr):
            return 'isnan(' + ','.join(self._print(a) for a in expr.args) + ').astype(float)'

    # Names the lambdified expressions may reference; identical for every case.
    module_dict = {
        'select': numpy.select,
        'less_equal': numpy.less_equal,
        'isnan': numpy.isnan,
        'greater_equal': numpy.greater_equal,
        'logical_and': numpy.logical_and,
        'less': numpy.less,
        'logical_not': numpy.logical_not,
        "greater": numpy.greater,
        'maximum': numpy.maximum,
        'Missing': lambda x: numpy.isnan(x).astype(float),
        'NaNProtect': lambda x: numpy.where(numpy.isnan(x), 0, x),
        'nan': numpy.nan,
        'float': float,
        'where': numpy.where,
    }

    for smooth, n_cols, allow_missing in product((True, False), (1, 2), (True, False)):
        X_df = pd.DataFrame(X.copy(), columns=['x_%d' % i for i in range(X.shape[1])])
        y_df = pd.DataFrame(Y[:, :n_cols])
        if allow_missing:
            # Randomly remove some values so that the fitted model contains
            # MissingnessBasisFunctions. ``.loc`` is used instead of chained
            # indexing (``X_df['x_1'][mask] = ...``), which may write to a
            # temporary copy and leave X_df untouched.
            drop_mask = numpy.random.binomial(n=1, p=.1, size=X_df.shape[0]).astype(bool)
            X_df.loc[drop_mask, 'x_1'] = numpy.nan

        model = Earth(allow_missing=allow_missing, smooth=smooth, max_degree=2).fit(X_df, y_df)
        expressions = export_sympy(model) if n_cols > 1 else [export_sympy(model)]

        for i, expression in enumerate(expressions):
            # The lambdified functions for smoothed basis functions only work with modules='numpy' and
            # for regular basis functions with modules={'Max':numpy.maximum}.
            # This is a confusing situation
            func = lambdify(X_df.columns, expression, printer=PyEarthNumpyPrinter, modules=module_dict)
            y_pred_sympy = func(*[X_df.loc[:, var] for var in X_df.columns])
            y_pred = model.predict(X_df)[:, i] if n_cols > 1 else model.predict(X_df)
            assert_array_almost_equal(y_pred, y_pred_sympy)
def runModel(i,featureCombo):
    """Cross-validate an Earth model on ``featureCombo`` and log the mean MAE.

    Iterates over the module-level folds ``kf`` of ``trainCleaned``; ``i`` is
    only used in the log messages.
    """
    fold_errors = np.array([])
    logging.warning('try alpha = %s' % i)

    for train_rows, test_rows in kf:
        train_part = trainCleaned.iloc[train_rows,]
        test_part = trainCleaned.iloc[test_rows,]

        regressor = Earth()
        regressor.fit(train_part[featureCombo], train_part['Expected'])
        predicted = regressor.predict(test_part[featureCombo])

        fold_error = getMAE(predicted, test_part['Expected'])
        fold_errors = np.append(fold_errors, fold_error)

    logging.warning('average 10-fold MAE for alpha %s feature %s' % (i,featureCombo))
    logging.warning(fold_errors.mean())
def mars(p, xLabels, yLabel):
    """Fit an Earth model on a shuffled 80/20 split of ``p`` and plot diagnostics.

    :param p: data frame holding the feature and target columns
    :param xLabels: names of the feature columns
    :param yLabel: name of the target column
    :return: ``(r2, mse)`` on the held-out rows

    Side effects: plots residuals, prints the model trace and summary, and for
    each importance criterion saves one bar chart to
    ``image_path.format(image_num)`` (incrementing the global ``image_num``)
    before showing it.
    """
    global image_num
    criteria = ('rss', 'gcv', 'nb_subsets')

    # Randomly shuffle rows
    p = p.sample(frac=1).reset_index(drop=True)

    # Split train and test: the last 20 percent of the shuffled rows are held out
    twentyPercent = -1 * round(p.shape[0] * 0.2)
    n = len(xLabels)
    xCol = p[xLabels].values.reshape(-1, n)
    X_train = xCol[:twentyPercent]
    X_test = xCol[twentyPercent:]
    y_train = p[yLabel][:twentyPercent].values.reshape(-1, 1)
    y_test = p[yLabel][twentyPercent:].values.reshape(-1, 1)

    # Fit MARS model
    model = Earth(feature_importance_type=criteria)
    model.fit(X_train, y_train)

    # Make predictions
    predicted = model.predict(X_test)
    r2 = r2_score(y_test, predicted)
    mse = mean_squared_error(y_test, predicted)
    predicted = predicted.reshape(-1, 1)

    # Plot residuals
    plotResiduals(y_test, predicted)

    # Print summary
    print(model.trace())
    print(model.summary())

    # Plot feature importances, one chart per criterion
    importances = model.feature_importances_
    x = list(range(0, len(xLabels)))
    for crit in criteria:
        # Most important feature first
        ranked = sorted(zip(importances[crit], xLabels), reverse=True)
        coeff = [abs(importance) for importance, _ in ranked]
        feature = [featureToLabel[name] for _, name in ranked]

        plt.clf()
        plt.xticks(x, feature, rotation='vertical')
        plt.bar(x, coeff, align='center', alpha=0.5)
        plt.xlabel('Features')
        plt.ylabel("Importance (" + crit + ")")
        plt.tight_layout()
        # Save before showing: once show() returns the figure may already have
        # been released, in which case savefig() would write an empty image.
        plt.savefig(image_path.format(image_num), bbox_inches='tight')
        plt.show()
        image_num += 1

    return r2, mse
class Diagnostics:
    """Fit an ``EarthModel`` on a feature subset of ``env`` and expose diagnostics."""

    def __init__(self, env, features, *args, **kwargs):
        """Select the columns flagged in ``features`` and fit immediately.

        Extra positional and keyword arguments are forwarded to ``EarthModel``.
        """
        self.env = env
        self.solution = features
        selected = features.astype(bool)
        self.data = env.X.loc[:, selected].copy()
        self.y = self.env.y
        self.model = EarthModel(*args, **kwargs)
        self.y_pred = None
        self.error = None
        self._fit()

    def _fit(self):
        """Fit the model and store in-sample predictions and residuals."""
        self.model.fit(self.data, self.y)
        self.y_pred = self.model.predict(self.data)
        predicted_flat = self.y_pred.flatten()
        observed_flat = self.env.y.flatten()
        self.error = (predicted_flat - observed_flat)

    def summary(self):
        """Return the model summary table sorted by its ``feature`` column."""
        table = model_summary(self.model, self.data.columns)
        return table.sort_values("feature")

    def plot_thresholds(self):
        """Delegate to the module-level ``plot_thresholds`` helper."""
        return plot_thresholds(self.summary(), self.data)

    def plot_autocorrelations(self):
        """Plot partial and full autocorrelation of the residuals."""
        from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
        _ = plot_pacf(self.error)
        _ = plot_acf(self.error)

    def plot_qq(self):
        """Draw a probability plot of the residuals and print its R squared."""
        fig, ax = plt.subplots()
        _, fit_params = scipy.stats.probplot(self.error, plot=ax, fit=True)
        slope, intercept, r_norm = fit_params
        print("R squared {:.4f}".format(r_norm**2))

    def plot_pred(self):
        """Plot predicted against true values, indexed like the training data."""
        columns = {
            "predicted": self.y_pred,
            "True value": self.y
        }
        df = pd.DataFrame(columns, index=self.data.index)
        title = "Model prediction for {}".format(self.env.target)
        return df.hvplot().opts(title=title)

    def score(self):
        """Return the model's MSE, GCV, RSQ and GRSQ as one formatted string."""
        fitted = self.model
        template = "MSE: {:.4f}, GCV: {:.4f}, RSQ:{:.4f}, GRSQ: {:.4f}"
        return template.format(fitted.mse_, fitted.gcv_, fitted.rsq_, fitted.grsq_)
def test_output_weight():
    """Outputs with a larger ``output_weight`` must not be fitted worse."""
    x = numpy.random.uniform(-1, 1, size=(1000, 1))
    y = (numpy.dot(x, numpy.random.normal(0, 1, size=(1, 10)))) ** 5 + 1
    y = (y - y.mean(axis=0)) / y.std(axis=0)

    # Boolean mask over the ten outputs: True for the first five (weight 1),
    # False for the last five (weight 2). It has to be boolean: an integer
    # array of ones and zeros would be used for fancy indexing and pick
    # columns 1 and 0 repeatedly instead of the first five columns.
    group = numpy.array([1] * 5 + [0] * 5, dtype=bool)
    output_weight = numpy.array([1] * 5 + [2] * 5, dtype=float)
    model = Earth().fit(x, y, output_weight=output_weight)

    # Check that the model fits at least better
    # the more heavily weighted group
    mse = ((model.predict(x) - y) ** 2).mean(axis=0)
    group1_mean = mse[group].mean()
    group2_mean = mse[numpy.logical_not(group)].mean()
    assert_true(group1_mean > group2_mean or
                round(abs(group1_mean - group2_mean), 7) == 0)
def test_output_weight():
    """The more heavily weighted outputs must be fitted at least as well."""
    x = numpy.random.uniform(-1, 1, size=(1000, 1))
    y = (numpy.dot(x, numpy.random.normal(0, 1, size=(1, 10))))**5 + 1
    y = (y - y.mean(axis=0)) / y.std(axis=0)

    # ``group`` selects the first five outputs (weight 1); its negation selects
    # the last five (weight 2). dtype=bool makes ``mse[group]`` a mask lookup;
    # with the default integer dtype it would index columns 1 and 0 instead.
    group = numpy.array([1] * 5 + [0] * 5, dtype=bool)
    output_weight = numpy.array([1] * 5 + [2] * 5, dtype=float)
    model = Earth().fit(x, y, output_weight=output_weight)

    # Check that the model fits at least better
    # the more heavily weighted group
    mse = ((model.predict(x) - y)**2).mean(axis=0)
    group1_mean = mse[group].mean()
    group2_mean = mse[numpy.logical_not(group)].mean()
    assert_true(group1_mean > group2_mean
                or round(abs(group1_mean - group2_mean), 7) == 0)
def MARS(self, X=None, Y=None):
    """Fit a Multivariate Adaptive Regression Splines model and score it.

    When both ``X`` and ``Y`` are given they replace ``self.sampled_X`` and
    ``self.sampled_Y`` before training. The model is evaluated on ``self.X``
    against ``self.Y``.

    Returns the tuple ``(normalized mean square error, mean absolute
    percentage error)`` as computed by ``self.metric``.
    """
    from pyearth import Earth

    regressor = Earth()
    if X is not None and Y is not None:
        self.sampled_X = X
        self.sampled_Y = Y

    # train
    regressor.fit(self.sampled_X, self.sampled_Y)

    # test
    predicted = regressor.predict(self.X)

    # compute metric
    nmse = self.metric.normalized_mean_square_error(predicted, self.Y)
    mape = self.metric.mean_absolute_percentage_error(predicted, self.Y)
    return (nmse, mape)
def Mars_detrend(x, y):
    """Return the in-sample predictions of a default Earth model fitted on ``(x, y)``."""
    trend_model = Earth()
    trend_model.fit(x, y)
    fitted_values = trend_model.predict(x)
    return fitted_values
def mars(self, max_degree=2):
    """Fit an Earth model on the stored training data and return the rounded forecast.

    The prediction for ``self.X_test`` must hold a single value. Negative
    forecasts are set to 0, and the forecast is exponentiated when
    ``self.scale_list`` is non-empty.
    """
    earth_options = dict(
        max_degree=max_degree,
        allow_missing=True,
        enable_pruning=True,
        minspan_alpha=.5,
        thresh=.001,
        smooth=False,
        verbose=False,
    )
    fitted = Earth(**earth_options).fit(self.X_train, self.y_train)
    forecast = fitted.predict(self.X_test)

    # floor negative forecasts at zero
    if forecast < 0:
        forecast[0] = 0

    # undo the log scaling when columns were scaled
    if len(self.scale_list) > 0:
        forecast[0] = np.exp(forecast[0])

    return np.round(forecast.item(), 0)
def getTrain(trainData, testData):
    """Train an Earth model and count test rows whose prediction has the right sign.

    In every row the last entry is the target and the entries before it are the
    features. A test row counts when target times prediction is non-negative.
    """
    n_train = len(trainData)
    n_test = len(testData)
    width = len(testData[0])
    n_features = width - 1
    target_col = width - 1

    # copy the training rows into feature and target arrays
    X = numpy.zeros((n_train, n_features))
    Y = numpy.zeros((n_train, 1))
    for row, sample in enumerate(trainData):
        for col in range(n_features):
            X[row][col] = sample[col]
        Y[row][0] = float(sample[target_col])

    # copy the test features
    dX = numpy.zeros((n_test, n_features))
    for row, sample in enumerate(testData):
        for col in range(n_features):
            dX[row][col] = sample[col]

    model = Earth()
    model.fit(X,Y)
    y_hat = model.predict(dX)

    # count predictions that agree in sign with the true value
    hits = 0
    for row in range(n_test):
        actual = testData[row][target_col]
        if actual * y_hat[row] >= 0:
            hits += 1
    return hits
def mars(df_train, df_test, exogenous_features, max_degree=2):
    """Return the rounded Earth forecast for ``df_test``, or 0 when IP and CON are both 0.

    ``df_test`` must describe a single observation: the guard and
    ``forecast.item()`` both rely on one-element arrays.
    """
    ip_is_zero = df_test['IP'].values == 0
    con_is_zero = df_test['CON'].values == 0
    if ip_is_zero & con_is_zero:
        # nothing to model in this case
        return 0

    train_features = df_train[exogenous_features]
    test_features = df_test[exogenous_features]
    train_target = df_train['y']

    earth_options = dict(
        max_degree=max_degree,
        allow_missing=True,
        enable_pruning=True,
        minspan_alpha=.5,
        thresh=.001,
        smooth=False,
        verbose=False,
    )
    fitted = Earth(**earth_options).fit(train_features, train_target)

    # Predict, flooring negative forecasts at zero
    forecast = fitted.predict(test_features)
    if forecast < 0:
        forecast[0] = 0

    return np.round(forecast.item(), 0)
def mars_forecast(x_train, x_test, y_train, timestamp):
    """Fit an Earth model, forecast ``x_test`` and build the HTML report text.

    Returns ``(filename, result)``: the file name reported by ``outputReport``
    and an HTML string with the model parameters, the model summary (when
    available) and the forecast table.
    """
    # set model
    model = Earth(max_degree=1, penalty=1.0, endspan=5)

    # predict
    model = model.fit(x_train, y_train)
    y_pred = pd.DataFrame(model.predict(x_test), columns=["Forecasted Values"])
    filename = outputReport.regression_extanded_results_forecast(
        timestamp, y_pred, "mars forecast")

    try:
        model_summary_final = str(model.summary()).replace("\n", "<br>")
        result = ("<br>Model Parameters:<br>" + str(model.get_params())
                  + "<br>Model Summary:<br>" + model_summary_final)
    except Exception:
        # Best effort only: fall back to a placeholder. ``Exception`` (not a
        # bare ``except``) keeps KeyboardInterrupt/SystemExit propagating.
        result = "Model Summary is not available for MARS"

    result += str(
        y_pred.to_html(formatters={'Name': lambda x: '<b>' + x + '</b>'}))
    return filename, result
class MARSInterpolant(Earth):
    """Compute and evaluate a MARS interpolant

    :ivar nump: Current number of points
    :ivar maxp: Initial maximum number of points (can grow)
    :ivar x: Interpolation points
    :ivar fx: Function evaluations of interpolation points
    :ivar dim: Number of dimensions
    :ivar model: MARS interpolation model
    :ivar updated: True when ``model`` has been fitted to the current points
    """

    def __init__(self, maxp=100):
        self.nump = 0
        self.maxp = maxp
        self.x = None     # pylint: disable=invalid-name
        self.fx = None
        self.dim = None
        self.model = Earth()
        self.updated = False

    def reset(self):
        """Reset the interpolation."""
        self.nump = 0
        self.x = None
        self.fx = None
        self.updated = False

    def _alloc(self, dim):
        """Allocate storage for x and fx.

        :param dim: Number of dimensions
        """
        maxp = self.maxp
        self.dim = dim
        self.x = np.zeros((maxp, dim))
        self.fx = np.zeros((maxp, 1))

    def _realloc(self, dim, extra=1):
        """Expand allocation to accommodate more points (if needed)

        :param dim: Number of dimensions
        :param extra: Number of additional points to accommodate
        """
        if self.nump == 0:
            self._alloc(dim)
        elif self.nump + extra > self.maxp:
            self.maxp = max(self.maxp * 2, self.maxp + extra)
            # Grow by copying into fresh zero-filled buffers. An in-place
            # ndarray.resize raises ValueError while any view of the buffer
            # (such as the ones returned by get_x/get_fx) is still alive.
            grown_x = np.zeros((self.maxp, dim))
            grown_x[:self.nump, :] = self.x[:self.nump, :]
            grown_fx = np.zeros((self.maxp, 1))
            grown_fx[:self.nump, :] = self.fx[:self.nump, :]
            self.x = grown_x
            self.fx = grown_fx

    def _refit_if_needed(self):
        """Fit the model to the stored points unless it is already up to date.

        Only the first ``nump`` rows are used; the remaining rows of the
        buffers are unused zero padding and must not be seen by the model.
        """
        if self.updated is False:
            self.model.fit(self.get_x(), self.get_fx())
        self.updated = True

    def get_x(self):
        """Get the list of data points

        :return: List of data points
        """
        return self.x[:self.nump, :]

    def get_fx(self):
        """Get the list of function values for the data points.

        :return: List of function values
        """
        return self.fx[:self.nump, :]

    def add_point(self, xx, fx):
        """Add a new function evaluation

        :param xx: Point to add
        :param fx: The function value of the point to add
        """
        dim = len(xx)
        self._realloc(dim)
        self.x[self.nump, :] = xx
        self.fx[self.nump, :] = fx
        self.nump += 1
        self.updated = False

    def eval(self, xx, d=None):
        """Evaluate the MARS interpolant at the point xx

        :param xx: Point where to evaluate
        :param d: Unused
        :return: Value of the MARS interpolant at x
        """
        self._refit_if_needed()
        xx = np.expand_dims(xx, axis=0)
        fx = self.model.predict(xx)
        return fx[0]

    def evals(self, xx, d=None):
        """Evaluate the MARS interpolant at the points xx

        :param xx: Points where to evaluate
        :param d: Unused
        :return: Values of the MARS interpolant at x, as a column vector
        """
        self._refit_if_needed()
        fx = np.zeros(shape=(xx.shape[0], 1))
        fx[:, 0] = self.model.predict(xx)
        return fx

    def deriv(self, x, d=None):
        """Evaluate the derivative of the MARS interpolant at x

        :param x: Data point
        :param d: Unused
        :return: Derivative of the MARS interpolant at x
        """
        self._refit_if_needed()
        x = np.expand_dims(x, axis=0)
        dfx = self.model.predict_deriv(x, variables=None)
        return dfx[0]
def earth(x, y):
    """Fit an Earth model on array copies of ``x`` and ``y`` and predict back on ``x``."""
    fitted = Earth(max_terms=30, endspan=2, thresh=0.00001)
    features = np.array(x)
    targets = np.array(y)
    fitted.fit(features, targets)
    return fitted.predict(x)
# One figure with a row of two panels per weighting in ``alphas``
# (``alphas``, ``n_plots``, ``X`` and ``y_mix`` are defined outside this excerpt).
k = 1  # subplot index; advanced by two per run, one per panel
fig = plt.figure()
for i, alpha in enumerate(alphas):
    # Fit an Earth model
    model = Earth(max_degree=5, minspan_alpha=.05, endspan_alpha=.05, max_terms=10, check_every=1, thresh=0.)
    # The two outputs are weighted alpha and 1 - alpha respectively
    output_weight = np.array([alpha, 1 - alpha])
    model.fit(X, y_mix, output_weight=output_weight)
    print(model.summary())
    # Plot the model
    y_hat = model.predict(X)
    # Per-output mean squared error, shown in the panel titles
    mse = ((y_hat - y_mix) ** 2).mean(axis=0)
    # Left panel: first output against column 6 of X (data red, prediction blue)
    ax = plt.subplot(n_plots, 2, k)
    ax.set_ylabel("Run {0}".format(i + 1), rotation=0, labelpad=20)
    plt.plot(X[:, 6], y_mix[:, 0], 'r.')
    plt.plot(X[:, 6], model.predict(X)[:, 0], 'b.')
    plt.title("MSE: {0:.3f}, Weight : {1:.1f}".format(mse[0], alpha))
    # Right panel: second output against column 5 of X
    plt.subplot(n_plots, 2, k + 1)
    plt.plot(X[:, 5], y_mix[:, 1], 'r.')
    plt.plot(X[:, 5], model.predict(X)[:, 1], 'b.')
    plt.title("MSE: {0:.3f}, Weight : {1:.1f}".format(mse[1], 1 - alpha))
    k += 2
plt.tight_layout()
plt.show()
def mars_method(energy, absorption_coefficient, bg_type='direct', show_graph=True):
    """Estimate an x-axis intercept from a piecewise-linear MARS fit.

    The absorption values are normalised by their maximum and fitted against
    ``energy`` with an Earth model. The knots found in the basis functions,
    plus the energy limits, split the fit into line segments. Among segments
    with positive slope, positive x-intercept and more than ten data points
    strictly inside, the one with the largest weighting factor wins.

    :param energy: energy values (x axis)
    :param absorption_coefficient: absorption values (y axis), same length
    :param bg_type: unused
    :param show_graph: when truthy, plot the data, the fit and the winning line
    :return: the x-intercept of the winning segment, or the string
        ``'Problem in MARS fitting parameters!'`` when fitting raises ValueError
    :raises ValueError: when no segment qualifies
    """
    # Normalise by the peak value, computed once rather than per element.
    raw_values = list(absorption_coefficient)
    peak = max(raw_values) if raw_values else None
    direct_abs_corrected = [k / peak for k in raw_values]

    model = Earth()
    try:
        model.fit(np.array(energy), np.array(direct_abs_corrected))
    except ValueError:
        return 'Problem in MARS fitting parameters!'

    # Segment boundaries: the energy limits plus every knot that appears in
    # the textual form of a basis function (the first basis term is skipped).
    energy_elbows = [min(energy), max(energy)]
    decimal_number = re.compile(r"\d+\.\d+")
    for coeff in list(model.basis_)[1:]:
        found = decimal_number.findall(str(coeff))
        if not found:
            # Basis term without a decimal knot value; report it and move on.
            print(coeff)
            continue
        knot = float(found[0])
        if knot not in energy_elbows:
            energy_elbows.append(knot)

    y_hat = model.predict(energy)
    if show_graph:
        plt.figure()
        plt.scatter(energy, direct_abs_corrected, color='k')
        plt.plot(energy, y_hat, 'b.')
        plt.xlabel('Energy (eV)', fontsize=14)
        plt.ylabel('(E' + u"\u03B1" + ')' + u"\u00B2", fontsize=14)
        plt.title('Tauc Plot for Direct Transitions', fontsize=20)
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)

    # Evaluate the fitted model at every boundary to get the segment end points.
    function = export.export_sympy(model)
    direct_abs_elbows = [
        function.evalf(subs={'x0': coeff}) for coeff in energy_elbows
    ]
    elbows_list = sorted(zip(energy_elbows, direct_abs_elbows))

    line_segs = []
    for (x_lo, y_lo), (x_hi, y_hi) in zip(elbows_list, elbows_list[1:]):
        # number of data points strictly inside the segment
        num_pts = sum(1 for w in energy if w > x_lo and w < x_hi)
        x_length = x_hi - x_lo
        length = ((x_hi - x_lo)**2 + (y_hi - y_lo)**2)**.5
        slope = (y_hi - y_lo) / (x_hi - x_lo)
        y_intercept = y_hi - slope * x_hi
        x_intercept = (-1 * y_intercept) / slope
        weighting_factor = slope**2 * x_length * 2 * abs(length)**.5 * num_pts
        try:
            if x_intercept > 0 and slope > 0 and num_pts > 10:
                line_segs.append((x_length, length, slope, y_intercept,
                                  x_intercept, weighting_factor))
        except TypeError:
            # The comparison can fail for non-real symbolic values.
            print('Weird complex zoo error..')

    if not line_segs:
        raise ValueError(
            'No line segment with positive slope, positive x-intercept and '
            'more than 10 points was found')
    # First segment with the largest weighting factor
    winner = max(line_segs, key=lambda item: item[5])

    adj_energy = np.linspace(min(energy), max(energy), num=1000)
    winner_slope = float(winner[2])
    winner_intercept = float(winner[3])
    adj_winner = [t * winner_slope + winner_intercept for t in adj_energy]
    if show_graph:
        plt.scatter(adj_energy, adj_winner, color='r')
        plt.axis([min(energy), max(energy), 0, 1])
        plt.show()
    return winner[4]
# Two noisy responses, both driven by column 6 of X
# (``X`` and ``m`` are defined outside this excerpt).
y1 = 100 * \
    numpy.abs(numpy.sin((X[:, 6]) / 10) - 4.0) + \
    10 * numpy.random.normal(size=m)
y2 = 100 * \
    numpy.abs(numpy.sin((X[:, 6]) / 2) - 8.0) + \
    5 * numpy.random.normal(size=m)
# Fit an Earth model
model = Earth(max_degree=3, minspan_alpha=.5)
# Stack the two responses as columns so one model is fitted to both outputs
y_mix = numpy.concatenate((y1[:, numpy.newaxis], y2[:, numpy.newaxis]), axis=1)
model.fit(X, y_mix)
# Print the model
print(model.trace())
print(model.summary())
# Plot the model
y_hat = model.predict(X)
fig = plt.figure()
# Left panel: first output (data in red, prediction in blue)
ax = fig.add_subplot(1, 2, 1)
ax.plot(X[:, 6], y_mix[:, 0], 'r.')
ax.plot(X[:, 6], model.predict(X)[:, 0], 'b.')
# Right panel: second output
ax = fig.add_subplot(1, 2, 2)
ax.plot(X[:, 6], y_mix[:, 1], 'r.')
ax.plot(X[:, 6], model.predict(X)[:, 1], 'b.')
plt.show()
def test_export_python_function():
    """The exported Python callable must reproduce the model's predictions."""
    for use_smoothing in (True, False):
        fitted = Earth(penalty=1, smooth=use_smoothing, max_degree=2).fit(X, y)
        exported = export_python_function(fitted)
        reference = fitted.predict(X)
        candidate = exported(X)
        for expected_value, exported_value in zip(reference, candidate):
            assert_almost_equal(expected_value, exported_value)
# array([3.21838587, 3.16720653, 3.25737585, 3.2542665 , 3.24746355]) # 3. Lasso Regression lasso = Lasso(alpha=0.01) lassoMSE = kFoldValidation(5, lasso, array_train, array_y) lassoMSE # array([3.23388954, 3.18301436, 3.27518402, 3.27289743, 3.26569614]) # 4. Spline # Since it is too slow to do the k cross validation for spline, # just use validation set to test the performance. spline = Earth() spline.fit(array_train, array_y) array_val = np.array(x_val) array_y_val = np.array(np.log1p(y_val.iloc[:, 0])) preds_val = spline.predict(array_val) splineMSE = np.mean((preds_val - array_y_val)**2) splineMSE # 3.5848521191901126 # 5. Random Forest rf = RandomForestRegressor(max_depth=20, random_state=42, n_estimators=100) rf.fit(array_train, array_y) # Feature importance # Very interesting. The top 14 important features are not consistent # with the top 14 correlated features. dic = {} for feature, importance in zip(x_train.columns, rf.feature_importances_): dic[feature] = importance
# NOTE(review): the next two statements are the tail of a function whose
# header lies outside this excerpt; they are reproduced unchanged.
    x = np.exp(x) - 1
    return x


def graph(x, y, y2, a, b, Title):
    """Plot ``y`` (red, 'Actual') and ``y2`` (blue, 'Predicted') over ``x[a:b]``.

    Shows the figure and returns it.
    """
    fig = plt.figure()
    plt.plot(x[a:b],y[a:b],'r', label='Actual')
    plt.plot(x[a:b],y2[a:b],'b', label='Predicted')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title(Title)
    plt.legend(loc='upper left')
    plt.show()
    return fig


# Predict training series
y_hat = mars.predict(x)
x_train = list(range(0,len(y)))
# Process test data
test = test[cols].astype(str)
# Strip the commas from every cell so the values can be converted to float
for i in cols:
    for j in range(0,len(test)):
        test[i][j] = test[i][j].replace(",","")
test = test.astype(float)
# Two talib indicators computed on the '<OPEN>' column
HT = talib.HT_DCPERIOD(test['<OPEN>'])
std = talib.STDDEV(test['<OPEN>'], timeperiod=7, nbdev=1)
# Wrap each indicator in a single-column DataFrame
HT = pd.DataFrame(data={'HT_DCPERIOD':HT})
std = pd.DataFrame(data={'STDDEV':std})
def generate_MARS(
        training_data,
        modelname="4.01-MARS",
        responseColumn="log(q30)",
        predictorColumns="default",  #default => all non-response columns in training data
        max_degree=2,
        minspan_alpha=0.5,
        smooth=False,
        trainingSplitRatio=0.8,
        trainingSplitRandom=random.RandomState(),
        persist=True,
        returnTestSetResults=False,  #True ==> will return predictions, the actual, and the predictors
        verbose=True  #when truthy, a summary line is printed at the end
):
    """Train an Earth (MARS) model on a random split and optionally persist it.

    :param training_data: data passed to ``replace_nans_infs`` and ``splitXy``
    :param modelname: name of the sub-directory below ``models/``
    :param responseColumn: name of the response column
    :param predictorColumns: predictor columns, forwarded to ``splitXy``
    :param max_degree: forwarded to ``Earth``
    :param minspan_alpha: forwarded to ``Earth``
    :param smooth: forwarded to ``Earth``
    :param trainingSplitRatio: ``train_size`` for ``train_test_split``
    :param trainingSplitRandom: ``random_state`` for ``train_test_split``
    :param persist: when truthy, save the model and a diagnostics plot below
        ``models/<modelname>/<today_string>/``
    :param returnTestSetResults: when truthy, return ``(yhat, ytest, Xtest)``
    :param verbose: when truthy (and nothing was returned before), print a
        summary of what was generated
    :return: ``(yhat, ytest, Xtest)`` if ``returnTestSetResults``, else ``None``
    """
    from pyearth import Earth
    model = Earth(max_degree=max_degree,
                  minspan_alpha=minspan_alpha,
                  smooth=smooth)
    replace_nans_infs(training_data)
    X, y = splitXy(training_data, responseColumn, predictorColumns)
    Xtrain, Xtest, ytrain, ytest = train_test_split(
        X, y, train_size=trainingSplitRatio, random_state=trainingSplitRandom)
    model.fit(Xtrain, ytrain)

    ##Model evaluation:
    yhat = model.predict(Xtest)
    # r2_score expects (y_true, y_pred); R^2 is not symmetric in its arguments.
    R2 = r2_score(ytest, yhat)  #imported above via sklearn.metrics

    ##If model is successfully generated, output results##
    modelDir = "models/%s/%s/" % (modelname, today_string)
    imageDir = modelDir + "0-images/"
    if persist:
        # imageDir lies below modelDir, so creating it creates both; this also
        # covers a model directory that exists without its image directory.
        if not os.path.isdir(imageDir):
            os.makedirs(imageDir)
        joblib.dump(model, modelDir + "model.pkl")

        plt.rcParams['figure.figsize'] = [9, 4]
        plt.subplot(121)
        plt.scatter(ytest, yhat, alpha=.1)
        plt.plot([0, 10], [0, 10])
        plt.xlabel("actual")
        plt.ylabel("predicted")
        plt.title("Model = MARS(%i)\t\t $R^2$=%.2f" % (max_degree, R2))
        plt.subplot(122)
        sns.set(style="whitegrid")
        sns.residplot(ytest, yhat)
        plt.savefig(imageDir + "diagnostics.png")
        plt.close()

    if returnTestSetResults:
        return yhat, ytest, Xtest
    if verbose:
        # Single parenthesised argument: valid under both Python 2 and Python 3.
        print("MARS(%i) model successfully generated! \t\t\t\t\t(Train: %i, Test: %i)\n\tModel file saved in:\t\t%s\n\tDiagnostics plots saved in:\t%s\n" % (
            max_degree, len(ytrain), len(ytest), modelDir, imageDir))
# Build a reproducible synthetic data set: m samples, n features in [-40, 40)
numpy.random.seed(0)
m = 1000
n = 10
X = 80 * numpy.random.uniform(size=(m, n)) - 40
# The response depends on column 6 only, plus unit-variance Gaussian noise
y = numpy.abs(X[:, 6] - 4.0) + 1 * numpy.random.normal(size=m)
# Fit an Earth model
model = Earth()
model.fit(X, y)
# Print the model
print(model.trace())
print(model.summary())
# Plot the model
y_hat = model.predict(X)
pyplot.figure()
# Data in red, model prediction in blue, both against column 6
pyplot.plot(X[:, 6], y, 'r.')
pyplot.plot(X[:, 6], y_hat, 'b.')
pyplot.xlabel('x_6')
pyplot.ylabel('y')
pyplot.title('Simple Earth Example')
pyplot.savefig('simple_earth_example.png')
#=========================================================================
# Hinge plot
#=========================================================================
from xkcdify import XKCDify
x = numpy.arange(-10, 10, .1)
# x * (x > 0) is zero for non-positive x and x otherwise; note that this
# rebinds ``y`` from the response above
y = x * (x > 0)
# NOTE(review): Python 2 code (print statements, xrange); names such as
# DATA_DIR, folder_name, hp, y, y_train_mat, X_train_scaled, X_test,
# y_pred_mat and y_test_mat are defined outside this excerpt.
folder_path = join(DATA_DIR, 'models', folder_name)
os.makedirs(folder_path)
# Dump the hyperparameter dictionary
with open(join(folder_path, 'hyperparameters.pkl'), 'wb') as f:
    pickle.dump(hp, f, -1)
training_times = []
# Train, pickle and evaluate one Earth model per column of y
for a in xrange(0, y.shape[1]):
    start = time.time()
    # Column ``a`` of the training targets as a 1-d array
    y_train = y_train_mat[:, a:(a + 1)].ravel()
    model = Earth(**hp)
    model.fit(X_train_scaled, y_train)
    end = time.time()
    print 'Fast MARS t-7, a{0} took {1} to train'.format(a, end - start)
    training_times.append(end - start)
    with open(join(DATA_DIR, 'models/{0}/MARS_a{1}.pkl'.format(folder_name, a)), 'wb') as f:
        pickle.dump(model, f, -1)
    start = time.time()
    # NOTE(review): the model is fitted on X_train_scaled but predicts on
    # X_test -- confirm that X_test has been scaled the same way.
    y_pred_mat[:, a] = model.predict(X_test)
    end = time.time()
    print 'Fast MARS t-7, a{0} took {1} to predict'.format(a, end - start)
    sys.stdout.flush()
# Root mean squared error over all outputs
RMSE = mean_squared_error(y_test_mat, y_pred_mat) ** 0.5
print RMSE
# Record the RMSE and the total training time
with open(join(folder_path, 'stats.txt'), 'wb') as f:
    f.write('{0}\n{1}\n'.format(str(RMSE), sum(training_times)))
def test_export_sympy():
    """Check that sympy-exported Earth models reproduce ``model.predict``.

    For every combination of ``smooth``, number of response columns (1 or 2)
    and ``allow_missing``, an Earth model is fitted, exported with
    ``export_sympy``, turned into a NumPy function with ``lambdify`` and
    evaluated on the training frame. The result must match ``model.predict``.
    """
    import pandas as pd
    from sympy.utilities.lambdify import lambdify
    from sympy.printing.lambdarepr import NumPyPrinter

    class PyEarthNumpyPrinter(NumPyPrinter):
        """Printer that renders the custom pyearth functions as NumPy calls."""

        def _print_Max(self, expr):
            return 'maximum(' + ','.join(self._print(i) for i in expr.args) + ')'

        def _print_NaNProtect(self, expr):
            printed_args = ','.join(self._print(a) for a in expr.args)
            return 'where(isnan(' + printed_args + '), 0, ' + printed_args + ')'

        def _print_Missing(self, expr):
            return 'isnan(' + ','.join(self._print(a) for a in expr.args) + ').astype(float)'

    # Names made available to the lambdified code; identical for every case,
    # so it is built once.
    module_dict = {
        'select': numpy.select,
        'less_equal': numpy.less_equal,
        'isnan': numpy.isnan,
        'greater_equal': numpy.greater_equal,
        'logical_and': numpy.logical_and,
        'less': numpy.less,
        'logical_not': numpy.logical_not,
        "greater": numpy.greater,
        'maximum': numpy.maximum,
        'Missing': lambda x: numpy.isnan(x).astype(float),
        'NaNProtect': lambda x: numpy.where(numpy.isnan(x), 0, x),
        'nan': numpy.nan,
        'float': float,
        'where': numpy.where
    }

    for smooth, n_cols, allow_missing in product((True, False), (1, 2), (True, False)):
        X_df = pd.DataFrame(X.copy(), columns=['x_%d' % i for i in range(X.shape[1])])
        y_df = pd.DataFrame(Y[:, :n_cols])
        if allow_missing:
            # Randomly remove some values so that the fitted model contains
            # MissingnessBasisFunctions. A single .loc assignment is used
            # because chained indexing (df[col][mask] = ...) may write to a
            # temporary copy and leave X_df unchanged.
            missing_rows = numpy.random.binomial(
                n=1, p=.1, size=X_df.shape[0]).astype(bool)
            X_df.loc[missing_rows, 'x_1'] = numpy.nan

        model = Earth(allow_missing=allow_missing, smooth=smooth,
                      max_degree=2).fit(X_df, y_df)
        # Normalise to a list so single- and multi-output models share the loop.
        expressions = export_sympy(model) if n_cols > 1 else [export_sympy(model)]

        for i, expression in enumerate(expressions):
            # The lambdified functions for smoothed basis functions only work
            # with modules='numpy' and for regular basis functions with
            # modules={'Max':numpy.maximum}. This is a confusing situation
            func = lambdify(X_df.columns, expression,
                            printer=PyEarthNumpyPrinter, modules=module_dict)
            y_pred_sympy = func(*[X_df.loc[:, var] for var in X_df.columns])
            y_pred = model.predict(X_df)[:, i] if n_cols > 1 else model.predict(X_df)
            assert_array_almost_equal(y_pred, y_pred_sympy)
class MARSInterpolant(Surrogate):
    """Surrogate model backed by a py-earth MARS regression.

    The fitted model is a weighted sum of basis functions

    .. math::

        \\hat{f}(x) = \\sum_{i=1}^{k} c_i B_i(x),

    where every basis function :math:`B_i(x)` is one of:

    1. the constant 1;
    2. a hinge function :math:`\\max(0, x - const)` or
       :math:`\\max(0, const - x)`, with the variables and knot values
       selected automatically by MARS;
    3. a product of two or more hinge functions, which can model
       interaction between two or more variables.

    :param dim: Number of dimensions
    :type dim: int

    :ivar dim: Number of dimensions
    :ivar num_pts: Number of points in surrogate model
    :ivar X: Points incorporated in surrogate model (num_pts x dim)
    :ivar fX: Function values in surrogate model (num_pts x 1)
    :ivar updated: True if model is up-to-date (no refit needed)
    :ivar model: Earth object
    """

    def __init__(self, dim):
        self.num_pts, self.dim = 0, dim
        # Start with empty point and value arrays.
        self.X = np.empty((0, dim))
        self.fX = np.empty((0, 1))
        self.updated = False
        # pyearth is imported here rather than at module level; a missing
        # package is reported and the ImportError is re-raised.
        try:
            from pyearth import Earth
            self.model = Earth()
        except ImportError as err:
            print("Failed to import pyearth")
            raise err

    def _fit(self):
        """Refit the Earth model when ``updated`` is False; otherwise do nothing."""
        with warnings.catch_warnings():
            # Suppress warnings (e.g. deprecation warnings) for the duration
            # of the fit only; the previous filters are restored on exit.
            warnings.simplefilter("ignore")
            needs_refit = self.updated is False
            if needs_refit:
                self.model.fit(self.X, self.fX)
                self.updated = True

    def predict(self, xx):
        """Evaluate the MARS interpolant at the points xx

        :param xx: Prediction points, must be of size num_pts x dim or (dim, )
        :type xx: numpy.ndarray

        :return: Model prediction with an extra axis inserted at position 1
        :rtype: numpy.ndarray
        """
        self._fit()
        points = np.atleast_2d(xx)
        prediction = self.model.predict(points)
        return np.expand_dims(prediction, axis=1)

    def predict_deriv(self, xx):
        """Evaluate the derivative of the MARS interpolant at xx

        A leading axis is added to ``xx`` before it is handed to the model,
        and the first entry of the model's ``predict_deriv`` output is
        returned.

        :param xx: Point at which to differentiate
        :type xx: numpy.array

        :return: Derivative of the MARS interpolant at xx
        :rtype: numpy.array
        """
        self._fit()
        point = np.expand_dims(xx, axis=0)
        return self.model.predict_deriv(point, variables=None)[0]
class MARSInterpolant(Surrogate):
    """Compute and evaluate a MARS interpolant

    MARS builds a model of the form

    .. math::

        \\hat{f}(x) = \\sum_{i=1}^{k} c_i B_i(x).

    The model is a weighted sum of basis functions :math:`B_i(x)`. Each basis
    function :math:`B_i(x)` takes one of the following three forms:

    1. a constant 1.
    2. a hinge function of the form :math:`\\max(0, x - const)` or
       :math:`\\max(0, const - x)`. MARS automatically selects variables
       and values of those variables for knots of the hinge functions.
    3. a product of two or more hinge functions. These basis functions
       can model interaction between two or more variables.

    :param dim: Number of dimensions
    :type dim: int

    :ivar dim: Number of dimensions
    :ivar num_pts: Number of points in surrogate model
    :ivar X: Point incorporated in surrogate model (num_pts x dim)
    :ivar fX: Function values in surrogate model (num_pts x 1)
    :ivar updated: True if model is up-to-date (no refit needed)
    :ivar model: Earth object
    """

    def __init__(self, dim):
        self.num_pts = 0
        self.X = np.empty([0, dim])
        self.fX = np.empty([0, 1])
        self.dim = dim
        self.updated = False
        # pyearth is imported here rather than at module level; a missing
        # package is reported and the ImportError is re-raised.
        try:
            from pyearth import Earth
            self.model = Earth()
        except ImportError as err:
            print("Failed to import pyearth")
            raise err

    def _fit(self):
        """Compute new coefficients if the MARS interpolant is not updated."""
        # Scope the "ignore" filter to this fit. A bare simplefilter("ignore")
        # would permanently silence every warning in the whole process.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # Suppress deprecation warnings
            if self.updated is False:
                self.model.fit(self.X, self.fX)
                self.updated = True

    def predict(self, xx):
        """Evaluate the MARS interpolant at the points xx

        :param xx: Prediction points, must be of size num_pts x dim or (dim, )
        :type xx: numpy.ndarray

        :return: Model prediction with an extra axis inserted at position 1
        :rtype: numpy.ndarray
        """
        self._fit()
        xx = np.atleast_2d(xx)
        return np.expand_dims(self.model.predict(xx), axis=1)

    def predict_deriv(self, xx):
        """Evaluate the derivative of the MARS interpolant at xx

        A leading axis is added to ``xx`` before it is handed to the model,
        and the first entry of the model's ``predict_deriv`` output is
        returned.

        :param xx: Point at which to differentiate
        :type xx: numpy.array

        :return: Derivative of the MARS interpolant at xx
        :rtype: numpy.array
        """
        self._fit()
        xx = np.expand_dims(xx, axis=0)
        dfx = self.model.predict_deriv(xx, variables=None)
        return dfx[0]
class MARSInterpolant(Earth):
    """Compute and evaluate a MARS interpolant

    MARS builds a model of the form

    .. math::

        \\hat{f}(x) = \\sum_{i=1}^{k} c_i B_i(x).

    The model is a weighted sum of basis functions :math:`B_i(x)`. Each basis
    function :math:`B_i(x)` takes one of the following three forms:

    1. a constant 1.
    2. a hinge function of the form :math:`\\max(0, x - const)` or
       :math:`\\max(0, const - x)`. MARS automatically selects variables
       and values of those variables for knots of the hinge functions.
    3. a product of two or more hinge functions. These basis functions
       can model interaction between two or more variables.

    Points are stored in pre-allocated arrays that grow on demand, and the
    wrapped ``Earth`` model is refitted lazily on the next evaluation after
    a point has been added.

    :param maxp: Initial capacity
    :type maxp: int

    :ivar nump: Current number of points
    :ivar maxp: Initial maximum number of points (can grow)
    :ivar x: Interpolation points
    :ivar fx: Function evaluations of interpolation points
    :ivar dim: Number of dimensions
    :ivar model: MARS interpolation model
    :ivar updated: True if ``model`` has been fitted to the current points
    """

    def __init__(self, maxp=100):
        # NOTE: the Earth base initializer is not called; all fitting and
        # prediction goes through the separate self.model instance.
        self.nump = 0
        self.maxp = maxp
        self.x = None     # pylint: disable=invalid-name
        self.fx = None
        self.dim = None
        self.model = Earth()
        self.updated = False

    def reset(self):
        """Reset the interpolation."""
        self.nump = 0
        self.x = None
        self.fx = None
        self.updated = False

    def _alloc(self, dim):
        """Allocate zero-filled storage for x and fx.

        :param dim: Number of dimensions
        :type dim: int
        """
        maxp = self.maxp
        self.dim = dim
        self.x = np.zeros((maxp, dim))
        self.fx = np.zeros((maxp, 1))

    def _realloc(self, dim, extra=1):
        """Expand allocation to accommodate more points (if needed)

        :param dim: Number of dimensions
        :type dim: int
        :param extra: Number of additional points to accommodate
        :type extra: int
        """
        if self.nump == 0:
            self._alloc(dim)
        elif self.nump + extra > self.maxp:
            self.maxp = max(self.maxp * 2, self.maxp + extra)
            # Grow by allocate-and-copy. An in-place ndarray.resize raises
            # ValueError while views of the array are alive, and get_x() /
            # get_fx() hand out exactly such views.
            grown_x = np.zeros((self.maxp, dim))
            grown_x[:self.x.shape[0], :] = self.x
            self.x = grown_x
            grown_fx = np.zeros((self.maxp, 1))
            grown_fx[:self.fx.shape[0], :] = self.fx
            self.fx = grown_fx

    def _refit_if_stale(self):
        """Fit the model to the stored points unless it is already up to date."""
        if self.updated is False:
            self.model.fit(self.get_x(), self.get_fx())
            self.updated = True

    def get_x(self):
        """Get the list of data points

        :return: List of data points
        :rtype: numpy.array
        """
        return self.x[:self.nump, :]

    def get_fx(self):
        """Get the list of function values for the data points.

        :return: List of function values
        :rtype: numpy.array
        """
        return self.fx[:self.nump, :]

    def add_point(self, xx, fx):
        """Add a new function evaluation

        :param xx: Point to add
        :type xx: numpy.array
        :param fx: The function value of the point to add
        :type fx: float
        """
        dim = len(xx)
        self._realloc(dim)
        self.x[self.nump, :] = xx
        self.fx[self.nump, :] = fx
        self.nump += 1
        # The model no longer reflects the stored points.
        self.updated = False

    def eval(self, x, ds=None):
        """Evaluate the MARS interpolant at the point x

        :param x: Point where to evaluate
        :type x: numpy.array
        :param ds: Not used
        :type ds: None

        :return: Value of the MARS interpolant at x
        :rtype: float
        """
        self._refit_if_stale()
        # Add a leading axis so the single point is passed as one row.
        x = np.expand_dims(x, axis=0)
        fx = self.model.predict(x)
        return fx[0]

    def evals(self, x, ds=None):
        """Evaluate the MARS interpolant at the points x

        :param x: Points where to evaluate, of size npts x dim
        :type x: numpy.array
        :param ds: Not used
        :type ds: None

        :return: Values of the MARS interpolant at x, of size npts x 1
        :rtype: numpy.array
        """
        self._refit_if_stale()
        fx = np.zeros(shape=(x.shape[0], 1))
        fx[:, 0] = self.model.predict(x)
        return fx

    def deriv(self, x, ds=None):
        """Evaluate the derivative of the MARS interpolant at a point x

        :param x: Point for which we want to compute the MARS gradient
        :type x: numpy.array
        :param ds: Not used
        :type ds: None

        :return: Derivative of the MARS interpolant at x
        :rtype: numpy.array
        """
        self._refit_if_stale()
        x = np.expand_dims(x, axis=0)
        dfx = self.model.predict_deriv(x, variables=None)
        return dfx[0]
#drawCumulativeHist(y,'PM2.5','Frequency','Curve cumulative of PM2.5') ##箱图 #drawBox(y,'PM2.5','BOX of PM2.5') ##print y.shape ##重新对y进行shape塑造,方便后面的计算,从这里开始y是reshape之后的y y=y.reshape(-1,1)#不影响结果 #拟合 #1)Fit an Earth model model = Earth() model.fit(X,y) #这里用的是标准化之后的数据 #2)Print the model模型结果 print(model.trace()) print(model.summary()) #3)预测的y y_hat = model.predict(X) #print y_hat #print'RMSE',numpy.sqrt(metrics.mean_squared_error(y, y_hat)) #print'MSE',metrics.mean_squared_error(y, y_hat) #绘图显示 pyplot.figure(figsize=(12,6)) pyplot.plot(X,y,'m+',label='original values') pyplot.plot(X,y_hat,'b.',label='polyfit values') pyplot.legend(loc=4) #指定legend的位置右下角 #设置坐标轴刻度 my_x_ticks = numpy.arange(0,3.5,0.5) my_y_ticks = numpy.arange(0,600,50) pyplot.xticks(my_x_ticks) pyplot.yticks(my_y_ticks)
# Earth example with a binary predictor: the response is linear in the
# inputs, so the fitted model is expected to show linear terms.

# Seed the global NumPy generator so the synthetic data is reproducible.
np.random.seed(1)
m = 1000  # number of samples
n = 5  # number of predictor columns
X = np.random.normal(size=(m, n))

# Make X[:,1] binary
X[:, 1] = np.random.binomial(1, .5, size=m)

# The response is a linear function of the inputs
y = 2 * X[:, 0] + 3 * X[:, 1] + np.random.normal(size=m)

# Fit the earth model
model = Earth().fit(X, y)

# Print the model summary, showing linear terms.
# Single-argument print(...) behaves the same on Python 2 and 3.
print(model.summary())

# Plot for both values of X[:,1]
y_hat = model.predict(X)
plt.figure()
plt.plot(X[:, 0], y, 'k.')
plt.plot(X[X[:, 1] == 0, 0], y_hat[X[:, 1] == 0], 'r.', label='$x_1 = 0$')
plt.plot(X[X[:, 1] == 1, 0], y_hat[X[:, 1] == 1], 'b.', label='$x_1 = 1$')
plt.legend(loc='best')
plt.xlabel('$x_0$')
plt.show()
'Old Qual_Score', 'Old Avg_Position', 'Old Impressions', 'New Impressions', #'New Avg_CPC', 'Old Avg_CPC', 'New Keyword Density', 'Old Keyword Density', 'New Value_Click', 'Old Value_Click']] #Print the model print model.trace() print model.summary() y_cpc_hat = model.predict(X_cpc_hat) #Plot the model pyplot.figure() pyplot.plot(y_cpc,'r.') pyplot.plot(y_cpc_hat,'b.') pyplot.xlabel('x') pyplot.ylabel('y') pyplot.title('MARS Regression') pyplot.show() ''' #Build Conv_Rate Model #Build conv table