def run(data, in_alpha, in_l1_ratio, run_origin="localRun"):
    """Train an ElasticNet on `data` and log params/metrics to MLflow.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset with a "quality" target column plus feature columns.
    in_alpha : float or None
        ElasticNet alpha; falls back to 0.5 when None.
    in_l1_ratio : float or None
        ElasticNet l1_ratio; falls back to 0.5 when None.
    run_origin : str
        Used as both the MLflow run name and a run tag.
    """
    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    # BUGFIX: the original tested `float(in_alpha) is None`, which is never
    # True (float() always returns a float) and raises TypeError when the
    # argument actually is None. Test the raw argument before converting.
    alpha = 0.5 if in_alpha is None else float(in_alpha)
    l1_ratio = 0.5 if in_l1_ratio is None else float(in_l1_ratio)

    # Useful for multiple runs (only doing one run in this sample notebook)
    with mlflow.start_run(run_name=run_origin) as run:
        # Execute ElasticNet
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        # Evaluate Metrics
        predicted_qualities = lr.predict(test_x)
        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        # Print out metrics
        print("runId: ", run.info.run_id)
        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print(" RMSE: %s" % rmse)
        print(" MAE: %s" % mae)
        print(" R2: %s" % r2)
        print(" hyperparameters: ", lr.get_params())

        # Log parameter, metrics, and model to MLflow
        mlflow.log_params(lr.get_params())
        mlflow.log_metrics({"rmse": rmse, "r2": r2, "mae": mae})
        mlflow.set_tags({"run_origin": run_origin})
def model_el_net(args, y):
    """Fit an ElasticNet (alpha=0.1, l1_ratio=0.7) on the given data.

    Returns a tuple of (training R^2 score, estimator parameter dict,
    coefficient list with the intercept as its first element).
    """
    alpha = 0.1
    l1_ratio = 0.7
    estimator = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    estimator.fit(args, y)
    score = estimator.score(args, y)
    # Intercept first, then the per-feature coefficients.
    coefficients = [estimator.intercept_, *estimator.coef_]
    return score, estimator.get_params(), coefficients
def init_model(model="", parameters=None):
    """Build a regressor of the requested kind, configured from `parameters`.

    Parameters
    ----------
    model : str
        One of "elastic_net", "sgd_regressor", "ridge", "neural_network";
        anything else falls back to ElasticNet.
    parameters : dict or None
        Candidate hyperparameters; only keys the chosen estimator actually
        accepts are applied.

    Returns
    -------
    A configured sklearn regressor, or whatever `neural_network(parameters)`
    returns for the "neural_network" case.
    """
    # BUGFIX: the original used a mutable default `parameters={}`, which is
    # shared across calls; use None and create a fresh dict per call.
    if parameters is None:
        parameters = {}
    if model == "elastic_net":
        regressor = ElasticNet()
    elif model == "sgd_regressor":
        regressor = SGDRegressor()
    elif model == "ridge":
        regressor = Ridge()
    elif model == 'neural_network':
        # Neural networks are built by a dedicated factory instead.
        return neural_network(parameters)
    else:
        # Unknown/empty model name falls back to ElasticNet.
        regressor = ElasticNet()
    # Keep only the supplied parameters the chosen estimator understands.
    available_params = set(regressor.get_params().keys()).intersection(
        set(parameters.keys()))
    params = {a_p: parameters[a_p] for a_p in available_params}
    regressor.set_params(**params)
    return regressor
# OLS regressions of fit-error / noise measures on each liquidity factor,
# with crisis-dummy interaction terms.
smf.ols(f"fit_err ~ {fac} + I(crisis_dummy * {fac})", liqFactors).fit().summary()
smf.ols(f"Noise_bp ~ {fac} + I(crisis_dummy * {fac})", liqFactors).fit().summary()
smf.ols(f"price_err ~ {fac} + I(crisis_dummy * {fac})", liqFactors).fit().summary()
smf.ols(f"fit_err ~ {fac} + I(wide_crisis_dummy * {fac})", liqFactors).fit().summary()
smf.ols(f"Noise_bp ~ {fac} + I(wide_crisis_dummy * {fac})", liqFactors).fit().summary()
smf.ols(f"price_err ~ {fac} + I(wide_crisis_dummy * {fac})", liqFactors).fit().summary()

from sklearn.linear_model import ElasticNet

# NOTE(review): the first selection (including prem_10y) is immediately
# overwritten by the second; kept for reference.
sk_mat = liqFactors[['fit_err', 'treas3m', 'prem_5y', 'prem_10y', 'BondVol',
                     'Libor', 'Baa_Aaa', 'VIX', 'ValueWeightedMKT']].dropna()
sk_mat = liqFactors[['fit_err', 'treas3m', 'prem_5y', 'BondVol', 'Libor',
                     'Baa_Aaa', 'VIX', 'ValueWeightedMKT']].dropna()
# Standardize, then convert to a plain ndarray.
# BUGFIX: DataFrame.as_matrix() was removed in pandas 1.0 — use to_numpy().
sk_mat = ((sk_mat - sk_mat.mean()) / sk_mat.std()).to_numpy()
# Column 0 is the fit_err target; the remaining columns are the factors.
en = ElasticNet(alpha=0.4, l1_ratio=0.5).fit(sk_mat[:, 1:], sk_mat[:, 0])
print(en.coef_)
print(en.get_params())

# Single-factor OLS comparisons of the two liquidity premia.
smf.ols(f"fit_err ~ prem_10y", liqFactors).fit().summary()
smf.ols(f"fit_err ~ prem_5y", liqFactors).fit().summary()
smf.ols(f"fit_err ~ prem_5y + crisis_dummy", liqFactors).fit().summary()
s = smf.ols(f"fit_err ~ prem_10y", otr_prem_monthly).fit(); print(s.summary())
s = smf.ols(f"fit_err ~ prem_5y", otr_prem_monthly).fit(); print(s.summary())
# Plot fitted values against the observed fit_err series.
ax.cla()
ax.plot(otr_prem_monthly.index, s.fittedvalues)
ax.plot(otr_prem_monthly.fit_err)
glmnet_best_params = glmnet.get_params() #%% # Defining the method for crossvalidation. We crossvalidate each individual row crossvalidation = KFold(n_splits=70, shuffle=True, random_state=seed) # Defining list of scoring methods scoring = ["neg_mean_squared_error", "neg_mean_absolute_error"] #%% glmnet_model = ElasticNet() glmnet_best_params_matching = { key: glmnet_best_params[key] for key in glmnet_model.get_params().keys() if key in glmnet_best_params } # manual tuning so that things work glmnet_best_params_matching["precompute"] = False pipelines = [] pipelines.append( ("GLMNET", make_pipeline(ElasticNet(**glmnet_best_params_matching)))) #%% plot_cv_predictions( pipelines=pipelines, X=X,
class ElasticNet(Model):
    """Wrapper around sklearn's ElasticNet (aliased as ElasticNetModel)
    exposing a common fit/predict/metrics interface for this project's
    Model hierarchy. Supports 'regressor' and (via label mapping)
    'classifier' modes."""

    # X represents the features, Y represents the labels
    X = None
    Y = None
    prediction = None
    model = None

    # NOTE(review): Python does not overload methods — this no-arg __init__
    # is shadowed by the full __init__ below and is dead code.
    def __init__(self):
        pass

    def __init__(self, X=None, Y=None, label_headers=None, l1_ratio=1, type='regressor', cfg=False):
        # `type` selects 'regressor' vs 'classifier' behavior; `cfg` controls
        # whether save() writes the hyperparameters to disk.
        if X is not None:
            self.X = X
        if Y is not None:
            self.Y = Y
        self.type = type
        self.cfg = cfg
        self.mapping_dict = None
        self.label_headers = label_headers
        self.model = ElasticNetModel(l1_ratio=l1_ratio)

    def fit(self, X=None, Y=None):
        """Fit the wrapped model; classifier labels are first mapped from
        strings to numbers. Returns the fitted sklearn estimator."""
        if X is not None:
            self.X = X
        if Y is not None:
            self.Y = Y
        if self.type == 'classifier':
            self.Y = self.map_str_to_number(self.Y)
        print('ElasticNet Train started............')
        self.model.fit(self.X, self.Y)
        print('ElasticNet completed..........')
        return self.model

    def predict(self, test_features):
        """Predict on `test_features`; stores and returns the predictions."""
        print('Prediction started............')
        self.predictions = self.model.predict(test_features)
        if self.type == 'classifier':
            # NOTE(review): `predictions` (local) is read before assignment
            # here — this raises UnboundLocalError in classifier mode;
            # presumably `self.predictions` was intended. Confirm and fix.
            predictions = predictions.round()
        print('Prediction completed..........')
        return self.predictions

    def save(self):
        # Persists only the hyperparameters (when cfg is set), not the model.
        if self.cfg:
            f = open('elasticnet_configs.txt', 'w')
            f.write(json.dumps(self.model.get_params()))
            f.close()
        print('No models will be saved for elasticnet')

    def featureImportance(self):
        # Linear-model coefficients serve as the importance measure.
        return self.model.coef_

    def map_str_to_number(self, Y):
        """Map string class labels in each label column to integer codes,
        caching the mapping in self.mapping_dict for reuse."""
        mapping_flag = False
        if self.mapping_dict is not None:
            # Reuse the mapping built on a previous call.
            for label_header in self.label_headers:
                Y[label_header] = Y[label_header].map(self.mapping_dict)
            return Y
        mapping_dict = None
        for label_header in self.label_headers:
            check_list = pd.Series(Y[label_header])
            # Only remap columns that actually contain string labels.
            for item in check_list:
                if type(item) == str:
                    mapping_flag = True
                    break
            if mapping_flag:
                classes = Y[label_header].unique()
                mapping_dict = {}
                index = 0
                for c in classes:
                    mapping_dict[c] = index
                    index += 1
                Y[label_header] = Y[label_header].map(mapping_dict)
                mapping_flag = False
        # NOTE(review): only the last column's mapping is cached — with
        # multiple string label columns earlier mappings are lost.
        self.mapping_dict = mapping_dict
        return Y

    def map_number_to_str(self, Y, classes):
        """Map rounded numeric predictions back to their class labels using
        the cached mapping (or one freshly built from `classes`)."""
        Y = Y.round()
        Y = Y.astype(int)
        if self.mapping_dict is not None:
            mapping_dict = self.mapping_dict
        else:
            mapping_dict = {}
            index = 0
            for c in classes:
                mapping_dict[index] = c
                index += 1
        # Invert the mapping so lookups go number -> label.
        inv_map = {v: k for k, v in mapping_dict.items()}
        return Y.map(inv_map)

    def getAccuracy(self, test_labels, predictions, origin=0, hitmissr=0.8):
        """Classifier: fraction of exact matches. Regressor: fraction of
        predictions within the `hitmissr` relative-error tolerance."""
        if self.type == 'classifier':
            correct = 0
            df = pd.DataFrame(data=predictions.flatten())
            test_labels = self.map_str_to_number(test_labels.copy())
            for i in range(len(df)):
                if (df.values[i] == test_labels.values[i]):
                    correct = correct + 1
                else:
                    # NOTE(review): resetting the counter on a miss means the
                    # result only counts the trailing run of correct
                    # predictions, not overall accuracy — confirm intent.
                    correct = 0
        else:
            df = pd.DataFrame(data=predictions.flatten())
            for i in range(len(df)):
                if 1 - abs(df.values[i] - test_labels.values[i])/abs(df.values[i]) >= hitmissr:
                    correct = correct + 1
        return float(correct)/len(df)

    def getConfusionMatrix(self, test_labels, predictions, label_headers):
        """Plot a normalized confusion matrix per label column (classifier
        only); regressors get a descriptive string instead."""
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'classifier':
            index = 0
            for label_header in label_headers:
                classes = test_labels[label_header].unique()
                # NOTE(review): DataFrame.ix was removed in pandas 1.0 —
                # this needs .iloc under a modern pandas.
                df_tmp = self.map_number_to_str(df.ix[:,index], classes)
                title = 'Normalized confusion matrix for NeuralNetwork (' + label_header + ')'
                self.plot_confusion_matrix(test_labels.ix[:,index], df_tmp, classes=classes, normalize=True, title=title)
                index = index + 1
        else:
            return 'No Confusion Matrix for Regression'

    def getROC(self, test_labels, predictions, label_headers):
        """Plot a ROC curve (classifier only)."""
        predictions=pd.DataFrame(data=predictions.flatten())
        predictions.columns=test_labels.columns.values
        if self.type == 'classifier':
            test_labels = self.map_str_to_number(test_labels)
            fpr, tpr, _ = roc_curve(test_labels, predictions)
            plt.figure(1)
            plt.plot([0, 1], [0, 1], 'k--')
            plt.plot(fpr, tpr)
            plt.xlabel('False positive rate')
            plt.ylabel('True positive rate')
            plt.title('ROC curve')
            plt.show()
        else:
            return 'No Confusion Matrix for Regression'

    def getRSquare(self, test_labels, predictions, mode='single'):
        """R^2 of the predictions (regressor only); 'multiple' mode uses
        variance-weighted multioutput averaging."""
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            if mode == 'multiple':
                errors = r2_score(test_labels, df, multioutput='variance_weighted')
            else:
                errors = r2_score(test_labels, df)
            return errors
        else:
            return 'No RSquare for Classification'

    def getMSE(self, test_labels, predictions):
        """Mean squared error (regressor only)."""
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = mean_squared_error(test_labels, df)
            return errors
        else:
            return 'No MSE for Classification'

    def getMAPE(self, test_labels, predictions):
        """Mean absolute percentage error (regressor only)."""
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = np.mean(np.abs((test_labels - df.values) / test_labels)) * 100
            return errors.values[0]
        else:
            return 'No MAPE for Classification'

    def getRMSE(self, test_labels, predictions):
        """Root mean squared error (regressor only)."""
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = sqrt(mean_squared_error(test_labels, df))
            return errors
        else:
            return 'No RMSE for Classification'
class LinearRegression(Model):
    """ElasticNet-based forecaster for an energy/region series, built on the
    project's Model base (which supplies past_X, y, future_X, energy,
    region_name and get_train_test_split)."""

    def __init__(self, past_X, y, future_X, energy, region_name, random_state=0, l1_ratio=.9, normalize=False, max_iter=30000, selection='random', alpha=1.0):
        super().__init__(past_X, y, future_X, energy, region_name)
        self.random_state = random_state
        self.l1_ratio = l1_ratio
        self.normalize = normalize
        self.max_iter = max_iter
        self.selection = selection
        self.alpha = alpha
        # NOTE(review): the `normalize` kwarg was removed from sklearn's
        # ElasticNet in recent versions — confirm the pinned sklearn version.
        self.regr = ElasticNet(
            random_state=self.random_state,
            l1_ratio=self.l1_ratio,  # combination of l1 and l2 penalty
            normalize=self.normalize,
            max_iter=self.max_iter,
            selection=self.selection,  # coefficients updated in random order (faster)
            alpha=self.alpha,
        )
        self.indeps = None       # optional subset of independent variables
        self.predictions = None  # populated by predict()

    def fit(self, indeps=None, verbose=False):
        """Fit on past_X (optionally restricted to the `indeps` columns);
        verbose mode prints parameters, coefficients and training R^2."""
        x = self.past_X.copy()
        if indeps:
            self.indeps = indeps
            x = self.past_X[self.indeps]
        self.regr.fit(x, self.y)
        if verbose:
            print('Regression Parameters: ', self.regr.get_params())
            print('Parameter Coefficients: ', self.regr.coef_)
            print('Regression intercept: ', self.regr.intercept_)
            print(
                f'R2 for {self.region_name} {self.energy}: {self.regr.score(x, self.y)}'
            )

    def predict(self, gdp_type=None, indeps=None, verbose=False, past_yrs=None):
        """Predict on future_X (or selected past years); stores and returns
        the predictions."""
        # yrs is optional list of years to predict from the past
        x = self.future_X.copy()
        if past_yrs:
            # NOTE(review): `self.past_X.index.year in past_yrs` tests the
            # whole year array for membership, not element-wise — presumably
            # `.isin(past_yrs)` was intended; confirm against callers.
            x = self.past_X[self.past_X.index.year in past_yrs]
        if self.indeps:
            x = self.future_X[self.indeps]
        if gdp_type:
            x = self._add_gdp_type(x, gdp_type)
        self.predictions = self.regr.predict(x)
        return self.predictions

    def get_predictions_train_test(self, indeps=None, test_yr=2019):
        """Fit a fresh estimator on a train/test split at `test_yr` and
        return (test-set predictions, test-set targets)."""
        regr = ElasticNet(
            random_state=self.random_state,
            l1_ratio=self.l1_ratio,  # combination of l1 and l2 penalty
            normalize=self.normalize,
            max_iter=self.max_iter,
            selection=self.selection,  # coefficients updated in random order (faster)
            alpha=self.alpha,
        )
        train_X, test_X, train_y, test_y = self.get_train_test_split(test_yr)
        train_X = self._get_indep(train_X, indeps)
        test_X = self._get_indep(test_X, indeps)
        regr.fit(train_X, train_y)
        return regr.predict(test_X), test_y