Example #1
def run(data, in_alpha, in_l1_ratio, run_origin="localRun"):

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    # Set default values if no alpha is provided
    if in_alpha is None:
        alpha = 0.5
    else:
        alpha = float(in_alpha)

    # Set default values if no l1_ratio is provided
    if in_l1_ratio is None:
        l1_ratio = 0.5
    else:
        l1_ratio = float(in_l1_ratio)

    # Useful for multiple runs (only doing one run in this sample notebook)
    with mlflow.start_run(run_name=run_origin) as run:

        # Execute ElasticNet
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        # Evaluate Metrics
        predicted_qualities = lr.predict(test_x)
        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        # Print out metrics
        print("runId: ", run.info.run_id)
        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)
        print("  hyperparameters: ", lr.get_params())

        # Log parameter, metrics, and model to MLflow
        mlflow.log_params(lr.get_params())
        mlflow.log_metrics({"rmse": rmse, "r2": r2, "mae": mae})
        mlflow.set_tags({"run_origin": run_origin})
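
# The eval_metrics helper called above is not shown in this example. A minimal
# sketch of a plausible implementation, assuming it returns the triple
# (rmse, mae, r2) in the order the caller unpacks it:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2
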
def model_el_net(args, y):
    # Fit an ElasticNet with fixed hyperparameters; return the in-sample R^2,
    # the model parameters, and the intercept followed by the coefficients.
    alpha = 0.1
    l1_ratio = 0.7
    enet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    enet.fit(args, y)
    res = enet.score(args, y)
    params = enet.get_params()
    coefs = [enet.intercept_] + list(enet.coef_)
    return res, params, coefs
def init_model(model="", parameters=None):
    # Avoid a mutable default argument; fall back to an empty dict.
    if parameters is None:
        parameters = {}
    if model == "elastic_net":
        regressor = ElasticNet()
    elif model == "sgd_regressor":
        regressor = SGDRegressor()
    elif model == "ridge":
        regressor = Ridge()
    elif model == 'neural_network':
        return neural_network(parameters)
    else:
        regressor = ElasticNet()
    # get all available parameters
    available_params = set(regressor.get_params().keys()).intersection(
        set(parameters.keys()))
    params = {a_p: parameters[a_p] for a_p in available_params}
    regressor.set_params(**params)
    return regressor
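
# A hypothetical usage sketch for init_model: parameter keys that the chosen
# estimator does not expose (here "n_estimators") are silently dropped before
# set_params is applied. The values below are made up for illustration.
example_params = {"alpha": 0.3, "l1_ratio": 0.7, "n_estimators": 100}
reg = init_model(model="elastic_net", parameters=example_params)
print(reg.get_params()["alpha"], reg.get_params()["l1_ratio"])  # -> 0.3 0.7
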
smf.ols(f"fit_err ~ {fac} + I(crisis_dummy * {fac})", liqFactors).fit().summary()
smf.ols(f"Noise_bp ~ {fac} + I(crisis_dummy * {fac})", liqFactors).fit().summary()
smf.ols(f"price_err ~ {fac} + I(crisis_dummy * {fac})", liqFactors).fit().summary()

smf.ols(f"fit_err ~ {fac} + I(wide_crisis_dummy * {fac})", liqFactors).fit().summary()
smf.ols(f"Noise_bp ~ {fac} + I(wide_crisis_dummy * {fac})", liqFactors).fit().summary()
smf.ols(f"price_err ~ {fac} + I(wide_crisis_dummy * {fac})", liqFactors).fit().summary()


from sklearn.linear_model import ElasticNet
sk_mat = liqFactors[['fit_err', 'treas3m', 'prem_5y', 'prem_10y', 'BondVol', 'Libor', 'Baa_Aaa', 'VIX', 'ValueWeightedMKT']].dropna()
sk_mat = liqFactors[['fit_err', 'treas3m', 'prem_5y', 'BondVol', 'Libor', 'Baa_Aaa', 'VIX', 'ValueWeightedMKT']].dropna()
sk_mat = ((sk_mat - sk_mat.mean()) / sk_mat.std()).to_numpy()
en = ElasticNet(alpha=0.4, l1_ratio=0.5).fit(sk_mat[:,1:], sk_mat[:,0])
print(en.coef_)
print(en.get_params())



smf.ols(f"fit_err ~ prem_10y", liqFactors).fit().summary()
smf.ols(f"fit_err ~ prem_5y", liqFactors).fit().summary()
smf.ols(f"fit_err ~ prem_5y + crisis_dummy", liqFactors).fit().summary()


s = smf.ols(f"fit_err ~ prem_10y", otr_prem_monthly).fit(); print(s.summary())
s = smf.ols(f"fit_err ~ prem_5y", otr_prem_monthly).fit(); print(s.summary())
ax.cla()
ax.plot(otr_prem_monthly.index, s.fittedvalues)
ax.plot(otr_prem_monthly.index, otr_prem_monthly.fit_err)

glmnet_best_params = glmnet.get_params()

#%%

# Defining the cross-validation scheme: each individual row is held out in turn
# (leave-one-out when n_splits equals the number of rows)
crossvalidation = KFold(n_splits=70, shuffle=True, random_state=seed)

# Defining list of scoring methods
scoring = ["neg_mean_squared_error", "neg_mean_absolute_error"]

#%%

glmnet_model = ElasticNet()
glmnet_best_params_matching = {
    key: glmnet_best_params[key]
    for key in glmnet_model.get_params().keys() if key in glmnet_best_params
}

# Manually override precompute so that the parameters carried over from the
# glmnet fit are accepted by sklearn's ElasticNet
glmnet_best_params_matching["precompute"] = False

pipelines = []

pipelines.append(
    ("GLMNET", make_pipeline(ElasticNet(**glmnet_best_params_matching))))

#%%

plot_cv_predictions(
    pipelines=pipelines,
    X=X,
Example #6
class ElasticNet(Model):

    # X represents the features, Y represents the labels
    X = None
    Y = None
    prediction = None
    model = None

    def __init__(self, X=None, Y=None, label_headers=None, l1_ratio=1, type='regressor', cfg=False):

        if X is not None:
            self.X = X

        if Y is not None:
            self.Y = Y

        self.type = type
        self.cfg = cfg

        self.mapping_dict = None
        self.label_headers = label_headers

        self.model = ElasticNetModel(l1_ratio=l1_ratio)


    def fit(self, X=None, Y=None):
        if X is not None:
            self.X = X

        if Y is not None:
            self.Y = Y

        if self.type == 'classifier':
            self.Y = self.map_str_to_number(self.Y)

        print('ElasticNet Train started............')
        self.model.fit(self.X, self.Y)
        print('ElasticNet completed..........')

        return self.model

    def predict(self, test_features):
        print('Prediction started............')
        self.predictions = self.model.predict(test_features)
        if self.type == 'classifier':
            self.predictions = self.predictions.round()
        print('Prediction completed..........')
        return self.predictions


    def save(self):
        if self.cfg:
            with open('elasticnet_configs.txt', 'w') as f:
                f.write(json.dumps(self.model.get_params()))
        print('No models will be saved for elasticnet')

    def featureImportance(self):

        return self.model.coef_

    def map_str_to_number(self, Y):
        mapping_flag = False
        if self.mapping_dict is not None:
            for label_header in self.label_headers:
                Y[label_header] = Y[label_header].map(self.mapping_dict)
            return Y

        mapping_dict = None
        for label_header in self.label_headers:
            check_list = pd.Series(Y[label_header])
            for item in check_list:
                if type(item) == str:
                    mapping_flag = True
                    break
            if mapping_flag:
                classes = Y[label_header].unique()
                mapping_dict = {}
                index = 0
                for c in classes:
                    mapping_dict[c] = index
                    index += 1

                Y[label_header] = Y[label_header].map(mapping_dict)
                mapping_flag = False

        self.mapping_dict = mapping_dict
        return Y

    def map_number_to_str(self, Y, classes):
        Y = Y.round()
        Y = Y.astype(int)
        if self.mapping_dict is not None:
            mapping_dict = self.mapping_dict
        else:
            mapping_dict = {}
            index = 0
            for c in classes:
                mapping_dict[index] = c
                index += 1

        inv_map = {v: k for k, v in mapping_dict.items()}
        return Y.map(inv_map)


    def getAccuracy(self, test_labels, predictions, origin=0, hitmissr=0.8):
        if self.type == 'classifier':
            correct = 0
            df = pd.DataFrame(data=predictions.flatten())
            test_labels = self.map_str_to_number(test_labels.copy())
            for i in range(len(df)):
                if (df.values[i] == test_labels.values[i]):
                    correct = correct + 1
        else:
            correct = 0
            df = pd.DataFrame(data=predictions.flatten())
            for i in range(len(df)):
                if 1 - abs(df.values[i] - test_labels.values[i])/abs(df.values[i]) >= hitmissr:
                    correct = correct + 1
        return float(correct)/len(df)

    def getConfusionMatrix(self, test_labels, predictions, label_headers):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'classifier':
            index = 0
            for label_header in label_headers:
                classes = test_labels[label_header].unique()
                df_tmp = self.map_number_to_str(df.iloc[:, index], classes)
                title = 'Normalized confusion matrix for ElasticNet (' + label_header + ')'
                self.plot_confusion_matrix(test_labels.iloc[:, index], df_tmp, classes=classes, normalize=True,
                          title=title)
                index = index + 1
        else:
            return 'No Confusion Matrix for Regression'

    def getROC(self, test_labels, predictions, label_headers):
        predictions=pd.DataFrame(data=predictions.flatten())
        predictions.columns=test_labels.columns.values
        if self.type == 'classifier':
            test_labels = self.map_str_to_number(test_labels)
            fpr, tpr, _ = roc_curve(test_labels, predictions)
            plt.figure(1)
            plt.plot([0, 1], [0, 1], 'k--')
            plt.plot(fpr, tpr)
            plt.xlabel('False positive rate')
            plt.ylabel('True positive rate')
            plt.title('ROC curve')
            plt.show()
        else:
            return 'No ROC for Regression'

    def getRSquare(self, test_labels, predictions, mode='single'):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            if mode == 'multiple':
                errors = r2_score(test_labels, df, multioutput='variance_weighted')
            else:
                errors = r2_score(test_labels, df)
            return errors
        else:
            return 'No RSquare for Classification'

    def getMSE(self, test_labels, predictions):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = mean_squared_error(test_labels, df)
            return errors
        else:
            return 'No MSE for Classification'

    def getMAPE(self, test_labels, predictions):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = np.mean(np.abs((test_labels - df.values) / test_labels)) * 100
            return errors.values[0]
        else:
            return 'No MAPE for Classification'

    def getRMSE(self, test_labels, predictions):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = sqrt(mean_squared_error(test_labels, df))
            return errors
        else:
            return 'No RMSE for Classification'
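
# A hypothetical usage sketch for the wrapper class above, assuming train_X,
# train_Y, test_X and test_Y are pandas DataFrames prepared elsewhere and
# 'quality' is the label column:
wrapper = ElasticNet(X=train_X, Y=train_Y, label_headers=['quality'], l1_ratio=0.5, type='regressor')
wrapper.fit()
preds = wrapper.predict(test_X)
print('R2:  ', wrapper.getRSquare(test_Y, preds))
print('RMSE:', wrapper.getRMSE(test_Y, preds))
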
class LinearRegression(Model):
    def __init__(self,
                 past_X,
                 y,
                 future_X,
                 energy,
                 region_name,
                 random_state=0,
                 l1_ratio=.9,
                 normalize=False,
                 max_iter=30000,
                 selection='random',
                 alpha=1.0):
        super().__init__(past_X, y, future_X, energy, region_name)
        self.random_state = random_state
        self.l1_ratio = l1_ratio
        self.normalize = normalize
        self.max_iter = max_iter
        self.selection = selection
        self.alpha = alpha
        self.regr = ElasticNet(
            random_state=self.random_state,
            l1_ratio=self.l1_ratio,  # combination of l1 and l2 penalty
            normalize=self.normalize,
            max_iter=self.max_iter,
            selection=self.selection,  # coefficients updated in random order (faster)
            alpha=self.alpha,
        )
        self.indeps = None
        self.predictions = None

    def fit(self, indeps=None, verbose=False):
        x = self.past_X.copy()
        if indeps:
            self.indeps = indeps
            x = self.past_X[self.indeps]
        self.regr.fit(x, self.y)
        if verbose:
            print('Regression Parameters: ', self.regr.get_params())
            print('Parameter Coefficients: ', self.regr.coef_)
            print('Regression intercept: ', self.regr.intercept_)
        print(
            f'R2 for {self.region_name} {self.energy}: {self.regr.score(x, self.y)}'
        )

    def predict(self,
                gdp_type=None,
                indeps=None,
                verbose=False,
                past_yrs=None):
        # yrs is optional list of years to predict from the past
        x = self.future_X.copy()
        if past_yrs:
            x = self.past_X[self.past_X.index.year.isin(past_yrs)]
        if self.indeps:
            x = self.future_X[self.indeps]
        if gdp_type:
            x = self._add_gdp_type(x, gdp_type)
        self.predictions = self.regr.predict(x)
        return self.predictions

    def get_predictions_train_test(self, indeps=None, test_yr=2019):
        regr = ElasticNet(
            random_state=self.random_state,
            l1_ratio=self.l1_ratio,  # combination of l1 and l2 penalty
            normalize=self.normalize,
            max_iter=self.max_iter,
            selection=self.selection,  # coefficients updated in random order (faster)
            alpha=self.alpha,
        )

        train_X, test_X, train_y, test_y = self.get_train_test_split(test_yr)
        train_X = self._get_indep(train_X, indeps)
        test_X = self._get_indep(test_X, indeps)

        regr.fit(train_X, train_y)
        return regr.predict(test_X), test_y
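
# A hypothetical usage sketch, assuming past_X and future_X are DataFrames with
# a DatetimeIndex and y is the historical target, all prepared elsewhere; the
# energy and region_name strings are made up for illustration:
lr_model = LinearRegression(past_X, y, future_X, energy='electricity', region_name='west', alpha=0.5)
lr_model.fit(verbose=True)
forecast = lr_model.predict()
test_preds, test_actual = lr_model.get_predictions_train_test(test_yr=2019)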