예제 #1
0
    def fit(self, X, y):
        """Standardize ``X`` and grid-search a MultiTaskElasticNet on it.

        The fitted scaler is stored on ``self.standardizer`` and the fitted
        ``GridSearchCV`` object on ``self.clf``.

        Args:
            X: Feature matrix; it is passed through
                ``StandardScaler.fit_transform`` before the search.
            y: Target matrix; ``y.shape[1]`` must be 6.

        Raises:
            AssertionError: If ``y`` does not have exactly 6 columns.
        """
        # Single-argument print(...) calls produce the same output on
        # Python 2 and Python 3, unlike the print statement.
        print("Fitting a multiclass ElasticNet regressor...")
        assert (y.shape[1] == 6)

        self.standardizer = preprocessing.StandardScaler()
        X = self.standardizer.fit_transform(X)
        cv = model_selection.ShuffleSplit(n_splits=5,
                                          test_size=0.2,
                                          random_state=0)

        alpha_range = [
            0.005, 0.007, 0.002, 0.0025, 0.004, 0.003, 0.0035877427142009029,
            0.01, 0.001
        ]
        # NOTE(review): ``normalize`` is not accepted by newer scikit-learn
        # releases; kept because dropping it would change the fitted models.
        param_grid = [
            dict(alpha=alpha_range,
                 l1_ratio=[.1, .2, .25, .3, .35, .4, .5, .6, .65, .7, .8],
                 normalize=[True],
                 max_iter=[10000])
        ]
        print("Using param grid " + str(param_grid))
        self.clf = model_selection.GridSearchCV(
            linear_model.MultiTaskElasticNet(),
            param_grid=param_grid,
            cv=cv,
            n_jobs=12)
        self.clf.fit(X, y)
        print("Best params: " + str(self.clf.best_params_) +
              " and corresponding score is " + str(self.clf.best_score_))
예제 #2
0
def start_ltm_multi(tup, taus, w=0.1, alpha=0.001, **kwargs):
    """Fit a MultiTaskElasticNet to ``tup.data`` on the ``_make_base`` matrix.

    Args:
        tup: Object forwarded to ``_make_base``; its ``data`` attribute is
            used as the regression target.
        taus: Forwarded to ``_make_base``.
        w: Forwarded to ``_make_base`` as the ``w`` keyword.
        alpha: ``alpha`` of the ``MultiTaskElasticNet``.
        **kwargs: Additional keyword arguments for ``MultiTaskElasticNet``.

    Returns:
        Tuple ``(mod, coefs, fit, None)``: the fitted estimator, its
        ``coef_``, its prediction on the design matrix, and a ``None``
        placeholder.
    """
    X = _make_base(tup, taus, w=w)
    mod = lm.MultiTaskElasticNet(alpha=alpha, **kwargs)
    # Use an int iteration count and a real bool: scikit-learn releases that
    # validate parameter types at fit time reject ``5e4`` (float) and ``0``.
    mod.max_iter = 50000
    mod.verbose = 0
    mod.fit_intercept = False
    # NOTE(review): kept from the original; newer scikit-learn has no
    # ``normalize`` parameter, so there this is just an unused attribute.
    mod.normalize = 1
    mod.fit(X, tup.data)

    fit = mod.predict(X)
    coefs = mod.coef_
    return mod, coefs, fit, None
예제 #3
0
 def test_model_multi_task_elasticnet(self):
     """Convert a two-target MultiTaskElasticNet to ONNX and dump the result."""
     estimator = linear_model.MultiTaskElasticNet()
     model, X = fit_regression_model(estimator, n_targets=2)

     # One float input whose width is the number of columns of X.
     n_features = X.shape[1]
     initial_types = [("input", FloatTensorType([None, n_features]))]
     model_onnx = convert_sklearn(model,
                                  "multi-task elasticnet",
                                  initial_types,
                                  target_opset=TARGET_OPSET)
     self.assertIsNotNone(model_onnx)

     dump_data_and_model(X, model, model_onnx, verbose=False,
                         basename="SklearnMultiTaskElasticNet-Dec4")
예제 #4
0
def LDM(data, irf_fwhm=0.1, n_taus=None, alpha=1, p=0, cv=False, max_iter=1e4, lim_log=(None, None)):
    """Fit ``data.D`` with a regularized linear model on a log-spaced tau grid.

    Args:
        data: ``Data`` instance; ``times``, ``wavelengths`` and ``D`` are read.
        irf_fwhm: Forwarded to ``_X``; 0.1 ps by default, in the same units
            as the data time axis.
        n_taus: Number of taus; defaults to 30 per decade between the
            automatically chosen limits.
        alpha: Regularization strength (not used by ``RidgeCV``).
        p: ``l1_ratio``; 0 selects ridge regression, anything else selects
            ``MultiTaskElasticNet``.
        cv: When ``p == 0``, use ``RidgeCV`` instead of ``Ridge``.
        max_iter: Iteration limit for ``MultiTaskElasticNet``; converted to
            int before use.
        lim_log: ``(start, end)`` base-10 exponents of the tau grid; a
            ``None`` entry is replaced by the limit derived from the time axis.

    Returns:
        Tuple ``(coefficients, fitted, taus)`` where ``coefficients`` is
        ``mod.coef_.T`` and ``fitted`` is a ``Data`` built from the prediction.

    Raises:
        AssertionError: If ``data`` is not a ``Data`` instance.
    """
    # isinstance also accepts subclasses of Data, unlike an exact type match.
    assert isinstance(data, Data)

    dt = data.times[1] - data.times[0]
    max_t = data.times[-1]
    start = np.floor(np.log10(dt))
    end = np.ceil(np.log10(max_t))
    n = int(30 * (end - start)) if n_taus is None else n_taus
    lim_log = (start if lim_log[0] is None else lim_log[0],
               end if lim_log[1] is None else lim_log[1])

    taus = np.logspace(lim_log[0], lim_log[1], n, endpoint=True)

    X = _X(taus, data.times, irf_fwhm)

    if p == 0:
        if cv:
            mod = lm.RidgeCV()
        else:
            mod = lm.Ridge(alpha=alpha, max_iter=None, solver='svd')
    else:
        # The default max_iter (1e4) is a float; the estimator expects an int.
        mod = lm.MultiTaskElasticNet(alpha=alpha,
                                     l1_ratio=p,
                                     max_iter=int(max_iter))
    mod.verbose = 0
    mod.fit_intercept = False
    mod.copy_X = True

    mod.fit(X, data.D)
    fit = mod.predict(X)
    return mod.coef_.T, Data.from_matrix(fit, data.times, data.wavelengths), taus
 def test_model_multi_task_elasticnet(self):
     """Convert a two-target MultiTaskElasticNet to ONNX and dump the result."""
     estimator = linear_model.MultiTaskElasticNet()
     model, X = fit_regression_model(estimator, n_targets=2)

     initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
     model_onnx = convert_sklearn(model, "multi-task elasticnet",
                                  initial_types)
     self.assertIsNotNone(model_onnx)

     # Condition string handed to the dump helper as ``allow_failure``.
     failure_condition = ("StrictVersion(onnxruntime.__version__)"
                          "<= StrictVersion('0.2.1')")
     dump_data_and_model(X, model, model_onnx, verbose=False,
                         basename="SklearnMultiTaskElasticNet-Dec4",
                         allow_failure=failure_condition)
예제 #6
0
def sklearn_liner_model_regressions(xTrain, xTest, yTrain, yTest):
    """Fit many scikit-learn linear models and collect the accurate ones.

    Every estimator is fitted on the training data and evaluated with
    ``calculate_prediction_error``. A model is kept when both its
    "Test Average Error" and "Train Average Error" are below 30. Estimators
    that raise during fitting or scoring are reported and skipped.

    Args:
        xTrain: Training features.
        xTest: Test features.
        yTrain: Training targets.
        yTest: Test targets.

    Returns:
        DataFrame made of the error tables of all accepted models (empty when
        no model qualifies).
    """
    modelForConsideration: pd.DataFrame = pd.DataFrame()
    # Only estimator instances belong here. The path functions (enet_path,
    # lars_path, lasso_path) were dropped: they ran eagerly while the list
    # was built, outside the try block, and returned tuples with no ``fit``.
    LinerModels = [
        linear_model.ARDRegression(),
        linear_model.BayesianRidge(),
        linear_model.ElasticNet(),
        linear_model.ElasticNetCV(),
        linear_model.HuberRegressor(),
        linear_model.Lars(),
        linear_model.LarsCV(),
        linear_model.Lasso(),
        linear_model.LassoCV(),
        linear_model.LassoLars(),
        linear_model.LassoLarsCV(),
        linear_model.LassoLarsIC(),
        linear_model.LinearRegression(),
        linear_model.MultiTaskLasso(),
        linear_model.MultiTaskElasticNet(),
        linear_model.MultiTaskLassoCV(),
        linear_model.MultiTaskElasticNetCV(),
        linear_model.OrthogonalMatchingPursuit(),
        linear_model.OrthogonalMatchingPursuitCV(),
        linear_model.PassiveAggressiveClassifier(),
        linear_model.PassiveAggressiveRegressor(),
        linear_model.Perceptron(),
        linear_model.RANSACRegressor(),
        linear_model.Ridge(),
        linear_model.RidgeClassifier(),
        linear_model.RidgeClassifierCV(),
        linear_model.RidgeCV(),
        linear_model.SGDClassifier(),
        linear_model.SGDRegressor(),
        linear_model.TheilSenRegressor(),
    ]
    for model in LinerModels:
        modelName: str = model.__class__.__name__
        try:
            model.fit(xTrain, yTrain)
            yTrainPredict = model.predict(xTrain)
            yTestPredict = model.predict(xTest)
            errorList = calculate_prediction_error(modelName, yTestPredict,
                                                   yTest, yTrainPredict,
                                                   yTrain)

            if (errorList["Test Average Error"][0] < 30
                    and errorList["Train Average Error"][0] < 30):
                try:
                    # DataFrame.append no longer exists in pandas 2.x;
                    # pd.concat is the equivalent row-wise append.
                    # NOTE(review): assumes errorList is a DataFrame - confirm
                    # against calculate_prediction_error.
                    modelForConsideration = pd.concat(
                        [modelForConsideration, errorList])
                except Exception as e:
                    print(e)

        except Exception:
            # Best effort: an estimator that does not suit the data is
            # reported and skipped so the remaining ones still run.
            print(f"Error occurred while preparing Model {modelName}")
    return modelForConsideration
예제 #7
0
def multi_task_elastic_net(X, q, cv=False, alpha=0.0038, l1_ratio=0.632):
    """Fit a multi-task elastic net of ``q`` on ``X`` without an intercept.

    The multi-task formulation forces all output dimensions to share the same
    features and combines l1 and l2 regularization. The default ``alpha`` and
    ``l1_ratio`` are the values a cross-validation run produced
    (alpha = 0.0038, l1_ratio = 0.632).

    Args:
        X: Design matrix.
        q: Target matrix.
        cv: If true, select the hyper-parameters with
            ``MultiTaskElasticNetCV`` and ignore ``alpha`` / ``l1_ratio``.
        alpha: Regularization strength used when ``cv`` is false.
        l1_ratio: l1/l2 mixing ratio used when ``cv`` is false.

    Returns:
        Tuple ``(theta, res)``: the transposed coefficient matrix and the
        residual ``q - X @ theta``.
    """
    if not cv:
        estimator = lm.MultiTaskElasticNet(alpha=alpha,
                                           l1_ratio=l1_ratio,
                                           fit_intercept=False)
    else:
        # Ten evenly spaced candidate ratios from 0.1 to 1.0.
        candidate_ratios = np.linspace(0.1, 1.0, 10)
        estimator = lm.MultiTaskElasticNetCV(l1_ratio=candidate_ratios,
                                             eps=1e-3,
                                             n_alphas=100,
                                             alphas=None,
                                             fit_intercept=False,
                                             cv=3,
                                             verbose=True,
                                             n_jobs=-1)

    estimator.fit(X, q)
    theta = estimator.coef_.T
    fitted = np.dot(X, theta)
    return theta, q - fitted
from sklearn.linear_model import ElasticNet
from sklearn.datasets import make_regression
# --- Elastic-Net demo on a synthetic two-feature regression problem ---
X, y = make_regression(n_features=2, random_state=0)
print("Create dataset X with prediction y:\n", X[:1], y[:1])
regr = ElasticNet(random_state=0)
print("Create ElasticNet model:\n", regr)
print("Train ElasticNet model:\n", regr.fit(X, y))
print("Get the coef W:\n", regr.coef_)
print("Get the intercept alpha:\n", regr.intercept_)
# predict() needs a 2-D array of shape (n_samples, n_features); the flat
# list [0, 0] is rejected, so the single sample is wrapped in a list.
print("Get the score:\n", regr.predict([[0, 0]]))
print("Useful when there are multiple features which are correlated with one another")
print("-" * 200)

# --- Multi-task Elastic-Net demo: two targets fitted jointly ---
print("\t"*1 + "1.1.6 Multi task Elastic-Net")
print("It use mixed L1 and L2-norm and L2-norm for regularization")
from sklearn import linear_model
clf = linear_model.MultiTaskElasticNet(alpha=0.1)
print("Create multitaskelasticnet model:\n", clf)
print("Training of multitaskelasticnet model:\n", clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]]))
print("Get the coefficient W:\n", clf.coef_)
print("Get the interception alpha:\n", clf.intercept_)
print("It estimate sparse coefficients for multiple regression problems too")
print("-" * 200)

# --- Least Angle Regression demo, limited to one non-zero coefficient ---
print("\t"*1 + "1.1.7 Least Angle Regression")
print("It proceeds in a direction equiangular between features during each step of the regression")
from sklearn import linear_model
reg = linear_model.Lars(n_nonzero_coefs=1)
print("Create Lars model:\n", reg)
print("Train Lars model:\n", reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111]))
print("Get the coef W:\n", reg.coef_)
print("It is sensitive to the effects of noise")
print("-" * 200)
예제 #9
0
# Plot the predicted series against the actual one; both arrays are defined
# earlier, outside this excerpt.
plt.plot(user_forecast_response, label="predicted")
plt.plot(actual_values_response, label="actual")
plt.legend()
plt.title("1 January 2016")
plt.ylabel("Solar Power (W/m2)")
plt.xlabel("Hour")

#%% [markdown]
# Linear regression is not bad but we can do better!
#%% [markdown]
# ## MultiTask ElasticNet Regression

#%%
# training a multi-task elastic net model
# (the RandomState is seeded with 42 and handed to the estimator)
_prng = np.random.RandomState(42)
elastic_net = linear_model.MultiTaskElasticNet(random_state=_prng)
elastic_net.fit(transformed_training_features, training_target)

#%%
# measure training error (root of the mean squared error on the training set)
_predictions = elastic_net.predict(transformed_training_features)
np.sqrt(metrics.mean_squared_error(training_target, _predictions))

#%%
# measure validation error (root of the mean squared error on the validation set)
_predictions = elastic_net.predict(transformed_validation_features)
np.sqrt(metrics.mean_squared_error(validation_target, _predictions))

#%%
# user requests forecast for 1 January 2016 which we predict using data from 31 December 2015!
# The list index [-1] keeps the last training row as a 2-D, single-row array.
user_forecast_request = transformed_training_features[[-1], :]
예제 #10
0
import src.vector_gen.generate_VectorY as vecY
import src.misc.split_train_valid as split
import src.misc.paths as path
from sklearn import linear_model
import pandas as pd
import numpy as np
from sklearn.externals import joblib
from src.misc import evaluation as eval

import sys

# Print arrays in full. ``threshold=np.nan`` is rejected by current NumPy
# (the threshold must be numeric and not NaN); sys.maxsize is the supported
# way to disable summarization.
np.set_printoptions(threshold=sys.maxsize)

df = pd.read_csv(path.trajectories_training_file2)

# Only the training and testing parts of the split are used below.
training, validation, testing = split.split_dataset(df)
# NOTE(review): ``vecX`` is not imported anywhere in the visible import
# block (only ``vecY`` is) - confirm which module provides generate_x_df.
X_train = vecX.generate_x_df(training)
Y_train = vecY.generate_VectorY_df(training)

X_test = vecX.generate_x_df(testing)
Y_test = vecY.generate_VectorY_df(testing)

# Fit a multi-task elastic net with default hyper-parameters.
clf = linear_model.MultiTaskElasticNet()
clf.fit(X_train, Y_train)

Y_pred = clf.predict(X_test)

# Report the error returned by the project's ``mape`` helper and its mean.
error = eval.mape(Y_pred, Y_test)

print(error)
print(np.mean(np.array(error)))
예제 #11
0
import pytest
from sklearn import linear_model
import numpy
import chaospy

# Regression back-ends keyed by a short name; every estimator is created
# with fit_intercept=False. "none" maps to None (no estimator).
LINEAR_MODELS = dict(
    none=None,
    linear=linear_model.LinearRegression(fit_intercept=False),
    elastic_net=linear_model.MultiTaskElasticNet(
        alpha=0.0001, fit_intercept=False),
    lasso=linear_model.MultiTaskLasso(alpha=0.001, fit_intercept=False),
    lasso_lars=linear_model.LassoLars(alpha=0.0001, fit_intercept=False),
    lars=linear_model.Lars(n_nonzero_coefs=10, fit_intercept=False),
    matching_pursuit=linear_model.OrthogonalMatchingPursuit(
        n_nonzero_coefs=10, fit_intercept=False),
    ridge=linear_model.Ridge(alpha=0.1, fit_intercept=False),
)


@pytest.fixture
def samples(joint):
    """Return 1000 samples drawn from ``joint`` with the "sobol" rule."""
    sample_count = 1000
    drawn = joint.sample(sample_count, rule="sobol")
    return drawn

예제 #12
0
from sklearn import linear_model

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
import matplotlib.pyplot as plt
import numpy as np

# Synthetic multi-task regression data: one feature, ten targets.
x, y = datasets.make_regression(n_samples=1000, n_features=1, n_targets=10, noise=10, random_state=0)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

# Candidate multi-task estimators. ``reg`` is rebound on every line, so only
# the last one (MultiTaskElasticNetCV) is fitted below. The value 0.1 is
# passed by keyword, bound to the parameter it used to reach positionally,
# because current scikit-learn makes these parameters keyword-only.
reg = linear_model.MultiTaskElasticNet(alpha=0.1)  # multi-task elastic net
reg = linear_model.MultiTaskLasso(alpha=0.1)  # multi-task lasso
reg = linear_model.MultiTaskLassoCV(eps=0.1)  # multi-task lasso, cross-validated
reg = linear_model.MultiTaskElasticNetCV(l1_ratio=0.1)  # multi-task elastic net, cross-validated


reg.fit(x_train, y_train)

print(reg.coef_, reg.intercept_)

y_pred = reg.predict(x_test)

# Mean absolute error
print(mean_absolute_error(y_test, y_pred))

# Mean squared error
print(mean_squared_error(y_test, y_pred))
예제 #13
0
def get_regression_estimators(r, regression_models):
    """Store a default-constructed ``linear_model`` estimator under its name.

    Args:
        r: Name of the estimator class, e.g. ``'Ridge'``.
        regression_models: Mapping updated in place; on success
            ``regression_models[r]`` holds a new instance of the class.

    Returns:
        None. For an unsupported name a message is printed and the mapping
        is left untouched.
    """
    supported_names = (
        'ARDRegression', 'BayesianRidge', 'ElasticNet', 'ElasticNetCV',
        'HuberRegressor', 'Lars', 'LarsCV', 'Lasso', 'LassoCV', 'LassoLars',
        'LassoLarsCV', 'LassoLarsIC', 'LinearRegression',
        'LogisticRegression', 'LogisticRegressionCV', 'MultiTaskElasticNet',
        'MultiTaskElasticNetCV', 'MultiTaskLasso', 'MultiTaskLassoCV',
        'OrthogonalMatchingPursuit', 'OrthogonalMatchingPursuitCV',
        'PassiveAggressiveClassifier', 'PassiveAggressiveRegressor',
        'Perceptron', 'RANSACRegressor', 'Ridge', 'RidgeClassifier',
        'RidgeClassifierCV', 'RidgeCV', 'SGDClassifier', 'SGDRegressor',
        'TheilSenRegressor',
    )

    if r not in supported_names:
        print(
            r +
            " is an unsupported regression type. Check if you have misspelled the name."
        )
        return

    # Every supported name is also the name of the class in linear_model.
    estimator_class = getattr(linear_model, r)
    regression_models[r] = estimator_class()
예제 #14
0
                                              axis=0)**(0.5)
        rtmnSqDictNetLassoSep = dict([
            (TranscOrder[i], rtmnSqErrorByGeneNetLassoSep[i])
            for i in range(len(rtmnSqErrorByGeneNetLassoSep))
        ])

        for ky in LassoRMSESep.index:
            LassoRMSESep.loc[ky, j] = rtmnSqDictLassoSep[ky]

        for ky in NetLassoRMSESep.index:
            if len(network_bygene[ky]):
                NetLassoRMSESep.loc[ky, j] = rtmnSqDictNetLassoSep[ky]
            else:
                NetLassoRMSESep.loc[ky, j] = 'NA'

        ElasticNet = lm.MultiTaskElasticNet(alpha=parameters["MTENalpha"],
                                            l1_ratio=parameters["MTENl1R"])
        ElasticNet.fit(trainDataIn, trainDataOut)
        predictedTranscript_ElasticNet = np.array(
            ElasticNet.predict(testDataIn))

        SqErrors_EN = (testDataOut - predictedTranscript_ElasticNet)**2
        rtmnSqErrorByGene_EN = np.sum(SqErrors_EN, axis=0)**(0.5)
        rtmnSqDict_EN = dict([(TranscOrder[i], rtmnSqErrorByGene_EN[i])
                              for i in range(len(rtmnSqErrorByGene_EN))])

        for ky in ElasticNetRMSE.index:
            ElasticNetRMSE.loc[ky, j] = rtmnSqDict_EN[ky]

        predictedTranscript_ENSep = np.empty_like(predictedTranscriptLasso)
        predictedTranscript_NetENSep = np.zeros_like(predictedTranscriptLasso)
# Constant terms added in quadrature to the scatter of the predictions,
# keyed by spectral resolution: resolution -> ([Fe/H] term, Teff term).
_WIDE_ERROR_TERMS = {
    '115000': (0.10, 65),
    '110000': (0.10, 68),
    '94600': (0.12, 77),
    '75000': (0.13, 78),
    '48000': (0.13, 80),
}


def _save_comparison_plot(x, y, ref_x, ref_y, xlabel, ylabel, out_path,
                          title=None, height=0.5):
    """Plot ``y`` against ``x`` over a dashed reference line and save it."""
    set_res = 15
    fig, ax = plt.subplots(figsize=(set_res * 0.8, set_res * height))
    if title is not None:
        ax.set_title(title, fontsize=set_res * 1.5)
    ax.set_xlabel(xlabel, fontsize=set_res * 1.5)
    ax.set_ylabel(ylabel, fontsize=set_res * 1.5)
    ax.plot(ref_x, ref_y, '--b', lw=2)
    ax.plot(x, y, 'ko')
    ax.tick_params(axis='both', labelsize=set_res * 1.5)
    ax.spines['right'].set_visible(True)
    ax.spines['top'].set_visible(True)
    plt.savefig(out_path, bbox_inches='tight')
    plt.close()


def ML(regression):
    """Train a linear model per listed spectrum and predict its parameters.

    For every entry of ``final1Dfilelist.dat`` the reference table
    ``res<resolution>_RefEWPar.csv`` is split 100 times into train and test
    parts, the model is refitted on each split and applied to the star's own
    table. One summary line per star is written to ``Parameter_Results.dat``
    and four comparison plots (made from the last split) are saved in
    ``Model_Prediction_Plots``. The fitted model of each split is also
    written to ``savedmodel.pkl``.

    Args:
        regression: One of ``'linear'``, ``'ridge'``, ``'ridgecv'``,
            ``'multitasklasso'`` or ``'multitaskelasticnet'``.

    Raises:
        ValueError: If ``regression`` is not a supported name, or if a listed
            resolution has no entry in ``_WIDE_ERROR_TERMS``.
    """
    if regression == 'linear':
        reg = linear_model.LinearRegression()
    elif regression == 'ridge':
        reg = linear_model.Ridge(alpha=1)
    elif regression == 'ridgecv':
        reg = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0],
                                   cv=5,
                                   gcv_mode=None)
    elif regression == 'multitasklasso':
        reg = linear_model.MultiTaskLasso(alpha=1)
    elif regression == 'multitaskelasticnet':
        reg = linear_model.MultiTaskElasticNet(alpha=1)
    else:
        # Fail early instead of hitting an unbound ``reg`` deep in the loop.
        raise ValueError(
            'Unsupported regression type: {!r}'.format(regression))

    spectralist = np.loadtxt('final1Dfilelist.dat', dtype=str)
    if spectralist.ndim > 1:
        filepaths = spectralist[:, 0]
        resolution = spectralist[:, 1]
    else:
        # A single-line list file is loaded as a 1-D array.
        filepaths = [spectralist[0]]
        resolution = [spectralist[1]]
    directory_name = 'EWmyresults'
    results_name = 'Parameter_Results.dat'
    MLplots_folder = 'Model_Prediction_Plots'
    names = ['names']

    # The context manager closes the results file even if a star fails.
    with open(results_name, 'w') as res_file:
        res_file.write(
            "# newstars [Fe/H] STD_FeH MAE_[Fe/H] Wide_Error_[Fe/H] Teff STD_Teff MAE_Teff Wide_Error_Teff R2_score EV_score \n"
        )

        if not os.path.exists(MLplots_folder):
            os.makedirs(MLplots_folder)

        for filepath, res in zip(filepaths, resolution):
            if res not in _WIDE_ERROR_TERMS:
                # Previously this ended in a NameError or silently reused
                # the values of the previous star.
                raise ValueError(
                    'No wide-error terms for resolution {!r}'.format(res))
            feh_term, teff_term = _WIDE_ERROR_TERMS[res]

            starname = filepath.replace('.fits',
                                        '').replace('spectra/' + 'newstars/',
                                                    '')

            df = pd.read_csv('res' + res + '_RefEWPar.csv')
            df.dropna(axis=1, inplace=True)

            # The last two columns of the reference table are the targets.
            y = df.columns.values[-2:]

            # NOTE(review): there is no path separator between
            # directory_name and starname - kept as in the original; confirm.
            newdf = pd.read_csv('./' + directory_name + starname +
                                '_newstars.csv')
            newdf.dropna(axis=1, inplace=True)

            # Mask values below 1e-5 and drop every column containing one.
            zeroews = newdf.mask(newdf < 0.00001)
            newlabels = newdf[['newstars']]
            newdf = zeroews.dropna(axis='columns')
            newlines = newdf.columns.values[1::]

            df_y = df[y]
            # Train only on the lines that survived for this star.
            df_x = pd.concat([df.loc[:, 'names'], df.loc[:, newlines]],
                             axis=1)
            newdf = newdf.loc[:, newlines]

            FeH_list = []
            Teff_list = []
            MAE_FeH_list = []
            MAE_Teff_list = []
            Var_list = []
            R2_list = []

            for _ in range(100):
                x_train, x_test, y_train, y_test = train_test_split(
                    df_x, df_y, test_size=0.30)

                labels_test = x_test[names]
                x_train = x_train.drop(names, axis=1)
                x_test = x_test.drop(names, axis=1)

                reg.fit(x_train, y_train)
                joblib.dump(reg, 'savedmodel.pkl')

                y_pred_test = reg.predict(x_test)
                variancescore = explained_variance_score(y_test, y_pred_test)
                r2score = r2_score(y_test, y_pred_test)

                # Apply the model exactly as it was saved to disk.
                reg = joblib.load('savedmodel.pkl')
                pred_newdf = reg.predict(newdf)
                finalresults = pd.concat(
                    [newlabels, pd.DataFrame(pred_newdf)], axis=1)

                mae_test = mean_abso_error(y_test[:], y_pred_test[:])

                FeH_list.append(finalresults[0])
                Teff_list.append(finalresults[1])
                MAE_FeH_list.append(mae_test[0])
                MAE_Teff_list.append(mae_test[1])
                R2_list.append(r2score)
                Var_list.append(variancescore)

            # ``** 0.5`` instead of ``**(1 / 2)``: the latter is ``** 0``
            # under Python 2 integer division.
            wide_error_FeH = (np.std(FeH_list)**2 + feh_term**2)**0.5
            wide_error_Teff = (np.std(Teff_list)**2 + teff_term**2)**0.5

            res_file.write(
                str(newlabels.iat[0, 0]) + ' ' +
                str(round(np.mean(FeH_list), 3)) + ' ' +
                str(round(np.std(FeH_list), 3)) + ' ' +
                str(round(np.mean(MAE_FeH_list), 3)) + ' ' +
                str(round((wide_error_FeH), 3)) + ' ' +
                str(int(np.mean(Teff_list))) + ' ' +
                str(int(np.std(Teff_list))) + ' ' +
                str(int(np.mean(MAE_Teff_list))) + ' ' +
                str(int(wide_error_Teff)) + ' ' +
                str(round(np.mean(R2_list), 3)) + ' ' +
                str(round(np.mean(Var_list), 3)) + "\n")

            print('Star ' + str(newlabels.iat[0, 0]) +
                  ' results completed and saved in ' + results_name)

            # The plots use the last of the 100 splits. Column 0 holds the
            # star names, columns 1 and 2 the two targets.
            test_givenvalues = pd.concat([labels_test, y_test],
                                         axis=1).reset_index(drop=True)
            test_predvalues = pd.concat(
                [labels_test.reset_index(drop=True),
                 pd.DataFrame(y_pred_test)],
                axis=1)
            given = test_givenvalues.values
            pred = test_predvalues.values
            plot_prefix = "./" + MLplots_folder + "/" + starname

            # [Fe/H]: prediction against reference, then the difference.
            _save_comparison_plot(
                pred[:, 1], given[:, 1], (-0.8, 0.4), (-0.8, 0.4),
                "M.L. [Fe/H] [dex]", "Ref. [Fe/H] [dex]",
                plot_prefix + '_FeH_test_comparison.pdf',
                title='[Fe/H]' + ' ' + 'model' + ' ' + 'testing')
            _save_comparison_plot(
                pred[:, 1], pred[:, 1] - given[:, 1], (-0.8, 0.4), (0, 0),
                "M.L. [Fe/H] [dex]", r"$\Delta$[Fe/H] [dex]",
                plot_prefix + '_Diff_FeH_test_comparison.pdf',
                height=0.2)

            # Teff: prediction against reference, then the difference.
            _save_comparison_plot(
                pred[:, 2], given[:, 2], (2700, 4000), (2700, 4000),
                r"M.L. T$_{\mathrm{eff}}$ [K]",
                r"Ref. T$_{\mathrm{eff}}$ [K]",
                plot_prefix + '_Teff_test_comparison.pdf',
                title=r'T$_{\mathrm{eff}}$' + ' ' + 'model' + ' ' + 'testing')
            _save_comparison_plot(
                pred[:, 2], pred[:, 2] - given[:, 2], (2700, 4000), (0, 0),
                r"M.L. T$_{\mathrm{eff}}$ [K]",
                r"$\Delta$T$_{\mathrm{eff}}$ [K]",
                plot_prefix + '_Diff_Teff_test_comparison.pdf',
                height=0.2)
예제 #16
0
 def trainingMethod(self):
      """Fit a MultiTaskElasticNet (alpha=0.1) on the stored data and score it.

      Reads ``self.dataset`` and ``self.target``; sets ``self.model``,
      ``self.multiTaskElasticModel``, ``self.predicctions`` and
      ``self.r_score``.
      """
      estimator = linear_model.MultiTaskElasticNet(alpha=0.1)
      self.model = estimator
      fitted = estimator.fit(self.dataset, self.target)
      self.multiTaskElasticModel = fitted
      # Predictions and score are both computed on the training data itself.
      self.predicctions = fitted.predict(self.dataset)
      self.r_score = fitted.score(self.dataset, self.target)
예제 #17
0
def run_regression(data,
                   isBinarized,
                   mean,
                   variance,
                   headers,
                   regrOptions=None):
    """Train the regressor chosen by ``regrOptions["name"]`` and score it on dev.

    The target of every row is its last element; targets are
    log-transformed before fitting, so the returned error is an RMSE
    computed in log space.

    Args:
        data: mapping with ``"train"`` and ``"dev"`` row collections.
        isBinarized, mean, variance: forwarded unchanged to ``binarize``.
        headers: column headers, forwarded to ``header_to_newCol`` (for
            "poly") and ``top_bottom`` (for "linear").
        regrOptions: dict whose ``"name"`` selects the strategy. Supported
            names: "linear", "ridge", "lasso", "GradientBoostingRegressor",
            "elastic", "SGD", "poly", "MultiTaskElasticNet", "stacking" and
            "average". Defaults to ``{"name": "linear"}``.

    Returns:
        ``(rmsle, regr)`` for every strategy except "average", which
        returns ``(rmsle, models, x_train, y_train)``.

    Raises:
        ValueError: if ``regrOptions["name"]`` is not a supported name.
    """
    if regrOptions is None:
        regrOptions = {"name": "linear"}
    name = regrOptions["name"]

    train = data["train"]
    dev = data["dev"]
    kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
    x_train, x_dev, mapping = binarize(train, dev, isBinarized, mean,
                                       variance)

    y_train = np.log(np.asarray([row[-1] for row in train]))
    y_dev = np.log(np.asarray([row[-1] for row in dev]))

    models = {
        "linear":
        linear_model.LinearRegression(),
        "ridge":
        linear_model.RidgeCV(cv=kfolds),
        "lasso":
        linear_model.LassoCV(cv=kfolds),
        "GradientBoostingRegressor":
        GradientBoostingRegressor(n_estimators=3000,
                                  learning_rate=0.05,
                                  max_depth=4,
                                  max_features='sqrt',
                                  min_samples_leaf=15,
                                  min_samples_split=10,
                                  loss='huber',
                                  random_state=5),
        # ``normalize=False`` used to be passed here; it was the default and
        # the keyword was removed in scikit-learn 1.2, so it is omitted to
        # keep this dict constructible on current releases.
        "elastic":
        linear_model.ElasticNet(alpha=0.1,
                                copy_X=True,
                                fit_intercept=True,
                                l1_ratio=0.5,
                                max_iter=1000,
                                random_state=None,
                                selection='cyclic',
                                tol=0.0001,
                                warm_start=False),
        "SGD":
        linear_model.SGDRegressor()
    }

    if name == "stacking":
        # Level 0: every model except ridge produces out-of-fold train
        # predictions and dev predictions through ``stacking``.
        predictions = pd.DataFrame()
        test_predictions = pd.DataFrame()
        for model_name, model in models.items():
            if model_name == "ridge":
                continue

            print(f"x_train: {pd.DataFrame(x_train).shape}")
            print(f"y_train: {pd.DataFrame(y_train).shape}")

            (test_pred, train_pred) = stacking(model=model,
                                               n_fold=10,
                                               xtrain=pd.DataFrame(x_train),
                                               x_dev=pd.DataFrame(x_dev),
                                               ytrain=pd.DataFrame(y_train))

            print(f"test_pred: {pd.DataFrame(test_pred).shape}")
            print(f"train_pred: {pd.DataFrame(train_pred).shape}")

            predictions = pd.concat(
                [predictions, pd.DataFrame(train_pred)], axis=1)
            print(f"predictions: {predictions.shape}")
            test_predictions = pd.concat(
                [test_predictions, pd.DataFrame(test_pred)], axis=1)
            print(test_predictions.shape)
            print("print")

        # Level 1: ridge is the meta-model fitted on the stacked predictions.
        regr = models["ridge"]
        regr.fit(predictions, pd.DataFrame(y_train))
        print((test_predictions).shape)
        # Score the meta-model's output. The raw level-0 matrix has one
        # column per base model and cannot be compared with y_dev directly.
        stacked_pred = np.ravel(regr.predict(test_predictions))
        rmsle = np.sqrt(mean_squared_error(y_dev, stacked_pred))
        print("Whats up doc")
        print(rmsle)
        return (rmsle, regr)

    if name == "average":
        # Unweighted mean of four independently fitted models (lasso is
        # deliberately left out, as in the original ensemble).
        members = ("linear", "GradientBoostingRegressor", "ridge", "SGD")
        for member in members:
            models[member].fit(x_train, y_train)
        predict = sum(models[member].predict(x_dev)
                      for member in members) / len(members)
        rmsle = np.sqrt(mean_squared_error(y_dev, predict))
        print(rmsle)
        return (rmsle, models, x_train, y_train)

    if name == "poly":
        header_mapping = header_to_newCol(headers, mapping)
        headersToInclude = ["GrLivArea", "LotArea"]
        new_train_matrix = add_polynomial_columns(header_mapping,
                                                  headersToInclude, x_train)
        new_dev_matrix = add_polynomial_columns(header_mapping,
                                                headersToInclude, x_dev)

        new_train_matrix = Pipeline([
            ("polynomial_features",
             preprocessing.PolynomialFeatures(degree=2, include_bias=False))
        ]).fit_transform(new_train_matrix)

        new_dev_matrix = Pipeline([
            ("polynomial_features",
             preprocessing.PolynomialFeatures(degree=2, include_bias=False))
        ]).fit_transform(new_dev_matrix)

        # Append the degree-2 expansion of the selected columns to the
        # original feature matrices.
        x_train = np.concatenate((np.asarray(x_train), new_train_matrix),
                                 axis=1)
        print(x_dev.shape, new_dev_matrix.shape)
        x_dev = np.concatenate((np.asarray(x_dev), new_dev_matrix), axis=1)

        regr = linear_model.LinearRegression()

    elif name == "MultiTaskElasticNet":
        # NOTE(review): y_train is built from one value per row, which looks
        # 1-D; MultiTaskElasticNet expects 2-D targets -- confirm this
        # branch is usable with the data passed in.
        regr = linear_model.MultiTaskElasticNet()

    elif name in models:
        # "linear", "ridge", "lasso", "GradientBoostingRegressor", "elastic"
        # and "SGD" use the pre-configured instance from the dict above.
        regr = models[name]

    else:
        raise ValueError(f"Unknown regression option name: {name!r}")

    regr.fit(x_train, y_train)
    y_pred = regr.predict(x_dev)

    rmsle = np.sqrt(mean_squared_error(y_dev, y_pred))

    if name == "linear":
        top_bottom(mapping, headers, regr.coef_, 10)
    # Tree ensembles such as GradientBoostingRegressor have no intercept_.
    if hasattr(regr, "intercept_"):
        print('Intercept: \n', regr.intercept_)

    return (rmsle, regr)
예제 #18
0
def train_MultiTaskElasticNet(X_train, y_train):
    """Fit a MultiTaskElasticNet with alpha=0.1 and return the fitted model.

    Args:
        X_train: training features, passed straight to ``fit``.
        y_train: training targets, passed straight to ``fit``.

    Returns:
        The fitted ``linear_model.MultiTaskElasticNet`` instance.
    """
    # scikit-learn's ``fit`` returns the estimator itself, so the call can
    # be chained and the same object handed back to the caller.
    return linear_model.MultiTaskElasticNet(alpha=0.1).fit(X_train, y_train)
예제 #19
0
# NOTE(review): sklearn.cross_validation and sklearn.learning_curve (both
# imported below) were removed in scikit-learn 0.20 in favour of
# sklearn.model_selection; this script targets the older API, where
# ShuffleSplit takes the sample count and ``n_iter``.
from sklearn.cross_validation import ShuffleSplit, train_test_split

# Ten random splits, each holding out 10% of the samples, with a fixed seed.
cv = ShuffleSplit(
    X_scale.shape[0],
    n_iter=10,
    test_size=0.1,
    random_state=123,
)

# Nine evenly spaced training-set sizes, from 10% of the samples up to one
# sample short of 90%, rounded to whole numbers.
train_sizes = np.rint(
    np.linspace(
        X_scale.shape[0] * 0.1,
        X_scale.shape[0] * 0.9 - 1,
        num=9,
    )
).astype(int)

# MultiTaskElasticNet on top of a cubic polynomial feature expansion
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

regressor = make_pipeline(
    PolynomialFeatures(degree=3, interaction_only=False, include_bias=True),
    linear_model.MultiTaskElasticNet(
        alpha=0.0,
        l1_ratio=0.1,
        fit_intercept=True,
        normalize=False,
        copy_X=True,
        max_iter=2000,
        tol=0.0001,
        warm_start=False,
        random_state=None,
        selection='cyclic',
    ),
)

# Learning curve scored with R^2 averaged uniformly over the outputs.
# NOTE(review): the splitter and sizes are derived from X_scale while the
# curve is computed on X_all -- presumably both have the same number of
# rows; confirm.
import sklearn.learning_curve as curves
from sklearn.metrics import r2_score, make_scorer

r2_scorer = make_scorer(r2_score, multioutput='uniform_average')
sizes, train_scores, test_scores = curves.learning_curve(
    regressor,
    X_all,
    y_scale,
    cv=cv,
    train_sizes=train_sizes,
    scoring=r2_scorer,
)

# Per-size mean and standard deviation across the CV splits, for smoothing
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)
예제 #20
0
# sklearn.linear_model estimators that are built with default arguments.
_PLAIN_LINEAR_MODELS = frozenset([
    'ARDRegression', 'BayesianRidge', 'ElasticNet', 'ElasticNetCV',
    'HuberRegressor', 'Lars', 'LarsCV', 'Lasso', 'LassoCV', 'LassoLars',
    'LassoLarsCV', 'LassoLarsIC', 'LinearRegression', 'MultiTaskLasso',
    'MultiTaskElasticNet', 'MultiTaskLassoCV', 'MultiTaskElasticNetCV',
    'OrthogonalMatchingPursuit', 'OrthogonalMatchingPursuitCV',
    'PassiveAggressiveRegressor', 'RandomizedLasso',
    'RandomizedLogisticRegression', 'RANSACRegressor', 'Ridge', 'RidgeCV',
    'SGDRegressor', 'TheilSenRegressor',
])

# sklearn.linear_model classifiers that receive the caller's class_weight.
_WEIGHTED_LINEAR_MODELS = frozenset([
    'LogisticRegression', 'LogisticRegressionCV',
    'PassiveAggressiveClassifier', 'Perceptron', 'RidgeClassifier',
    'RidgeClassifierCV', 'SGDClassifier',
])

# sklearn.svm classifiers (both receive class_weight).
_SVM_MODELS = frozenset(['LinearSVC', 'SVC'])

# These names are plain functions in sklearn.linear_model, not estimators:
# they cannot be instantiated and have no fit/predict.
_FUNCTION_ONLY_NAMES = frozenset([
    'lars_path', 'lasso_path', 'lasso_stability_path',
    'logistic_regression_path', 'orthogonal_mp', 'orthogonal_mp_gram',
])


def run_simple_model(train_x, train_y, dev_x, dev_y, test_x, test_y, model_type, out_dir=None, class_weight=None):
    """Fit one scikit-learn model by name and return its test accuracy.

    The model is fitted on the training split, used to predict the train
    and test splits, and the predictions are rounded before being compared
    with the labels via ``accuracy_score``.

    Args:
        train_x, train_y: training features and labels.
        dev_x, dev_y: accepted for interface compatibility; not used.
        test_x, test_y: test features and labels.
        model_type: class name of an estimator in ``sklearn.linear_model``
            or ``sklearn.svm`` (e.g. ``'Ridge'``, ``'SVC'``).
        out_dir: if given, rounded test predictions are written to
            ``<out_dir>/preds/best_test_pred.txt``; if ``None`` nothing is
            saved.
        class_weight: forwarded to the classifiers that accept it.

    Returns:
        Accuracy of the rounded test predictions.

    Raises:
        NotImplementedError: if ``model_type`` is unknown, or names an
            sklearn function rather than an estimator.
    """
    # Validate first, so an unsupported name fails before any work is done.
    if model_type in _FUNCTION_ONLY_NAMES:
        raise NotImplementedError(
            "Model not implemented: '%s' is a function in "
            "sklearn.linear_model, not an estimator with fit/predict"
            % model_type)
    if (model_type not in _PLAIN_LINEAR_MODELS
            and model_type not in _WEIGHTED_LINEAR_MODELS
            and model_type not in _SVM_MODELS):
        raise NotImplementedError('Model not implemented')

    from sklearn import linear_model, svm

    startTrainTime = time()
    logger.info("Start training...")
    if model_type == 'SVC':
        estimator = svm.SVC(class_weight=class_weight, degree=3)
    elif model_type == 'LinearSVC':
        estimator = svm.LinearSVC(class_weight=class_weight)
    elif model_type in _WEIGHTED_LINEAR_MODELS:
        estimator = getattr(linear_model, model_type)(class_weight=class_weight)
    else:
        # Looked up lazily: some of these classes do not exist in every
        # scikit-learn release, and only the requested one is touched.
        estimator = getattr(linear_model, model_type)()
    model = estimator.fit(train_x, train_y)

    logger.info("Finished training.")
    endTrainTime = time()
    trainTime = endTrainTime - startTrainTime
    logger.info("Training time : %d seconds" % trainTime)

    logger.info("Start predicting train set...")
    train_pred_y = model.predict(train_x)
    logger.info("Finished predicting train set.")
    logger.info("Start predicting test set...")
    test_pred_y = model.predict(test_x)
    logger.info("Finished predicting test set.")
    endTestTime = time()
    # Covers prediction on both the train and the test split.
    testTime = endTestTime - endTrainTime
    logger.info("Testing time : %d seconds" % testTime)

    # Round so that regressors' continuous outputs can be scored as labels.
    train_pred_y = np.round(train_pred_y)
    test_pred_y = np.round(test_pred_y)

    # out_dir defaults to None; saving is skipped then instead of failing
    # on the string concatenation after the model has been trained.
    if out_dir is not None:
        np.savetxt(out_dir + '/preds/best_test_pred' + '.txt', test_pred_y, fmt='%i')

    test_accuracy = accuracy_score(test_y, test_pred_y)
    logger.info('[TRAIN] Acc: %.3f' % (accuracy_score(train_y, train_pred_y)))
    logger.info('[TEST]  Acc: %.3f' % test_accuracy)

    return test_accuracy