def fit(self, X, y):
    """Standardize ``X`` and grid-search a MultiTaskElasticNet on it.

    The fitted scaler is stored in ``self.standardizer`` and the fitted
    ``GridSearchCV`` object in ``self.clf``.  The best parameters and the
    corresponding score are printed.

    Args:
        X: Feature matrix handed to ``StandardScaler.fit_transform``.
        y: Target matrix; must have exactly 6 columns.

    Raises:
        AssertionError: If ``y.shape[1]`` is not 6.
    """
    # print() with a single string argument behaves identically on Python 2
    # and Python 3; the former print statements were a SyntaxError on 3.
    print("Fitting a multiclass ElasticNet regressor...")
    assert (y.shape[1] == 6)

    self.standardizer = preprocessing.StandardScaler()
    X = self.standardizer.fit_transform(X)

    cv = model_selection.ShuffleSplit(n_splits=5, test_size=0.2,
                                      random_state=0)
    alpha_range = [
        0.005, 0.007, 0.002, 0.0025, 0.004, 0.003, 0.0035877427142009029,
        0.01, 0.001
    ]
    # NOTE(review): the ``normalize`` estimator parameter was removed in
    # scikit-learn 1.2 -- confirm the targeted version before upgrading.
    param_grid = [
        dict(alpha=alpha_range,
             l1_ratio=[.1, .2, .25, .3, .35, .4, .5, .6, .65, .7, .8],
             normalize=[True],
             max_iter=[10000])
    ]
    print("Using param grid " + str(param_grid))

    self.clf = model_selection.GridSearchCV(
        linear_model.MultiTaskElasticNet(),
        param_grid=param_grid,
        cv=cv,
        n_jobs=12)
    self.clf.fit(X, y)
    print("Best params: " + str(self.clf.best_params_) +
          " and corresponding score is " + str(self.clf.best_score_))
def start_ltm_multi(tup, taus, w=0.1, alpha=0.001, **kwargs):
    """Fit a MultiTaskElasticNet on the base built from ``tup`` and ``taus``.

    Args:
        tup: Object passed to ``_make_base``; its ``data`` attribute is the
            regression target.
        taus: Passed through to ``_make_base``.
        w: Passed to ``_make_base`` as ``w``.
        alpha: Regularisation strength of the elastic net.
        **kwargs: Extra keyword arguments for ``lm.MultiTaskElasticNet``.

    Returns:
        Tuple ``(mod, coefs, fit, None)``: the fitted estimator, its
        ``coef_``, the prediction on the training base, and ``None``.
    """
    X = _make_base(tup, taus, w=w)
    mod = lm.MultiTaskElasticNet(alpha=alpha, **kwargs)

    # The assignments below override whatever was supplied through **kwargs.
    # max_iter must be an integer and fit_intercept a bool: scikit-learn's
    # parameter validation rejects the former 5e4 (float) and 0 (int).
    mod.max_iter = 50000
    mod.verbose = 0
    mod.fit_intercept = False
    # NOTE(review): kept from the original; scikit-learn documents
    # ``normalize`` as ignored when fit_intercept is False -- confirm it can go.
    mod.normalize = 1

    mod.fit(X, tup.data)
    fit = mod.predict(X)
    coefs = mod.coef_
    return mod, coefs, fit, None
def test_model_multi_task_elasticnet(self):
    """Convert a two-target MultiTaskElasticNet to ONNX and dump the result."""
    estimator = linear_model.MultiTaskElasticNet()
    model, X = fit_regression_model(estimator, n_targets=2)

    # One float input whose width equals the number of feature columns.
    n_features = X.shape[1]
    initial_types = [("input", FloatTensorType([None, n_features]))]

    model_onnx = convert_sklearn(
        model,
        "multi-task elasticnet",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    self.assertIsNotNone(model_onnx)

    dump_data_and_model(
        X,
        model,
        model_onnx,
        verbose=False,
        basename="SklearnMultiTaskElasticNet-Dec4",
    )
def LDM(data, irf_fwhm=0.1, n_taus=None, alpha=1, p=0, cv=False,
        max_iter=1e4, lim_log=(None, None)):
    """Fit a regularised linear model of ``data.D`` on a log-spaced tau base.

    irf_fwhm=0.1 is in ps by default, the same as the data time units.

    Args:
        data: A ``Data`` instance; ``times``, ``wavelengths`` and ``D`` are read.
        irf_fwhm: Passed to ``_X`` when building the design matrix.
        n_taus: Number of taus.  ``None`` uses 30 per decade between
            ``floor(log10(dt))`` and ``ceil(log10(max_t))``.
        alpha: Regularisation strength for ``Ridge`` / ``MultiTaskElasticNet``.
        p: ``0`` selects ridge regression; any other value is used as the
            ``l1_ratio`` of a ``MultiTaskElasticNet``.
        cv: With ``p == 0``, use ``RidgeCV`` instead of ``Ridge``.
        max_iter: Iteration limit of the elastic net; cast to ``int``.
        lim_log: ``(low, high)`` log10 bounds of the tau axis; a ``None``
            entry falls back to the bound derived from ``data.times``.

    Returns:
        Tuple ``(coefs, fit, taus)``: ``mod.coef_.T``, the prediction wrapped
        by ``Data.from_matrix`` and the tau grid.

    Raises:
        AssertionError: If ``data`` is not a ``Data`` instance.
    """
    # isinstance also accepts Data subclasses, which the exact type
    # comparison used to reject.
    assert isinstance(data, Data)

    dt = data.times[1] - data.times[0]
    max_t = data.times[-1]
    start = np.floor(np.log10(dt))
    end = np.ceil(np.log10(max_t))
    # The default count is derived from the data-driven bounds, not lim_log.
    n = int(30 * (end - start)) if n_taus is None else n_taus
    lim_log = (start if lim_log[0] is None else lim_log[0],
               end if lim_log[1] is None else lim_log[1])
    taus = np.logspace(lim_log[0], lim_log[1], n, endpoint=True)

    X = _X(taus, data.times, irf_fwhm)
    if p == 0:
        if not cv:
            mod = lm.Ridge(alpha=alpha, max_iter=None, solver='svd')
        else:
            mod = lm.RidgeCV()
    else:
        # The default max_iter (1e4) is a float; scikit-learn requires an int.
        mod = lm.MultiTaskElasticNet(alpha=alpha, l1_ratio=p,
                                     max_iter=int(max_iter))
        mod.verbose = 0
    mod.fit_intercept = False
    mod.copy_X = True

    mod.fit(X, data.D)
    fit = mod.predict(X)
    return mod.coef_.T, Data.from_matrix(fit, data.times,
                                         data.wavelengths), taus
def test_model_multi_task_elasticnet(self):
    """Convert a two-target MultiTaskElasticNet to ONNX and dump the result.

    The dump is allowed to fail for onnxruntime versions up to 0.2.1.
    """
    estimator = linear_model.MultiTaskElasticNet()
    model, X = fit_regression_model(estimator, n_targets=2)

    # One float input whose width equals the number of feature columns.
    initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "multi-task elasticnet",
                                 initial_types)
    self.assertIsNotNone(model_onnx)

    failure_condition = (
        "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        X,
        model,
        model_onnx,
        verbose=False,
        basename="SklearnMultiTaskElasticNet-Dec4",
        allow_failure=failure_condition,
    )
def sklearn_liner_model_regressions(xTrain, xTest, yTrain, yTest):
    """Fit every listed ``sklearn.linear_model`` estimator and keep the good ones.

    Each estimator is fitted on the training split and evaluated on both
    splits through ``calculate_prediction_error``.  Results whose first
    "Test Average Error" and "Train Average Error" entries are both below 30
    are collected.  An estimator that fails is reported and skipped.

    Args:
        xTrain: Training features.
        xTest: Test features.
        yTrain: Training targets.
        yTest: Test targets.

    Returns:
        DataFrame with the collected error rows (empty if none qualified).
    """
    modelForConsideration: DataFrame = pd.DataFrame()

    # Only estimator instances belong here.  The previous list also called
    # enet_path / lars_path / lasso_path / orthogonal_mp (which return
    # result tuples, not estimators) and orthogonal_mp_gram() /
    # ridge_regression() without their required arguments, so building the
    # list raised TypeError before any model was fitted.
    # Classifiers are kept from the original; on targets they cannot handle
    # they fail inside the try below and are reported.
    LinerModels = [
        linear_model.ARDRegression(),
        linear_model.BayesianRidge(),
        linear_model.ElasticNet(),
        linear_model.ElasticNetCV(),
        linear_model.HuberRegressor(),
        linear_model.Lars(),
        linear_model.LarsCV(),
        linear_model.Lasso(),
        linear_model.LassoCV(),
        linear_model.LassoLars(),
        linear_model.LassoLarsCV(),
        linear_model.LassoLarsIC(),
        linear_model.LinearRegression(),
        linear_model.MultiTaskLasso(),
        linear_model.MultiTaskElasticNet(),
        linear_model.MultiTaskLassoCV(),
        linear_model.MultiTaskElasticNetCV(),
        linear_model.OrthogonalMatchingPursuit(),
        linear_model.OrthogonalMatchingPursuitCV(),
        linear_model.PassiveAggressiveClassifier(),
        linear_model.PassiveAggressiveRegressor(),
        linear_model.Perceptron(),
        linear_model.RANSACRegressor(),
        linear_model.Ridge(),
        linear_model.RidgeClassifier(),
        linear_model.RidgeClassifierCV(),
        linear_model.RidgeCV(),
        linear_model.SGDClassifier(),
        linear_model.SGDRegressor(),
        linear_model.TheilSenRegressor(),
        # linear_model.LogisticRegression()
        # ,linear_model.LogisticRegressionCV(),
    ]

    for model in LinerModels:
        modelName: str = model.__class__.__name__
        try:
            # Only relevant if LogisticRegression is re-enabled in the list.
            if modelName == "LogisticRegression":
                model = linear_model.LogisticRegression(random_state=0)
            model.fit(xTrain, yTrain)
            yTrainPredict = model.predict(xTrain)
            yTestPredict = model.predict(xTest)
            errorList = calculate_prediction_error(modelName, yTestPredict,
                                                   yTest, yTrainPredict,
                                                   yTrain)
            if (errorList["Test Average Error"][0] < 30
                    and errorList["Train Average Error"][0] < 30):
                try:
                    # DataFrame.append was removed in pandas 2.0.
                    # NOTE(review): assumes errorList is a DataFrame -- confirm
                    # against calculate_prediction_error.
                    modelForConsideration = pd.concat(
                        [modelForConsideration, errorList])
                except Exception as e:
                    print(e)
        except Exception as e:
            # Best effort: report the failing estimator and move on.
            print(f"Error occurred while preparing Model {modelName}")
            print(e)
    return modelForConsideration
def multi_task_elastic_net(X, q, cv=False, alpha=0.0038, l1_ratio=0.632):
    '''
    Fit a Multi Task Elastic Net (no intercept) of q on X.

    All target dimensions are forced to share features; the Elastic Net
    formulation combines l1 and l2 regularization.
    Running cross-val gives alpha = 0.0038, l1_ratio = 0.632

    With cv truthy the hyper-parameters are searched by
    MultiTaskElasticNetCV over ten l1_ratio values in [0.1, 1.0];
    otherwise the given alpha and l1_ratio are used.

    Returns (theta, res): the transposed coefficients and the residual
    q - X.theta.
    '''
    if not cv:
        estimator = lm.MultiTaskElasticNet(alpha=alpha,
                                           l1_ratio=l1_ratio,
                                           fit_intercept=False)
    else:
        candidate_ratios = np.linspace(0.1, 1.0, 10)
        estimator = lm.MultiTaskElasticNetCV(l1_ratio=candidate_ratios,
                                             eps=1e-3,
                                             n_alphas=100,
                                             alphas=None,
                                             fit_intercept=False,
                                             cv=3,
                                             verbose=True,
                                             n_jobs=-1)

    estimator.fit(X, q)
    theta = estimator.coef_.T
    return theta, q - np.dot(X, theta)
from sklearn.linear_model import ElasticNet
from sklearn.datasets import make_regression

# --- ElasticNet on a synthetic two-feature regression problem ---
X, y = make_regression(n_features=2, random_state=0)
print("Create dataset X with prediction y:\n", X[:1], y[:1])
regr = ElasticNet(random_state=0)
print("Create ElasticNet model:\n", regr)
print("Train ElasticNet model:\n", regr.fit(X, y))
print("Get the coef W:\n", regr.coef_)
print("Get the intercept alpha:\n", regr.intercept_)
# predict() needs a 2-D array of shape (n_samples, n_features); the former
# 1-D [0, 0] is rejected by scikit-learn's input validation.
print("Get the score:\n", regr.predict([[0, 0]]))
print("Useful when there are multiple features which are correlated with one another")
print("-" * 200)

# --- 1.1.6 Multi-task Elastic-Net on a tiny two-target toy problem ---
print("\t"*1 + "1.1.6 Multi task Elastic-Net")
print("It use mixed L1 and L2-norm and L2-norm for regularization")
from sklearn import linear_model
clf = linear_model.MultiTaskElasticNet(alpha=0.1)
print("Create multitaskelasticnet model:\n", clf)
print("Training of multitaskelasticnet model:\n",
      clf.fit([[0, 0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]]))
print("Get the coefficient W:\n", clf.coef_)
print("Get the interception alpha:\n", clf.intercept_)
print("It estimate sparse coefficients for multiple regression problems too")
print("-" * 200)

# --- 1.1.7 Least Angle Regression limited to one non-zero coefficient ---
print("\t"*1 + "1.1.7 Least Angle Regression")
print("It proceeds in a direction equiangular between features during each step of the regression")
from sklearn import linear_model
reg = linear_model.Lars(n_nonzero_coefs=1)
print("Create Lars model:\n", reg)
print("Train Lars model:\n",
      reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111]))
print("Get the coef W:\n", reg.coef_)
print("It is sensitive to the effects of noise")
print("-" * 200)
# Plot the predicted and the actual series on one labelled figure.
plt.plot(user_forecast_response, label="predicted")
plt.plot(actual_values_response, label="actual")
plt.legend()
plt.title("1 January 2016")
plt.ylabel("Solar Power (W/m2)")
plt.xlabel("Hour")

#%% [markdown]
# Linear regression is not bad but we can do better!

#%% [markdown]
# ## MultiTask ElasticNet Regression

#%%
# training a multi-task elastic net model
# (seeded RandomState so the estimator's random_state is fixed)
_prng = np.random.RandomState(42)
elastic_net = linear_model.MultiTaskElasticNet(random_state=_prng)
elastic_net.fit(transformed_training_features, training_target)

#%%
# measure training error (root of the mean squared error; shown as cell output)
_predictions = elastic_net.predict(transformed_training_features)
np.sqrt(metrics.mean_squared_error(training_target, _predictions))

#%%
# measure validation error (root of the mean squared error; shown as cell output)
_predictions = elastic_net.predict(transformed_validation_features)
np.sqrt(metrics.mean_squared_error(validation_target, _predictions))

#%%
# user requests forecast for 1 January 2016 which we predict using data from 31 December 2015!
# [[-1], :] keeps the last training row as a 2-D, single-row selection.
user_forecast_request = transformed_training_features[[-1], :]
import sys

import src.vector_gen.generate_VectorY as vecY
import src.misc.split_train_valid as split
import src.misc.paths as path
from sklearn import linear_model
import pandas as pd
import numpy as np
# NOTE(review): sklearn.externals.joblib was removed in scikit-learn 0.23;
# switch to ``import joblib`` if this import is still needed.
from sklearn.externals import joblib
from src.misc import evaluation as eval

# Print arrays without truncation.  NumPy rejects threshold=np.nan in
# current releases; sys.maxsize is the documented replacement.
np.set_printoptions(threshold=sys.maxsize)

df = pd.read_csv(path.trajectories_training_file2)

#X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X,Y,test_size=0.2)
training, validation, testing = split.split_dataset(df)

# NOTE(review): ``vecX`` is not imported in the visible part of this file --
# confirm its import exists above, otherwise this raises NameError.
X_train = vecX.generate_x_df(training)
Y_train = vecY.generate_VectorY_df(training)

X_test = vecX.generate_x_df(testing)
Y_test = vecY.generate_VectorY_df(testing)

# Fit a default multi-task elastic net and report the error on the test split.
clf = linear_model.MultiTaskElasticNet()
clf.fit(X_train, Y_train)
Y_pred = clf.predict(X_test)

error = eval.mape(Y_pred, Y_test)
print(error)
print(np.mean(np.array(error)))
import pytest
from sklearn import linear_model
import numpy
import chaospy

# Regression back-ends keyed by short name; every estimator is created
# without an intercept, and "none" maps to no estimator at all.
LINEAR_MODELS = dict(
    none=None,
    linear=linear_model.LinearRegression(fit_intercept=False),
    elastic_net=linear_model.MultiTaskElasticNet(
        alpha=0.0001, fit_intercept=False),
    lasso=linear_model.MultiTaskLasso(alpha=0.001, fit_intercept=False),
    lasso_lars=linear_model.LassoLars(alpha=0.0001, fit_intercept=False),
    lars=linear_model.Lars(n_nonzero_coefs=10, fit_intercept=False),
    matching_pursuit=linear_model.OrthogonalMatchingPursuit(
        n_nonzero_coefs=10, fit_intercept=False),
    ridge=linear_model.Ridge(alpha=0.1, fit_intercept=False),
)


@pytest.fixture
def samples(joint):
    """Return 1000 samples of ``joint`` generated with the "sobol" rule."""
    n_samples = 1000
    return joint.sample(n_samples, rule="sobol")
from sklearn import linear_model
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
import matplotlib.pyplot as plt
import numpy as np

# Multi-task regression data: 1000 samples, 1 feature, 10 targets.
x, y = datasets.make_regression(n_samples=1000,
                                n_features=1,
                                n_targets=10,
                                noise=10,
                                random_state=0)
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

# Candidate estimators.  Each assignment replaces the previous one, so only
# the last estimator is actually fitted below.
# The hyper-parameters are now passed by keyword: the CV estimators take
# keyword-only arguments in current scikit-learn, and the former positional
# 0.1 silently meant ``eps`` / ``l1_ratio`` there rather than ``alpha``.
reg = linear_model.MultiTaskElasticNet(alpha=0.1)  # multi-task elastic net regression
reg = linear_model.MultiTaskLasso(alpha=0.1)  # multi-task lasso regression
reg = linear_model.MultiTaskLassoCV(eps=0.1)  # multi-task lasso regression (CV)
reg = linear_model.MultiTaskElasticNetCV(l1_ratio=0.1)  # multi-task elastic net regression (CV)

reg.fit(x_train, y_train)
print(reg.coef_, reg.intercept_)

y_pred = reg.predict(x_test)
# mean absolute error
print(mean_absolute_error(y_test, y_pred))
# mean squared error
print(mean_squared_error(y_test, y_pred))
def get_regression_estimators(r, regression_models):
    """Store a default-constructed ``linear_model`` estimator under key ``r``.

    Args:
        r: Name of the estimator class in ``linear_model``.
        regression_models: Mapping that receives ``regression_models[r]``.

    A name outside the supported list leaves the mapping untouched and
    prints a message instead.
    """
    supported_names = (
        'ARDRegression',
        'BayesianRidge',
        'ElasticNet',
        'ElasticNetCV',
        'HuberRegressor',
        'Lars',
        'LarsCV',
        'Lasso',
        'LassoCV',
        'LassoLars',
        'LassoLarsCV',
        'LassoLarsIC',
        'LinearRegression',
        'LogisticRegression',
        'LogisticRegressionCV',
        'MultiTaskElasticNet',
        'MultiTaskElasticNetCV',
        'MultiTaskLasso',
        'MultiTaskLassoCV',
        'OrthogonalMatchingPursuit',
        'OrthogonalMatchingPursuitCV',
        'PassiveAggressiveClassifier',
        'PassiveAggressiveRegressor',
        'Perceptron',
        'RANSACRegressor',
        'Ridge',
        'RidgeClassifier',
        'RidgeClassifierCV',
        'RidgeCV',
        'SGDClassifier',
        'SGDRegressor',
        'TheilSenRegressor',
    )

    if r in supported_names:
        # Every supported name is the class name inside linear_model.
        estimator_class = getattr(linear_model, r)
        regression_models[r] = estimator_class()
        return

    print(
        r +
        " is an unsupported regression type. Check if you have misspelled the name."
    )
axis=0)**(0.5) rtmnSqDictNetLassoSep = dict([ (TranscOrder[i], rtmnSqErrorByGeneNetLassoSep[i]) for i in range(len(rtmnSqErrorByGeneNetLassoSep)) ]) for ky in LassoRMSESep.index: LassoRMSESep.loc[ky, j] = rtmnSqDictLassoSep[ky] for ky in NetLassoRMSESep.index: if len(network_bygene[ky]): NetLassoRMSESep.loc[ky, j] = rtmnSqDictNetLassoSep[ky] else: NetLassoRMSESep.loc[ky, j] = 'NA' ElasticNet = lm.MultiTaskElasticNet(alpha=parameters["MTENalpha"], l1_ratio=parameters["MTENl1R"]) ElasticNet.fit(trainDataIn, trainDataOut) predictedTranscript_ElasticNet = np.array( ElasticNet.predict(testDataIn)) SqErrors_EN = (testDataOut - predictedTranscript_ElasticNet)**2 rtmnSqErrorByGene_EN = np.sum(SqErrors_EN, axis=0)**(0.5) rtmnSqDict_EN = dict([(TranscOrder[i], rtmnSqErrorByGene_EN[i]) for i in range(len(rtmnSqErrorByGene_EN))]) for ky in ElasticNetRMSE.index: ElasticNetRMSE.loc[ky, j] = rtmnSqDict_EN[ky] predictedTranscript_ENSep = np.empty_like(predictedTranscriptLasso) predictedTranscript_NetENSep = np.zeros_like(predictedTranscriptLasso)
def ML(regression):
    """Predict [Fe/H] and Teff for every listed spectrum with a linear model.

    For each entry of ``final1Dfilelist.dat`` the reference table
    ``res<resolution>_RefEWPar.csv`` is restricted to the columns available
    for the new star, the model is trained on 100 random 70/30 splits, and
    the prediction for the new star is collected each time.  Summary
    statistics are written to ``Parameter_Results.dat`` and four comparison
    plots of the last split are saved under ``Model_Prediction_Plots``.

    Args:
        regression: One of 'linear', 'ridge', 'ridgecv', 'multitasklasso'
            or 'multitaskelasticnet'.

    Raises:
        ValueError: If ``regression`` is not one of the names above, or if
            a resolution in the file list has no tabulated error terms.
    """
    if regression == 'linear':
        reg = linear_model.LinearRegression()
    elif regression == 'ridge':
        reg = linear_model.Ridge(alpha=1)
    elif regression == 'ridgecv':
        reg = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0],
                                   cv=5,
                                   gcv_mode=None)
    elif regression == 'multitasklasso':
        reg = linear_model.MultiTaskLasso(alpha=1)
    elif regression == 'multitaskelasticnet':
        reg = linear_model.MultiTaskElasticNet(alpha=1)
    else:
        # Previously ``reg`` stayed unbound and the failure surfaced only
        # later, after the results file had already been truncated.
        raise ValueError("Unsupported regression type: %r" % (regression,))

    # Per-resolution (FeH term, Teff term) added in quadrature to the scatter
    # of the predictions to form the "wide" errors.
    resolution_error_terms = {
        '115000': (0.10, 65),
        '110000': (0.10, 68),
        '94600': (0.12, 77),
        '75000': (0.13, 78),
        '48000': (0.13, 80),
    }

    spectralist = np.loadtxt('final1Dfilelist.dat', dtype=str)
    if spectralist.ndim > 1:
        filepaths = spectralist[:, 0]
        resolution = spectralist[:, 1]
    else:
        # A single-row file is loaded as a 1-D array.
        filepaths = [spectralist[0]]
        resolution = [spectralist[1]]

    directory_name = 'EWmyresults'
    MLplots_folder = 'Model_Prediction_Plots'
    os.makedirs(MLplots_folder, exist_ok=True)
    set_res = 15

    # The context manager closes the results file even if a star fails.
    with open('Parameter_Results.dat', 'w') as res_file:
        res_file.write(
            "# newstars [Fe/H] STD_FeH MAE_[Fe/H] Wide_Error_[Fe/H] Teff STD_Teff MAE_Teff Wide_Error_Teff R2_score EV_score \n"
        )

        for i in np.arange(len(filepaths)):
            if resolution[i] not in resolution_error_terms:
                # Previously this left wide_error_* unbound (first star) or
                # silently reused the values of the preceding star.
                raise ValueError("No error terms tabulated for resolution "
                                 "%r" % (resolution[i],))
            feh_term, teff_term = resolution_error_terms[resolution[i]]

            starname = filepaths[i].replace('.fits', '').replace(
                'spectra/' + 'newstars/', '')

            df = pd.read_csv('res' + resolution[i] + '_RefEWPar.csv')
            df.dropna(axis=1, inplace=True)
            names = ['names']
            # The last two columns of the reference table are the targets.
            y = df.columns.values[-2:]

            # Loaded as in the original; the value is superseded below.
            newlines = np.loadtxt('./' + directory_name + starname +
                                  'centrallines.dat',
                                  dtype=str)
            newdf = pd.read_csv('./' + directory_name + starname +
                                '_newstars.csv')
            newdf.dropna(axis=1, inplace=True)
            # Values below 1e-5 become NaN so their columns are dropped.
            zeroews = newdf.mask(newdf < 0.00001)
            newnames = ['newstars']
            newlabels = newdf[newnames]
            # The axis must be given by keyword in current pandas.
            newdf = zeroews.dropna(axis='columns')
            newlines = newdf.columns.values[1::]

            df_y = df[y]
            df_x = pd.concat([df.loc[:, 'names'], df.loc[:, newlines]],
                             axis=1)
            newdf = newdf.loc[:, newlines]

            FeH_list = []
            Teff_list = []
            MAE_FeH_list = []
            MAE_Teff_list = []
            Var_list = []
            R2_list = []

            for k in range(100):
                x_train, x_test, y_train, y_test = train_test_split(
                    df_x, df_y, test_size=0.30)
                labels_train = x_train[names]
                labels_test = x_test[names]
                # Non in-place drop: avoids mutating a slice of df_x.
                x_train = x_train.drop(names, axis=1)
                x_test = x_test.drop(names, axis=1)

                reg.fit(x_train, y_train)
                joblib.dump(reg, 'savedmodel.pkl')
                y_pred_test = reg.predict(x_test)
                y_pred_train = reg.predict(x_train)

                variancescore = explained_variance_score(y_test, y_pred_test)
                r2score = r2_score(y_test, y_pred_test)

                reg = joblib.load('savedmodel.pkl')  # loading the saved model
                pred_newdf = reg.predict(newdf)  # applying the saved model
                finalresults = pd.concat(
                    [newlabels, pd.DataFrame(pred_newdf)], axis=1)

                # Only mae_test feeds the summary; the other metrics are
                # computed as in the original.
                mae_train = mean_abso_error(y_train[:], y_pred_train[:])
                mape_train = mean_absolute_percentage_error(
                    y_train[:], y_pred_train[:])
                mae_test = mean_abso_error(y_test[:], y_pred_test[:])
                mape_test = mean_absolute_percentage_error(
                    y_test[:], y_pred_test[:])

                # Training-set tables are only needed by the (disabled)
                # training plots; kept so those plots can be re-enabled.
                train_givenvalues = pd.concat([labels_train, y_train], axis=1)
                train_givenvalues = train_givenvalues.reset_index(drop=True)
                new_labeltrain = labels_train.reset_index(drop=True)
                train_predvalues = pd.concat(
                    [new_labeltrain, pd.DataFrame(y_pred_train)], axis=1)

                test_givenvalues = pd.concat([labels_test, y_test], axis=1)
                test_givenvalues = test_givenvalues.reset_index(drop=True)
                new_labeltest = labels_test.reset_index(drop=True)
                test_predvalues = pd.concat(
                    [new_labeltest, pd.DataFrame(y_pred_test)], axis=1)

                FeH_list.append(finalresults[0])
                Teff_list.append(finalresults[1])
                MAE_FeH_list.append(mae_test[0])
                MAE_Teff_list.append(mae_test[1])
                R2_list.append(r2score)
                Var_list.append(variancescore)

            wide_error_FeH = ((np.std(FeH_list))**2 + (feh_term)**2)**(1 / 2)
            wide_error_Teff = ((np.std(Teff_list))**2 +
                               (teff_term)**2)**(1 / 2)

            summary_fields = [
                str(newlabels.iat[0, 0]),
                str(round(np.mean(FeH_list), 3)),
                str(round(np.std(FeH_list), 3)),
                str(round(np.mean(MAE_FeH_list), 3)),
                str(round((wide_error_FeH), 3)),
                str(int(np.mean(Teff_list))),
                str(int(np.std(Teff_list))),
                str(int(np.mean(MAE_Teff_list))),
                str(int(wide_error_Teff)),
                str(round(np.mean(R2_list), 3)),
                str(round(np.mean(Var_list), 3)),
            ]
            res_file.write(' '.join(summary_fields) + "\n")

            # The message now names the file that is actually written.
            print('Star ' + str(newlabels.iat[0, 0]) +
                  ' results completed and saved in Parameter_Results.dat')

            # The plots below show the test split of the last iteration.
            # Column 1 of the test tables is [Fe/H], column 2 is Teff.

            # plots of the FeH test
            fig, ax = plt.subplots(figsize=(set_res * 0.8, set_res * 0.5))
            ax.set_title('[Fe/H]' + ' ' + 'model' + ' ' + 'testing',
                         fontsize=set_res * 1.5)
            ax.set_xlabel("M.L. [Fe/H] [dex]", fontsize=set_res * 1.5)
            ax.set_ylabel("Ref. [Fe/H] [dex]", fontsize=set_res * 1.5)
            ax.plot((-0.8, 0.4), (-0.8, 0.4), '--b', lw=2)  # for FeH
            ax.plot(test_predvalues.values[:, 1],
                    test_givenvalues.values[:, 1], 'ko')
            ax.tick_params(axis='both', labelsize=set_res * 1.5)
            ax.spines['right'].set_visible(True)
            ax.spines['top'].set_visible(True)
            plt.savefig("./" + MLplots_folder + "/" + starname +
                        '_FeH_test_comparison.pdf',
                        bbox_inches='tight')
            plt.close()

            # Feh diff
            fig, ax = plt.subplots(figsize=(set_res * 0.8, set_res * 0.2))
            ax.set_xlabel("M.L. [Fe/H] [dex]", fontsize=set_res * 1.5)
            # Raw strings: "\D" and "\m" are invalid escape sequences in
            # ordinary literals; the runtime text is unchanged.
            ax.set_ylabel(r"$\Delta$[Fe/H] [dex]", fontsize=set_res * 1.5)
            ax.plot((-0.8, 0.4), (0, 0), '--b', lw=2)  # for FeH
            ax.plot(
                test_predvalues.values[:, 1],
                test_predvalues.values[:, 1] -
                test_givenvalues.values[:, 1], 'ko')
            ax.tick_params(axis='both', labelsize=set_res * 1.5)
            ax.spines['right'].set_visible(True)
            ax.spines['top'].set_visible(True)
            plt.savefig("./" + MLplots_folder + "/" + starname +
                        '_Diff_FeH_test_comparison.pdf',
                        bbox_inches='tight')
            plt.close()

            # plots of the Teff test
            fig, ax = plt.subplots(figsize=(set_res * 0.8, set_res * 0.5))
            ax.set_title(r'T$_{\mathrm{eff}}$' + ' ' + 'model' + ' ' +
                         'testing',
                         fontsize=set_res * 1.5)
            ax.set_xlabel(r"M.L. T$_{\mathrm{eff}}$ [K]",
                          fontsize=set_res * 1.5)
            ax.set_ylabel(r"Ref. T$_{\mathrm{eff}}$ [K]",
                          fontsize=set_res * 1.5)
            ax.tick_params(axis='both', labelsize=set_res * 1.5)
            ax.spines['right'].set_visible(True)
            ax.spines['top'].set_visible(True)
            ax.plot((2700, 4000), (2700, 4000), '--b', lw=2)  # for Teff
            ax.plot(test_predvalues.values[:, 2],
                    test_givenvalues.values[:, 2], 'ko')
            ax.plot(clip_box=True, clip_on=True)
            plt.savefig("./" + MLplots_folder + "/" + starname +
                        '_Teff_test_comparison.pdf',
                        bbox_inches='tight')
            plt.close()

            # T diff
            fig, ax = plt.subplots(figsize=(set_res * 0.8, set_res * 0.2))
            ax.set_xlabel(r"M.L. T$_{\mathrm{eff}}$ [K]",
                          fontsize=set_res * 1.5)
            ax.set_ylabel(r"$\Delta$T$_{\mathrm{eff}}$ [K]",
                          fontsize=set_res * 1.5)
            ax.tick_params(axis='both', labelsize=set_res * 1.5)
            ax.spines['right'].set_visible(True)
            ax.spines['top'].set_visible(True)
            ax.plot((2700, 4000), (0, 0), '--b', lw=2)  # for Teff
            ax.plot(
                test_predvalues.values[:, 2],
                test_predvalues.values[:, 2] -
                test_givenvalues.values[:, 2], 'ko')
            ax.plot(clip_box=True, clip_on=True)
            plt.savefig("./" + MLplots_folder + "/" + starname +
                        '_Diff_Teff_test_comparison.pdf',
                        bbox_inches='tight')
            plt.close()
def trainingMethod(self):
    """Fit a MultiTaskElasticNet (alpha=0.1) on the instance's own data.

    Reads ``self.dataset`` and ``self.target``.  Stores the estimator in
    ``self.model``, the fitted estimator in ``self.multiTaskElasticModel``,
    its predictions on ``self.dataset`` in ``self.predicctions`` and its
    ``score`` on the same data in ``self.r_score``.
    """
    estimator = linear_model.MultiTaskElasticNet(alpha=0.1)
    self.model = estimator

    fitted = estimator.fit(self.dataset, self.target)
    self.multiTaskElasticModel = fitted

    # Both quantities are evaluated on the training data itself.
    self.predicctions = fitted.predict(self.dataset)
    self.r_score = fitted.score(self.dataset, self.target)
def run_regression(data,
                   isBinarized,
                   mean,
                   variance,
                   headers,
                   regrOptions={"name": "linear"}):
    """Train the regressor selected by ``regrOptions["name"]`` and score it.

    Features come from ``binarize(train, dev, ...)``; the target is the
    natural log of the last element of every row.  The score is the root
    mean squared error on the dev split, measured in that log space.

    Args:
        data: Mapping with "train" and "dev" row collections.
        isBinarized, mean, variance: Passed through to ``binarize``.
        headers: Column headers (used for "poly" and for the coefficient
            report of "linear").
        regrOptions: Mapping whose "name" selects the model: "linear",
            "ridge", "poly", "lasso", "GradientBoostingRegressor",
            "MultiTaskElasticNet", "stacking" or "average".  The default
            dict is only read, never modified.

    Returns:
        ``(rmsle, regr)`` in general; ``(rmsle, models, x_train, y_train)``
        for "average".

    Raises:
        ValueError: If the model name is not one of the names above.
    """
    name = regrOptions["name"]
    train = data["train"]
    dev = data["dev"]
    kfolds = KFold(n_splits=10, shuffle=True, random_state=1)
    (x_train, x_dev, mapping) = binarize(train, dev, isBinarized, mean,
                                         variance)
    y_train = np.log(np.asarray([row[-1] for row in train]))
    y_dev = np.log(np.asarray([row[-1] for row in dev]))

    # Base models used by the "stacking" and "average" strategies.
    # ``normalize=False`` was dropped from ElasticNet: it was the default
    # and the parameter no longer exists in scikit-learn >= 1.2, where it
    # made this dict fail to build for every model name.
    models = {
        "linear":
        linear_model.LinearRegression(),
        "ridge":
        linear_model.RidgeCV(cv=kfolds),
        "lasso":
        linear_model.LassoCV(cv=kfolds),
        "GradientBoostingRegressor":
        GradientBoostingRegressor(n_estimators=3000,
                                  learning_rate=0.05,
                                  max_depth=4,
                                  max_features='sqrt',
                                  min_samples_leaf=15,
                                  min_samples_split=10,
                                  loss='huber',
                                  random_state=5),
        "elastic":
        linear_model.ElasticNet(alpha=0.1,
                                copy_X=True,
                                fit_intercept=True,
                                l1_ratio=0.5,
                                max_iter=1000,
                                random_state=None,
                                selection='cyclic',
                                tol=0.0001,
                                warm_start=False),
        "SGD":
        linear_model.SGDRegressor()
    }

    if name == "linear":
        regr = linear_model.LinearRegression()
    elif name == "ridge":
        regr = linear_model.RidgeCV(cv=kfolds)  #alpha = regrOptions["alpha"])
    elif name == "poly":
        # Append degree-2 polynomial features of two columns to the
        # feature matrices, then fit plain linear regression.
        header_mapping = header_to_newCol(headers, mapping)
        headersToInclude = ["GrLivArea", "LotArea"]
        new_train_matrix = add_polynomial_columns(header_mapping,
                                                  headersToInclude, x_train)
        new_dev_matrix = add_polynomial_columns(header_mapping,
                                                headersToInclude, x_dev)
        new_train_matrix = Pipeline([
            ("polynomial_features",
             preprocessing.PolynomialFeatures(degree=2, include_bias=False))
        ]).fit_transform(new_train_matrix)
        new_dev_matrix = Pipeline([
            ("polynomial_features",
             preprocessing.PolynomialFeatures(degree=2, include_bias=False))
        ]).fit_transform(new_dev_matrix)
        x_train = np.concatenate((np.asarray(x_train), new_train_matrix),
                                 axis=1)
        print(x_dev.shape, new_dev_matrix.shape)
        x_dev = np.concatenate((np.asarray(x_dev), new_dev_matrix), axis=1)
        regr = linear_model.LinearRegression()
    elif name == "lasso":
        regr = linear_model.LassoCV(cv=kfolds)
    elif name == "GradientBoostingRegressor":
        regr = GradientBoostingRegressor(n_estimators=3000,
                                         learning_rate=0.05,
                                         max_depth=4,
                                         max_features='sqrt',
                                         min_samples_leaf=15,
                                         min_samples_split=10,
                                         loss='huber',
                                         random_state=5)
    elif name == "MultiTaskElasticNet":
        regr = linear_model.MultiTaskElasticNet()
    elif name == "stacking":
        predictions = pd.DataFrame()
        test_predictions = pd.DataFrame()
        # Every base model except ridge contributes one prediction column;
        # ridge is the meta-model fitted on those columns.
        for model_name, model in models.items():
            if model_name == "ridge":
                continue
            print(f"x_train: {pd.DataFrame(x_train).shape}")
            print(f"y_train: {pd.DataFrame(y_train).shape}")
            (test_pred, train_pred) = stacking(model=model,
                                               n_fold=10,
                                               xtrain=pd.DataFrame(x_train),
                                               x_dev=pd.DataFrame(x_dev),
                                               ytrain=pd.DataFrame(y_train))
            print(f"test_pred: {pd.DataFrame(test_pred).shape}")
            print(f"train_pred: {pd.DataFrame(train_pred).shape}")
            predictions = pd.concat(
                [predictions, pd.DataFrame(train_pred)], axis=1)
            print(f"predictions: {predictions.shape}")
            test_predictions = pd.concat(
                [test_predictions,
                 pd.DataFrame(test_pred)], axis=1)
            print(test_predictions.shape)
        print("print")
        regr = models["ridge"]
        regr.fit(predictions, pd.DataFrame(y_train))
        print((test_predictions).shape)
        # NOTE(review): this compares the raw base-model predictions with
        # y_dev instead of regr.predict(test_predictions) -- confirm the
        # intended metric.
        rmsle = np.sqrt(mean_squared_error(test_predictions, y_dev))
        print("Whats up doc")
        # The former ``print(score)`` was removed: ``score`` is never
        # assigned (its computation is commented out), so it raised
        # NameError.
        print(rmsle)
        return (rmsle, regr)
    elif name == "average":
        # Unweighted mean of four base-model predictions (lasso disabled).
        models["linear"].fit(x_train, y_train)
        # models["lasso"].fit(x_train, y_train)
        models["GradientBoostingRegressor"].fit(x_train, y_train)
        models["ridge"].fit(x_train, y_train)
        models["SGD"].fit(x_train, y_train)
        pred1 = models["linear"].predict(x_dev)
        # pred2=models["lasso"].predict(x_dev)
        pred3 = models["GradientBoostingRegressor"].predict(x_dev)
        pred4 = models["ridge"].predict(x_dev)
        pred5 = models["SGD"].predict(x_dev)
        predict = (pred1 + pred3 + pred4 + pred5) / 4  #+pred2
        rmsle = np.sqrt(mean_squared_error(y_dev, predict))
        print(rmsle)
        return (rmsle, models, x_train, y_train)
    else:
        # Previously ``regr`` stayed unbound and the call failed below with
        # an unrelated-looking UnboundLocalError.
        raise ValueError("Unsupported regression name: %r" % (name,))

    regr.fit(x_train, y_train)
    if name != "GradientBoostingRegressor":
        # Gradient boosting exposes no coef_.
        coef = regr.coef_

    y_pred = regr.predict(x_dev)
    rmsle = np.sqrt(mean_squared_error(y_dev, y_pred))

    if name == "linear":
        top_bottom(mapping, headers, coef, 10)
        print('Intercept: \n', regr.intercept_)
    return (rmsle, regr)
def train_MultiTaskElasticNet(X_train, y_train):
    """Return a MultiTaskElasticNet (alpha=0.1) fitted on the given data."""
    # fit() returns the estimator itself, so the call can be chained.
    return linear_model.MultiTaskElasticNet(alpha=0.1).fit(X_train, y_train)
# sklearn.cross_validation and sklearn.learning_curve were removed in
# scikit-learn 0.20; their contents live in sklearn.model_selection.
from sklearn.model_selection import ShuffleSplit, train_test_split

# 10 random splits with 10% held out each time.  The model_selection
# version takes n_splits and no longer needs the number of samples.
cv = ShuffleSplit(n_splits=10, test_size=0.1, random_state=123)

# Generate the training set sizes: 9 absolute sizes from 10% of the
# samples up to just below 90%.
train_sizes = np.rint(
    np.linspace(X_scale.shape[0] * 0.1, X_scale.shape[0] * 0.9 - 1,
                9)).astype(int)

# MultiTaskElasticNet
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

# ``normalize=False`` was dropped: it was the default and the parameter no
# longer exists in scikit-learn >= 1.2.
regressor = make_pipeline(
    PolynomialFeatures(degree=3, include_bias=True, interaction_only=False),
    linear_model.MultiTaskElasticNet(alpha=0.0,
                                     copy_X=True,
                                     fit_intercept=True,
                                     l1_ratio=0.1,
                                     max_iter=2000,
                                     random_state=None,
                                     selection='cyclic',
                                     tol=0.0001,
                                     warm_start=False))

# Calculate the training and testing scores
# The ``curves`` alias is kept so any later ``curves.*`` use keeps working.
import sklearn.model_selection as curves
from sklearn.metrics import r2_score
from sklearn.metrics import make_scorer

r2_scorer = make_scorer(r2_score, multioutput='uniform_average')
# NOTE(review): the sizes are derived from X_scale but the curve is
# computed on X_all -- confirm both have the same number of rows.
sizes, train_scores, test_scores = curves.learning_curve(
    regressor,
    X_all,
    y_scale,
    cv=cv,
    train_sizes=train_sizes,
    scoring=r2_scorer)

# Find the mean and standard deviation for smoothing
train_std = np.std(train_scores, axis=1)
train_mean = np.mean(train_scores, axis=1)
test_std = np.std(test_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
# linear_model estimators that are constructed with no arguments.
_PLAIN_LINEAR_MODELS = frozenset([
    'ARDRegression', 'BayesianRidge', 'ElasticNet', 'ElasticNetCV',
    'HuberRegressor', 'Lars', 'LarsCV', 'Lasso', 'LassoCV', 'LassoLars',
    'LassoLarsCV', 'LassoLarsIC', 'LinearRegression', 'MultiTaskLasso',
    'MultiTaskElasticNet', 'MultiTaskLassoCV', 'MultiTaskElasticNetCV',
    'OrthogonalMatchingPursuit', 'OrthogonalMatchingPursuitCV',
    'PassiveAggressiveRegressor', 'RandomizedLasso',
    'RandomizedLogisticRegression', 'RANSACRegressor', 'Ridge', 'RidgeCV',
    'SGDRegressor', 'TheilSenRegressor',
])

# linear_model classifiers that receive the caller's class_weight.
_CLASS_WEIGHT_LINEAR_MODELS = frozenset([
    'LogisticRegression', 'LogisticRegressionCV',
    'PassiveAggressiveClassifier', 'Perceptron', 'RidgeClassifier',
    'RidgeClassifierCV', 'SGDClassifier',
])

# Names that refer to path-computation *functions* in sklearn.linear_model.
# They are not estimators: they need the data as call arguments and their
# results have no fit(), so they cannot be driven through this routine.
_PATH_FUNCTIONS = frozenset([
    'lars_path', 'lasso_path', 'lasso_stability_path',
    'logistic_regression_path', 'orthogonal_mp', 'orthogonal_mp_gram',
])


def run_simple_model(train_x, train_y, dev_x, dev_y, test_x, test_y, model_type, out_dir=None, class_weight=None):
    """Fit one scikit-learn model selected by name and return test accuracy.

    The model is fitted on the training data, used to predict both the
    training and the test set, and the predictions are rounded with
    ``np.round`` before being scored with ``accuracy_score``.

    Parameters
    ----------
    train_x, train_y : training features and targets passed to ``fit``.
    dev_x, dev_y : accepted for interface compatibility; not used here.
    test_x, test_y : test features and targets used for the returned score.
    model_type : str
        Name of a ``sklearn.linear_model`` estimator class, or ``'LinearSVC'``
        / ``'SVC'`` from ``sklearn.svm``.
    out_dir : str or None
        When given, rounded test predictions are written to
        ``<out_dir>/preds/best_test_pred.txt``.  When ``None`` the file is
        not written.
    class_weight : forwarded as ``class_weight`` to the classifiers that
        were given it originally (see ``_CLASS_WEIGHT_LINEAR_MODELS`` and the
        two SVM models).

    Returns
    -------
    The value of ``accuracy_score(test_y, rounded_test_predictions)``.

    Raises
    ------
    NotImplementedError
        If ``model_type`` is unknown, or names one of the path functions in
        ``_PATH_FUNCTIONS`` (which are not estimators and cannot be fitted).
    """
    from sklearn import linear_model, svm

    startTrainTime = time()
    logger.info("Start training...")

    # The class is looked up by name only after the membership test, so an
    # estimator missing from the installed scikit-learn release only matters
    # when it is actually requested.
    if model_type in _PLAIN_LINEAR_MODELS:
        model = getattr(linear_model, model_type)()
    elif model_type in _CLASS_WEIGHT_LINEAR_MODELS:
        model = getattr(linear_model, model_type)(class_weight=class_weight)
    elif model_type == 'LinearSVC':
        model = svm.LinearSVC(class_weight=class_weight)
    elif model_type == 'SVC':
        model = svm.SVC(class_weight=class_weight, degree=3)
    elif model_type in _PATH_FUNCTIONS:
        raise NotImplementedError(
            "'%s' is a path function, not an estimator, and cannot be fitted"
            % model_type)
    else:
        raise NotImplementedError('Model not implemented')
    model = model.fit(train_x, train_y)

    logger.info("Finished training.")
    endTrainTime = time()
    trainTime = endTrainTime - startTrainTime
    logger.info("Training time : %d seconds" % trainTime)

    logger.info("Start predicting train set...")
    train_pred_y = model.predict(train_x)
    logger.info("Finished predicting train set.")
    logger.info("Start predicting test set...")
    test_pred_y = model.predict(test_x)
    logger.info("Finished predicting test set.")
    # The reported "testing" time covers both prediction passes above.
    testTime = time() - endTrainTime
    logger.info("Testing time : %d seconds" % testTime)

    # Round so that continuous regressor outputs can be scored as labels.
    train_pred_y = np.round(train_pred_y)
    test_pred_y = np.round(test_pred_y)

    # out_dir defaults to None; building the path unconditionally would raise
    # TypeError after all the training work has been done.
    if out_dir is not None:
        np.savetxt(out_dir + '/preds/best_test_pred.txt', test_pred_y, fmt='%i')
    else:
        logger.info("No out_dir given; test predictions were not saved.")

    test_acc = accuracy_score(test_y, test_pred_y)
    logger.info('[TRAIN] Acc: %.3f' % (accuracy_score(train_y, train_pred_y)))
    logger.info('[TEST] Acc: %.3f' % test_acc)
    return test_acc