def findLassoAlpha(alpha, y, X, returnPred=False):
    # Initial split: train through 2015-04, walk-forward test over 2015-05 to 2016-04
    X_train, X_test = X.loc['2013-10-01':'2015-04-01'], X.loc['2015-05-01':'2016-04-01']
    y_train, y_test = y.loc['2013-10-01':'2015-04-01'], y.loc['2015-05-01':'2016-04-01']
    datestotest = y_test.index

    prediction = None
    for dt in datestotest:
        # Refit at every test date on all data observed so far
        lassoreg2 = MultiTaskLasso(alpha=alpha, max_iter=int(1e5))
        lassoreg2.fit(X_train, y_train)
        y_pred2 = lassoreg2.predict(X_test.loc[dt].values.reshape(1, -1))
        y_pred2 = pd.DataFrame(y_pred2, columns=y.columns)
        prediction = y_pred2 if prediction is None else pd.concat([prediction, y_pred2])
        # Expand the training window up to the date just predicted
        X_train = X.loc['2013-10-01':dt]
        y_train = y.loc['2013-10-01':dt]

    prediction.index = y_test.index
    if returnPred:
        return (y_test, prediction)
    return mean_squared_error(y_test, prediction)
def main():
    pickledname = sys.argv[1]
    _qmDL = qmDL()
    dataset = _qmDL.load(pickledname=pickledname)
    X, Y, labels = dataset['XX'], dataset['T'], dataset['names']
    # 5000 training samples, with 2211 test samples
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=2211,
                                                        random_state=42)
    print('Len X train, test:', len(X_train), len(X_test))
    regressor = MultiTaskLasso().fit(X_train, Y_train)
    # r = SVR()
    # regressor = multiTargetRegressor(rObject=r).fit(X_train, Y_train)
    Y_pred = regressor.predict(X_test)
    print(Y_pred)
    print('Y_pred', Y_pred.shape)
    for i in range(len(labels)):
        print('*** MAE', labels[i],
              mean_absolute_error(Y_test[:, i], Y_pred[:, i]))
def test_dml(self):
    #################################
    # Single treatment and outcome  #
    #################################
    X = TestPandasIntegration.df[TestPandasIntegration.features]
    W = TestPandasIntegration.df[TestPandasIntegration.controls]
    Y = TestPandasIntegration.df[TestPandasIntegration.outcome]
    T = TestPandasIntegration.df[TestPandasIntegration.cont_treat]
    # Test LinearDML
    est = LinearDML(model_y=LassoCV(), model_t=LassoCV())
    est.fit(Y, T, X=X, W=W, inference='statsmodels')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    self._check_input_names(est.summary())  # Check that names propagate as expected
    # Test re-fit
    X1 = X.rename(columns={c: "{}_1".format(c) for c in X.columns})
    est.fit(Y, T, X=X1, W=W, inference='statsmodels')
    self._check_input_names(est.summary(), feat_comp=X1.columns)
    # Test SparseLinearDML
    est = SparseLinearDML(model_y=LassoCV(), model_t=LassoCV())
    est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    self._check_input_names(est.summary())  # Check that names propagate as expected
    # ForestDML
    est = ForestDML(model_y=GradientBoostingRegressor(),
                    model_t=GradientBoostingRegressor())
    est.fit(Y, T, X=X, W=W, inference='blb')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    ####################################
    # Multiple treatments and outcomes #
    ####################################
    Y = TestPandasIntegration.df[TestPandasIntegration.outcome_multi]
    T = TestPandasIntegration.df[TestPandasIntegration.cont_treat_multi]
    # Test LinearDML
    est = LinearDML(model_y=MultiTaskLasso(), model_t=MultiTaskLasso())
    est.fit(Y, T, X=X, W=W, inference='statsmodels')
    self._check_input_names(est.summary(), True, True)  # Check that names propagate as expected
    self._check_popsum_names(est.effect_inference(X).population_summary(), True)
    est.fit(Y, T, X=X, W=W, inference='bootstrap')  # Check bootstrap as well
    self._check_input_names(est.summary(), True, True)
    self._check_popsum_names(est.effect_inference(X).population_summary(), True)
    # Test SparseLinearDML
    est = SparseLinearDML(model_y=MultiTaskLasso(), model_t=MultiTaskLasso())
    est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
    treatment_effects = est.effect(X)
    lb, ub = est.effect_interval(X, alpha=0.05)
    self._check_input_names(est.summary(), True, True)  # Check that names propagate as expected
    self._check_popsum_names(est.effect_inference(X).population_summary(), True)
def test_warm_start_multitask_lasso():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    clf = MultiTaskLasso(alpha=0.1, max_iter=5, warm_start=True)
    ignore_warnings(clf.fit)(X, Y)
    ignore_warnings(clf.fit)(X, Y)  # do a second round with 5 iterations
    clf2 = MultiTaskLasso(alpha=0.1, max_iter=10)
    ignore_warnings(clf2.fit)(X, Y)
    assert_array_almost_equal(clf2.coef_, clf.coef_)
def constrained_multiclass_solve(w, psi, alpha=1.0, **lasso_kws):
    """
    Solve

    .. math::

        \\text{argmin}_s \\|s\\|_0 \\\\
        \\text{subject to} \\|w - \\psi s\\|_2^2 \\leq tol
    """
    model = MultiTaskLasso(alpha=alpha, **lasso_kws)
    model.fit(psi, w)
    return model.coef_.T
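# Hypothetical usage sketch for constrained_multiclass_solve (not part of the
# original source): psi plays the role of the design/dictionary matrix and w
# the multi-target response, so the result has shape (n_features, n_targets).
import numpy as np

rng = np.random.RandomState(0)
psi = rng.randn(50, 10)                    # 50 samples, 10 dictionary atoms
w = psi[:, :3] + 0.01 * rng.randn(50, 3)   # 3 targets driven by the first atoms
s = constrained_multiclass_solve(w, psi, alpha=0.1, max_iter=5000)
print(s.shape)                             # (10, 3); rows for unused atoms shrink toward zero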
def make_dictionary(X, n_components=20, alpha=5., write_dir='/tmp/',
                    contrasts=[], method='multitask', l1_ratio=.5, n_subjects=13):
    """Create dictionary + encoding"""
    from sklearn.decomposition import dict_learning_online, sparse_encode
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import MultiTaskLasso, MultiTaskElasticNet

    mem = Memory(write_dir, verbose=0)
    dictionary = mem.cache(initial_dictionary)(n_components, X)
    np.savez(os.path.join(write_dir, 'dictionary.npz'),
             loadings=dictionary, contrasts=contrasts)
    if method == 'online':
        components, dictionary = dict_learning_online(
            X.T, n_components, alpha=alpha, dict_init=dictionary,
            batch_size=200, method='cd', return_code=True, shuffle=True,
            n_jobs=1, positive_code=True)
        np.savez(os.path.join(write_dir, 'dictionary.npz'),
                 loadings=dictionary, contrasts=contrasts)
    elif method == 'sparse':
        components = sparse_encode(
            X.T, dictionary, alpha=alpha, max_iter=10, n_jobs=1,
            check_input=True, verbose=0, positive=True)
    elif method == 'multitask':
        # too many hard-typed parameters !!!
        n_voxels = X.shape[1] // n_subjects
        components = np.zeros((X.shape[1], n_components))
        clf = MultiTaskLasso(alpha=alpha)
        # NB: the MultiTaskLasso above is immediately overwritten, so only the
        # elastic net below is actually used
        clf = MultiTaskElasticNet(alpha=alpha, l1_ratio=l1_ratio)
        for i in range(n_voxels):
            x = X[:, i:i + n_subjects * n_voxels:n_voxels]
            components[i: i + n_subjects * n_voxels: n_voxels] = \
                clf.fit(dictionary.T, x).coef_
    return dictionary, components
def fit_force_params(self, alpha=None):
    """
    Fit sparse linear regression on the remaining n_variables - q variables.

    alpha is the penalization parameter; None triggers cross validation.
    """
    if alpha is None:
        # do cross validation
        self.force_model = \
            MultiTaskLassoCV(eps=1e-3, n_alphas=50, cv=10, n_jobs=-1,
                             fit_intercept=False, normalize=False)
    else:
        self.force_model = \
            MultiTaskLasso(alpha=alpha, fit_intercept=False, normalize=False)
    self.force_model.fit(self.features_forcing[self.mask_f], self.eps)
class MultiTaskLassoImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
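# Hypothetical usage sketch for MultiTaskLassoImpl, assuming Op is bound to
# sklearn.linear_model.MultiTaskLasso (as the wrapper's name suggests):
import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.randn(40, 6)
Y_demo = X_demo[:, :2] + 0.05 * rng.randn(40, 2)
impl = MultiTaskLassoImpl(alpha=0.05)
impl.fit(X_demo, Y_demo)
print(impl.predict(X_demo).shape)  # (40, 2)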
def _MTLassoCV_MatchSpace(X, Y, v_pens=None, n_v_cv=5, sample_frac=1,
                          Y_col_block_size=None, se_factor=None,
                          normalize=True, **kwargs):  # pylint: disable=missing-param-doc, unused-argument
    # A fake MT would do Lasso on y_mean = Y.mean(axis=1)
    if sample_frac < 1:
        N = X.shape[0]
        sample = np.random.choice(N, int(sample_frac * N), replace=False)
        X = X[sample, :]
        Y = Y[sample, :]
    if Y_col_block_size is not None:
        Y = _block_summ_cols(Y, Y_col_block_size)
    varselectorfit = MultiTaskLassoCV(normalize=normalize, cv=n_v_cv,
                                      alphas=v_pens).fit(X, Y)
    best_v_pen = varselectorfit.alpha_
    if se_factor is not None:
        best_v_pen = _neg_se_rule(varselectorfit, factor=se_factor)
        varselectorfit = MultiTaskLasso(alpha=best_v_pen,
                                        normalize=normalize).fit(X, Y)
    V = np.sqrt(np.sum(np.square(varselectorfit.coef_), axis=0))  # n_tasks x n_features -> n_feature
    m_sel = V != 0
    transformer = SelMatchSpace(m_sel)
    return transformer, V[m_sel], best_v_pen, (V, varselectorfit)
def fit_lin_model(self, alpha=None):
    """
    Fit sparse linear regression on the first q variables.

    alpha is the penalization parameter; None triggers cross validation.
    """
    if alpha is None:
        # do cross validation
        self.lin_model = \
            MultiTaskLassoCV(eps=1e-3, n_alphas=50, cv=10, n_jobs=-1,
                             fit_intercept=False, normalize=False,
                             max_iter=3500)
    else:
        self.lin_model = \
            MultiTaskLasso(alpha=alpha, fit_intercept=False, normalize=False)
    self.lin_model.fit(self.features_lin_model[self.mask_l_m],
                       self.delta_v[self.mask_l_m])
def _MTLassoMixed_MatchSpace(X, Y, fit_model_wrapper, v_pens=None, n_v_cv=5, **kwargs):  # pylint: disable=missing-param-doc, unused-argument
    # Note that MultiTaskLasso(CV).path with the same alpha doesn't produce
    # the same results as MultiTaskLasso(CV)
    mtlasso_cv_fit = MultiTaskLassoCV(normalize=True, cv=n_v_cv,
                                      alphas=v_pens).fit(X, Y)
    # V_cv = np.sqrt(np.sum(np.square(mtlasso_cv_fit.coef_), axis=0))  # n_tasks x n_features -> n_feature
    # v_pen_cv = mtlasso_cv_fit.alpha_
    # m_sel_cv = (V_cv != 0)
    # sc_fit_cv = fit_model_wrapper(SelMatchSpace(m_sel_cv), V_cv[m_sel_cv])
    v_pens = mtlasso_cv_fit.alphas_
    # fits_single = {}
    Vs_single = {}
    scores = np.zeros((len(v_pens)))
    # R2s = np.zeros((len(v_pens)))
    for i, v_pen in enumerate(v_pens):
        mtlasso_i_fit = MultiTaskLasso(alpha=v_pen, normalize=True).fit(X, Y)
        V_i = np.sqrt(np.sum(np.square(mtlasso_i_fit.coef_), axis=0))
        m_sel_i = (V_i != 0)
        sc_fit_i = fit_model_wrapper(SelMatchSpace(m_sel_i), V_i[m_sel_i])
        # fits_single[i] = sc_fit_i
        Vs_single[i] = V_i
        scores[i] = sc_fit_i.score
        # R2s[i] = sc_fit_i.score_R2
    i_best = np.argmin(scores)
    # v_pen_best = v_pens[i_best]
    # i_cv = np.where(v_pens == v_pen_cv)[0][0]
    # print("CV alpha: " + str(v_pen_cv) + " (" + str(R2s[i_cv]) + ")." +
    #       " Best alpha: " + str(v_pen_best) + " (" + str(R2s[i_best]) + ") .")
    best_v_pen = v_pens[i_best]
    V_best = Vs_single[i_best]
    m_sel_best = (V_best != 0)
    return SelMatchSpace(m_sel_best), V_best[m_sel_best], best_v_pen, V_best
def asd_multitasklasso():
    model = MultiTaskLasso()
    f = "/home/vandal.t/repos/pydownscale/pydownscale/test_data/testdata.pkl"
    data = pickle.load(open(f, 'r'))
    asdm = ASDMultitask(data, model, season='JJA')
    asdm.train()
    out = asdm.predict(test_set=False)
    out.to_netcdf("test_data/mtl_test.nc")
def test_multi_task_lasso_readonly_data():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    with TempMemmap((X, Y)) as (X, Y):
        Y = np.c_[y, y]
        clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
        assert 0 < clf.dual_gap_ < 1e-5
        assert_array_almost_equal(clf.coef_[0], clf.coef_[1])
def test_multitasklasso(gaussian_data, fit_intercept, normalize, alpha):
    X, y = gaussian_data
    X = [X[0], X[0]]
    n_samples = y.shape[1]
    Xty = np.array([xx.T.dot(yy) for xx, yy in zip(X, y)])
    alpha_max = np.linalg.norm(Xty, axis=0).max()
    alpha *= alpha_max / n_samples

    est = GroupLasso(alpha=alpha, fit_intercept=fit_intercept,
                     normalize=normalize)
    est.fit(X, y)

    assert hasattr(est, 'is_fitted_')

    mtlasso = MultiTaskLasso(alpha=alpha, fit_intercept=fit_intercept,
                             normalize=normalize)
    mtlasso.fit(X[0], y.T)

    assert_allclose(est.coef_, mtlasso.coef_.T, rtol=1e-2)
def test_model_multi_task_lasso(self):
    model, X = fit_regression_model(MultiTaskLasso(), n_targets=2)
    model_onnx = convert_sklearn(
        model, "multi-task lasso",
        [("input", FloatTensorType([None, X.shape[1]]))])
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(X, model, model_onnx, verbose=False,
                        basename="SklearnMultiTaskLasso-Dec4")
def get_hyperparameters_model():
    param_dist = {}
    clf = MultiTaskLasso()

    model = {
        'multi_task_lasso': {
            'model': clf,
            'param_distributions': param_dist,
        }
    }
    return model
def __init__(self, scale=True, kfolds=4, alpha_stepsize=1.0, ncpus=None):
    """Predict motif activities using Lasso MultiTask regression.

    Parameters
    ----------
    scale : boolean, optional, default True
        If ``True``, the motif scores will be scaled before classification.

    kfolds : integer, optional, default 4
        Number of kfolds for parameter search.

    alpha_stepsize : float, optional, default 1.0
        Stepsize for use in alpha gridsearch.

    ncpus : int, optional
        Number of threads. Default is the number specified in the config.

    Attributes
    ----------
    act_ : DataFrame, shape (n_motifs, n_clusters)
        Fitted motif activities.

    sig_ : DataFrame, shape (n_motifs,)
        Boolean values, if coefficients are higher/lower than the 1%t from
        random permutation.
    """
    self.kfolds = kfolds
    self.act_description = "activity values: coefficients from fitted model"
    self.scale = scale

    if ncpus is None:
        ncpus = int(MotifConfig().get_default_params().get("ncpus", 2))
    self.ncpus = ncpus

    # initialize attributes
    self.act_ = None
    self.sig_ = None

    mtk = MultiTaskLasso()
    parameters = {
        "alpha": [np.exp(-x) for x in np.arange(0, 10, alpha_stepsize)],
    }
    self.clf = GridSearchCV(mtk, parameters, cv=kfolds,
                            n_jobs=self.ncpus, scoring="r2")

    self.pref_table = "score"
    self.supported_tables = ["score", "count"]
    self.ptype = "regression"
def main():
    rng = np.random.RandomState(42)

    # Generate some 2D coefficients with sine waves with random frequency and phase
    n_samples, n_features, n_tasks = 100, 30, 40
    n_relevant_features = 5
    coef = np.zeros((n_tasks, n_features))
    times = np.linspace(0, 2 * np.pi, n_tasks)
    for k in range(n_relevant_features):
        coef[:, k] = np.sin((1. + rng.randn(1)) * times + 3 * rng.randn(1))

    X = rng.randn(n_samples, n_features)
    Y = np.dot(X, coef.T) + rng.randn(n_samples, n_tasks)

    coef_lasso_ = np.array([Lasso(alpha=0.5).fit(X, y).coef_ for y in Y.T])
    coef_multi_task_lasso_ = MultiTaskLasso(alpha=1.).fit(X, Y).coef_

    # #########################################################################
    # Plot support and time series
    fig = plt.figure(figsize=(8, 5))
    plt.subplot(1, 2, 1)
    plt.spy(coef_lasso_)
    plt.xlabel('Feature')
    plt.ylabel('Time (or Task)')
    plt.text(10, 5, 'Lasso')
    plt.subplot(1, 2, 2)
    plt.spy(coef_multi_task_lasso_)
    plt.xlabel('Feature')
    plt.ylabel('Time (or Task)')
    plt.text(10, 5, 'MultiTaskLasso')
    fig.suptitle('Coefficient non-zero location')

    feature_to_plot = 0
    plt.figure()
    lw = 2
    plt.plot(coef[:, feature_to_plot], color='seagreen', linewidth=lw,
             label='Ground truth')
    plt.plot(coef_lasso_[:, feature_to_plot], color='cornflowerblue',
             linewidth=lw, label='Lasso')
    plt.plot(coef_multi_task_lasso_[:, feature_to_plot], color='gold',
             linewidth=lw, label='MultiTaskLasso')
    plt.legend(loc='upper center')
    plt.axis('tight')
    plt.ylim([-1.1, 1.1])
    plt.show()
def mtlasso_model(self, X_train, y_train, X_test, y_test):
    mtlasso_model = MultiTaskLasso(alpha=.005)
    mtlasso_model.fit(X_train, y_train)
    y_train_pred = mtlasso_model.predict(X_train)
    y_test_pred = mtlasso_model.predict(X_test)

    # Scoring the model
    print(mtlasso_model.score(X_train, y_train))
    print(mtlasso_model.score(X_test, y_test))
    print('MSE train: %.6f, MSE test: %.6f' % (
        mean_squared_error(y_train, y_train_pred),
        mean_squared_error(y_test, y_test_pred)))
    print('R^2 train: %.6f, R^2 test: %.6f' % (
        r2_score(y_train, y_train_pred),
        r2_score(y_test, y_test_pred)))
def test_multi_task_lasso_and_enet():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    # Y_test = np.c_[y_test, y_test]
    clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
    assert 0 < clf.dual_gap_ < 1e-5
    assert_array_almost_equal(clf.coef_[0], clf.coef_[1])

    clf = MultiTaskElasticNet(alpha=1, tol=1e-8).fit(X, Y)
    assert 0 < clf.dual_gap_ < 1e-5
    assert_array_almost_equal(clf.coef_[0], clf.coef_[1])

    clf = MultiTaskElasticNet(alpha=1.0, tol=1e-8, max_iter=1)
    assert_warns_message(ConvergenceWarning, 'did not converge', clf.fit, X, Y)
def _get_minimizer(self):
    """Return the estimator for the method."""
    # The factor 0.5 for alpha in the Lasso/LassoLars problem is to compensate
    # for the 1/(2 * n_sample) factor in the OLS term.
    if self.method == "multi-task":
        return MultiTaskLasso(
            alpha=self.cv_lambdas[0] / 2.0,
            fit_intercept=False,
            # normalize=False,
            # precompute=True,
            max_iter=self.max_iterations,
            tol=self.tolerance,
            copy_X=True,
            # positive=self.positive,
            random_state=None,
            warm_start=True,
            selection="random",
        )
    if self.method == "gradient_decent":
        return Lasso(
            alpha=self.cv_lambdas[0] / 2.0,
            fit_intercept=False,
            # normalize=False,
            precompute=True,
            max_iter=self.max_iterations,
            tol=self.tolerance,
            copy_X=True,
            positive=self.positive,
            random_state=None,
            warm_start=True,
            selection="random",
        )
    if self.method == "lars":
        return LassoLars(
            alpha=self.cv_lambdas[0] / 2.0,
            fit_intercept=False,
            verbose=True,
            # normalize=False,
            precompute="auto",
            max_iter=self.max_iterations,
            eps=2.220446049250313e-16,
            copy_X=True,
            fit_path=False,
            positive=self.positive,
            jitter=None,
            random_state=None,
        )
def get_regressors_multitask(nmodels='all'):
    """Return one or all of the multi-task linear regressors."""
    # 1. MultiTaskElasticNet
    lr1 = MultiTaskElasticNet()
    # 2. MultiTaskLasso
    lr2 = MultiTaskLasso()

    if nmodels == 'all':
        models = [lr1, lr2]
    else:
        # return the selected estimator itself rather than the string 'lr<n>'
        models = [{'lr1': lr1, 'lr2': lr2}['lr' + str(nmodels)]]

    return models
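# Hypothetical usage sketch for get_regressors_multitask (not part of the
# original snippet): fit every returned multi-task regressor on a small
# synthetic multi-output problem.
import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.randn(100, 5)
Y_demo = X_demo @ rng.randn(5, 3) + 0.1 * rng.randn(100, 3)
for reg in get_regressors_multitask('all'):
    reg.fit(X_demo, Y_demo)
    print(type(reg).__name__, reg.score(X_demo, Y_demo))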
def MultiTaskLasso_regression(self, X_train, y_train, X_test, y_test):
    alphas = np.logspace(-5, 5, 100)
    tuned_parameters = [{"alpha": alphas}]
    my_cv = RepeatedKFold(n_splits=10, n_repeats=10, random_state=42)
    model = MultiTaskLasso()
    gsearch_cv = GridSearchCV(estimator=model, param_grid=tuned_parameters,
                              scoring="neg_mean_squared_error", cv=my_cv,
                              n_jobs=-1)
    gsearch_cv.fit(X_train, y_train)
    best_model = gsearch_cv.best_estimator_
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return best_model, mse, mae, r2
def multivariate_regression(output_filename):
    regression_output = open(output_filename, 'w')
    lm = MultiTaskLasso(alpha=0.1)
    reg_name = "MTLassoRegression"
    gcvr2, gr2 = cv_regression(lm, n_data, Game_cols,
                               ["NormalizedLearningGain", "Presence"], show=True)
    gccvr2, gcr2 = cv_regression(lm, n_data, Game_cols + Comp_cols,
                                 ["NormalizedLearningGain", "Presence"], show=True)
    gaucvr2, gaur2 = cv_regression(lm, n_data, Game_cols + AU_cols,
                                   ["NormalizedLearningGain", "Presence"], show=True)
class LELM:
    upper_bound = 1.
    lower_bound = -1.

    def __init__(self, n_hidden, C=1., max_iter=10000):
        self.n_hidden = n_hidden
        self.C = C
        self.max_iter = max_iter

    def fit(self, X, y):
        # check label has form of 2-dim array
        X, y = copy.deepcopy(X), copy.deepcopy(y)
        self.sample_weight = None
        if len(y.shape) != 2:
            self.classes_ = np.unique(y)
            self.n_classes_ = len(self.classes_)
            y = self.__one2array(y, self.n_classes_)
        else:
            self.classes_ = np.arange(y.shape[1])
            self.n_classes_ = len(self.classes_)
        self.W = np.random.uniform(self.lower_bound, self.upper_bound,
                                   size=(X.shape[1], self.n_hidden))
        self.b = np.random.uniform(self.lower_bound, self.upper_bound,
                                   size=self.n_hidden)
        H = expit(np.dot(X, self.W) + self.b)
        self.multi_lasso = MultiTaskLasso(self.C, max_iter=self.max_iter).fit(H, y)

    def __one2array(self, y, n_dim):
        y_expected = np.zeros((y.shape[0], n_dim))
        for i in range(y.shape[0]):
            y_expected[i][y[i]] = 1
        return y_expected

    def predict(self, X):
        H = expit(np.dot(X, self.W) + self.b)
        output = self.multi_lasso.predict(H)
        return output.argmax(axis=1)
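# Hypothetical usage sketch for the LELM class (assumed imports: numpy as np,
# copy, scipy.special.expit, sklearn.linear_model.MultiTaskLasso): train the
# random-projection ELM with a MultiTaskLasso readout on toy 3-class data.
import numpy as np

rng = np.random.RandomState(0)
shifts = np.array([[2., 0., 0., 0.], [0., 2., 0., 0.], [0., 0., 2., 0.]])
X_toy = rng.randn(90, 4) + np.repeat(shifts, 30, axis=0)
y_toy = np.repeat(np.arange(3), 30)
model = LELM(n_hidden=20, C=1e-3)
model.fit(X_toy, y_toy)
print((model.predict(X_toy) == y_toy).mean())  # training accuracy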
def __init__(self, scale=True, kfolds=5, alpha_stepsize=1 / 3.0):
    """Predict motif activities using Lasso MultiTask regression.

    Parameters
    ----------
    scale : boolean, optional, default True
        If ``True``, the motif scores will be scaled before classification.

    kfolds : integer, optional, default 5
        Number of kfolds for parameter search.

    alpha_stepsize : float, optional, default 0.333
        Stepsize for use in alpha gridsearch.

    Attributes
    ----------
    act_ : DataFrame, shape (n_motifs, n_clusters)
        Fitted motif activities.

    sig_ : DataFrame, shape (n_motifs,)
        Boolean values, if coefficients are higher/lower than the 1%t from
        random permutation.
    """
    self.kfolds = kfolds
    self.act_description = ("activity values: coefficients from "
                            "fitted model")

    # initialize attributes
    self.act_ = None
    self.sig_ = None

    mtk = MultiTaskLasso()
    parameters = {
        "alpha": [np.exp(-x) for x in np.arange(0, 10, alpha_stepsize)],
    }
    self.clf = GridSearchCV(mtk, parameters, cv=kfolds, n_jobs=4)
def run_one_configuration(
    full_train_covariate_matrix,
    complete_target,
    new_valid_covariate_data_frames,
    new_valid_target_data_frame,
    std_data_frame,
    target_clusters,
    featurizer,
    model_name,
    parameters,
    log_file,
):
    model_baseline = dict()
    model_baseline["type"] = model_name
    model_baseline["target_clusters"] = target_clusters

    if model_name == "multi_task_lasso":
        model = MultiTaskLasso(max_iter=5000, **parameters)
    elif model_name == "xgboost":
        model = MultiOutputRegressor(
            XGBRegressor(n_jobs=10, objective="reg:squarederror",
                         verbosity=0, **parameters))

    model.fit(featurizer(full_train_covariate_matrix),
              complete_target.to_numpy(copy=True))
    model_baseline["model"] = lambda x: model.predict(featurizer(x))

    skill, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "skill",
    )
    cos_sim, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "cosine-sim",
    )
    with open(log_file, "a") as f:
        f.write(f"{len(target_clusters)} {parameters} {skill} {cos_sim}\n")
def multi_task_lasso(df):
    X = df[['X0', 'X1']]
    # X = df[['X0', 'X1', 'X2', 'X3']]
    Y = df[['y1', 'y2', 'y3']]

    mtl_scorer = make_scorer(mtl_roc_auc, greater_is_better=True)
    mtl_parameters = {
        'alpha': uniform(0, 10)
    }
    grid_search = RandomizedSearchCV(
        MultiTaskLasso(fit_intercept=False, alpha=0.05),
        mtl_parameters,
        n_iter=200,
        scoring=mtl_scorer,
        verbose=10,
        n_jobs=1,
        cv=5
    )
    grid_search.fit(X, Y)
    print(grid_search.best_params_)
    print(grid_search.best_score_)
    print(grid_search.best_estimator_.coef_)
def constrained_multiclass_solve(w, psi, alpha=1.0, quiet=False, **lasso_kws):
    """
    Solve

    .. math::

        \\text{argmin}_s \\|s\\|_0 \\\\
        \\text{subject to} \\|w - \\psi s\\|_2^2 \\leq tol
    """
    model = MultiTaskLasso(alpha=alpha, **lasso_kws)
    if quiet:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=RuntimeWarning)
            warnings.filterwarnings("ignore", category=UserWarning)
            model.fit(psi, w)
    else:
        model.fit(psi, w)
    return model.coef_.T
combined_features.fit(X_train_scaled, train_labels.ravel())
# print(pca.explained_variance_ratio_)
X_train_reduced = combined_features.transform(X_train_scaled)
X_test_reduced = combined_features.transform(X_test_scaled)

## Create K folds
k_fold = KFold(Y_train_raw.shape[0], n_folds=10)
for train, test in k_fold:
    X1 = X_train_reduced[train]
    Y1 = Y_train_raw[train]
    X2 = X_train_reduced[test]
    Y2 = Y_train_raw[test]

    ## Train Classifiers on fold
    mcl_clf = MultiTaskLasso(alpha=.3)
    mcl_clf.fit(X1, Y1)

    ## Score Classifiers on fold
    mcl_clf_score = mcl_clf.score(X2, Y2)
    print("MultiTaskLasso: ", mcl_clf_score)

## Lasso CV for parameter optimization
t1 = time.time()
clf = MultiTaskLasso(alpha=.3).fit(X_train_reduced, Y_train_raw)
t_lasso_cv = time.time() - t1
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import MultiTaskLasso
from sklearn.cross_validation import train_test_split

# split the dataset to get the necessary sub-datasets
features_train, features_test, labels_train, labels_test = train_test_split(
    features_sc, label_scm, test_size=0.33, random_state=42)

# pre-process: dimensionality reduction (SVD)
svd1 = TruncatedSVD(n_components=9, random_state=1).fit(features_train)
features_train = svd1.transform(features_train)
svd2 = TruncatedSVD(n_components=9, random_state=1).fit(features_test)
features_test = svd2.transform(features_test)

# do regression
mtl = MultiTaskLasso(alpha=0.000000001, random_state=1)
mtl.fit(features_train, labels_train)
print("MultiTaskLasso", mtl.score(features_test, labels_test))

######################################################################
# this part is used to calculate the Multi-Task Elastic-Net's score
# when the hyper-parameter is optimal

# load necessary libs
from sklearn.feature_selection import SelectKBest
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import MultiTaskElasticNet
from sklearn.cross_validation import train_test_split

# split the dataset to get the necessary sub-datasets
features_train, features_test, labels_train, labels_test = train_test_split(
    features_sc, label_scm, test_size=0.33, random_state=42)
def fit(self, X, Y):
    self.sparsed_X = list()
    # First, translate points to the origin
    main_centroid = [np.mean(x) for x in np.transpose(X)]
    print('Main centroid:', main_centroid)
    X = X - main_centroid

    byClassDict = defaultdict(list)
    for i in range(len(Y)):
        byClassDict[Y[i]].append(X[i])

    class_centroids = dict()
    centroids_matrix = list()
    kindexmap = dict()
    _i = 0
    for k in byClassDict:
        class_centroid = [np.mean(x) for x in np.transpose(byClassDict[k])]  # np.mean(byClassDict[k])
        _norm = np.linalg.norm(class_centroid)
        _scaling_factor = _norm ** 2  # (i+1)**2  # + (i+_norm)
        # Play with this using _norm, i and any other function/constant
        _centroid = np.array(class_centroid)  # * (_scaling_factor)
        print('*** Class centroid:', _centroid)
        class_centroids[k] = _centroid
        centroids_matrix.append(_centroid)
        kindexmap[k] = _i
        _i += 1

    centroids_matrix = np.array(centroids_matrix)
    ortho_centroids_matrix = np.array(gram_schmidt.gs(centroids_matrix))
    ortho_centroids_matrix = normalize(ortho_centroids_matrix)
    print('*Centroids matrix', centroids_matrix)
    print('*Ortho centroids matrix', ortho_centroids_matrix)

    newX, newY = list(), list()
    ks = list()
    for k in byClassDict:
        # byClassDict[k] = np.array(byClassDict[k]) - centroids_matrix[kindexmap[k]] + np.array(ortho_centroids_matrix[kindexmap[k]])  # class_centroids[k]
        # this is the basis vector corresponding to the current class
        classvector = np.array(ortho_centroids_matrix[kindexmap[k]])
        kScalingFactor = self.support
        # This section tries to get a good scaling factor for each orthonormal vector
        maxks = list()
        for _k in ks:
            projs = [scalarProjection(x, classvector) for x in byClassDict[_k]]
            maxk = max(projs)
            maxks.append(maxk)
        maxownk = max([scalarProjection(x, classvector) for x in byClassDict[k]])
        if len(ks):
            kScalingFactor = max(maxks) + abs(maxownk) + self.support
        for v in byClassDict[k]:
            vv = np.array(v) - centroids_matrix[kindexmap[k]] + classvector * kScalingFactor
            self.sparsed_X.append(vv)
            newX.append(v)
            newY.append(k)
        ks.append(k)

    self.sparsed_X = np.array(self.sparsed_X)
    if self.projectOnSubspace:
        # Project onto the new subspace spanned by the class vectors
        self.sparsed_X = np.dot(self.sparsed_X, np.transpose(centroids_matrix))
    if self.mapperType == 'PIMP':
        # self.scaler = preprocessing.StandardScaler().fit(self.sparsed_X)
        # self.sparsed_X = self.scaler.transform(self.sparsed_X)
        self.transformation_matrix = self.sparsed_X * (np.transpose(np.linalg.pinv(X)))
        # self.transformation_matrix = X * (np.transpose(np.linalg.pinv(self.sparsed_X)))
    if self.mapperType == 'Regressor':
        self.Regressor = MultiTaskLasso(alpha=0.00000001, max_iter=2000)
        self.Regressor.fit(newX, self.sparsed_X)
    return self.sparsed_X, newY