Example #1
def findLassoAlpha(alpha, y, X, returnPred=False):
    X_train, X_test = X.loc['2013-10-01':'2015-04-01'], X.loc[
        '2015-05-01':'2016-04-01']
    y_train, y_test = y.loc['2013-10-01':'2015-04-01'], y.loc[
        '2015-05-01':'2016-04-01']
    datestotest = y_test.index
    dt = datestotest[0]
    lassoreg2 = MultiTaskLasso(alpha=alpha, max_iter=int(1e5))
    lassoreg2.fit(X_train, y_train)
    y_pred2 = lassoreg2.predict(X_test.loc[dt].values.reshape(1, -1))
    y_pred2 = pd.DataFrame(y_pred2)
    y_pred2.columns = y.columns
    prediction = y_pred2
    X_train = X.loc['2013-10-01':dt]
    y_train = y.loc['2013-10-01':dt]
    for dt in datestotest[1:]:
        lassoreg2 = MultiTaskLasso(alpha=alpha, max_iter=int(1e5))
        lassoreg2.fit(X_train, y_train)
        y_pred2 = lassoreg2.predict(X_test.loc[dt].values.reshape(1, -1))
        y_pred2 = pd.DataFrame(y_pred2)
        y_pred2.columns = y.columns
        prediction = pd.concat([prediction, y_pred2])
        X_train = X.loc['2013-10-01':dt]
        y_train = y.loc['2013-10-01':dt]
    prediction.index = y_test.index
    if returnPred:
        return y_test, prediction
    else:
        return mean_squared_error(y_test, prediction)
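A minimal usage sketch for the expanding-window helper above, assuming y and X are date-indexed DataFrames and numpy is imported as np (the alpha grid is illustrative, not from the source):

alphas = np.logspace(-4, 1, 20)  # hypothetical search grid
errors = [findLassoAlpha(a, y, X) for a in alphas]
best_alpha = alphas[int(np.argmin(errors))]
# re-run with returnPred=True to inspect the walk-forward forecasts
y_true, y_hat = findLassoAlpha(best_alpha, y, X, returnPred=True)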
Example #2
def main():
    pickledname = sys.argv[1]
    _qmDL = qmDL()
    dataset = _qmDL.load(pickledname=pickledname)

    X, Y, labels = dataset['XX'], dataset['T'], dataset['names']

    #5000 training samples, with 2211 test samples
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=2211,
                                                        random_state=42)
    print('Len X train , test:', len(X_train), len(X_test))

    regressor = MultiTaskLasso().fit(X_train, Y_train)
    #r = SVR()
    #regressor = multiTargetRegressor(rObject=r).fit(X_train,Y_train)
    Y_pred = regressor.predict(X_test)

    print(Y_pred)
    print('Y_pred', Y_pred.shape)

    for i in range(len(labels)):
        print('*** MAE ', labels[i], end=' ')
        print(mean_absolute_error(Y_test[:, i], Y_pred[:, i]))
Example #3
 def test_dml(self):
     #################################
     #  Single treatment and outcome #
     #################################
     X = TestPandasIntegration.df[TestPandasIntegration.features]
     W = TestPandasIntegration.df[TestPandasIntegration.controls]
     Y = TestPandasIntegration.df[TestPandasIntegration.outcome]
     T = TestPandasIntegration.df[TestPandasIntegration.cont_treat]
     # Test LinearDML
     est = LinearDML(model_y=LassoCV(), model_t=LassoCV())
     est.fit(Y, T, X=X, W=W, inference='statsmodels')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(
         est.summary())  # Check that names propagate as expected
     # Test re-fit
     X1 = X.rename(columns={c: "{}_1".format(c) for c in X.columns})
     est.fit(Y, T, X=X1, W=W, inference='statsmodels')
     self._check_input_names(est.summary(), feat_comp=X1.columns)
     # Test SparseLinearDML
     est = SparseLinearDML(model_y=LassoCV(), model_t=LassoCV())
     est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(
         est.summary())  # Check that names propagate as expected
     # ForestDML
     est = ForestDML(model_y=GradientBoostingRegressor(),
                     model_t=GradientBoostingRegressor())
     est.fit(Y, T, X=X, W=W, inference='blb')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     ####################################
     #  Multiple treatments and outcomes #
     ####################################
     Y = TestPandasIntegration.df[TestPandasIntegration.outcome_multi]
     T = TestPandasIntegration.df[TestPandasIntegration.cont_treat_multi]
     # Test LinearDML
     est = LinearDML(model_y=MultiTaskLasso(), model_t=MultiTaskLasso())
     est.fit(Y, T, X=X, W=W, inference='statsmodels')
     self._check_input_names(est.summary(), True,
                             True)  # Check that names propagate as expected
     self._check_popsum_names(
         est.effect_inference(X).population_summary(), True)
     est.fit(Y, T, X=X, W=W,
             inference='bootstrap')  # Check bootstrap as well
     self._check_input_names(est.summary(), True, True)
     self._check_popsum_names(
         est.effect_inference(X).population_summary(), True)
     # Test SparseLinearDML
     est = SparseLinearDML(model_y=MultiTaskLasso(),
                           model_t=MultiTaskLasso())
     est.fit(Y, T, X=X, W=W, inference='debiasedlasso')
     treatment_effects = est.effect(X)
     lb, ub = est.effect_interval(X, alpha=0.05)
     self._check_input_names(est.summary(), True,
                             True)  # Check that names propagate as expected
     self._check_popsum_names(
         est.effect_inference(X).population_summary(), True)
Example #4
def test_warm_start_multitask_lasso():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    clf = MultiTaskLasso(alpha=0.1, max_iter=5, warm_start=True)
    ignore_warnings(clf.fit)(X, Y)
    ignore_warnings(clf.fit)(X, Y)  # do a second round with 5 iterations

    clf2 = MultiTaskLasso(alpha=0.1, max_iter=10)
    ignore_warnings(clf2.fit)(X, Y)
    assert_array_almost_equal(clf2.coef_, clf.coef_)
Example #5
def constrained_multiclass_solve(w, psi, alpha=1.0, **lasso_kws):
    """
    Solve
    .. math::

        \\text{argmin}_s \\|s\\|_0 \
        \\text{subject to} \\|w - psi s\\|_2^2 \\leq tol
    """
    model = MultiTaskLasso(alpha=alpha, **lasso_kws)
    model.fit(psi, w)
    return model.coef_.T
Example #6
def make_dictionary(X,
                    n_components=20,
                    alpha=5.,
                    write_dir='/tmp/',
                    contrasts=[],
                    method='multitask',
                    l1_ratio=.5,
                    n_subjects=13):
    """Create dictionary + encoding"""
    from sklearn.decomposition import dict_learning_online, sparse_encode
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import MultiTaskLasso, MultiTaskElasticNet

    mem = Memory(write_dir, verbose=0)
    dictionary = mem.cache(initial_dictionary)(n_components, X)
    np.savez(os.path.join(write_dir, 'dictionary.npz'),
             loadings=dictionary,
             contrasts=contrasts)
    if method == 'online':
        components, dictionary = dict_learning_online(X.T,
                                                      n_components,
                                                      alpha=alpha,
                                                      dict_init=dictionary,
                                                      batch_size=200,
                                                      method='cd',
                                                      return_code=True,
                                                      shuffle=True,
                                                      n_jobs=1,
                                                      positive_code=True)
        np.savez(os.path.join(write_dir, 'dictionary.npz'),
                 loadings=dictionary,
                 contrasts=contrasts)
    elif method == 'sparse':
        components = sparse_encode(X.T,
                                   dictionary,
                                   alpha=alpha,
                                   max_iter=10,
                                   n_jobs=1,
                                   check_input=True,
                                   verbose=0,
                                   positive=True)
    elif method == 'multitask':
        # too many hard-coded parameters !!!
        n_voxels = X.shape[1] // n_subjects
        components = np.zeros((X.shape[1], n_components))
        # note: the MultiTaskLasso is immediately overridden below, so only
        # the MultiTaskElasticNet is actually used
        clf = MultiTaskLasso(alpha=alpha)
        clf = MultiTaskElasticNet(alpha=alpha, l1_ratio=l1_ratio)
        for i in range(n_voxels):
            x = X[:, i:i + n_subjects * n_voxels:n_voxels]
            components[i:i + n_subjects * n_voxels:n_voxels] = \
                clf.fit(dictionary.T, x).coef_
    return dictionary, components
Example #7
 def fit_force_params(self, alpha=None):
     """
     fit sparse linear regression on remaining n_variables-q variables
     alpha is penalization parameter, None triggers cross validation
     """
     if alpha is None:  # do cross validation
         self.force_model = \
             MultiTaskLassoCV(eps=1e-3, n_alphas=50, cv=10, n_jobs=-1,
                              fit_intercept=False, normalize=False)
     else:
         self.force_model = \
             MultiTaskLasso(alpha=alpha, fit_intercept=False,
                            normalize=False)
     self.force_model.fit(self.features_forcing[self.mask_f], self.eps)
Example #8
class MultiTaskLassoImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
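A minimal sketch of how this wrapper might be used, assuming Op is bound to sklearn's MultiTaskLasso (the snippet does not show the binding, so the import below is an assumption):

from sklearn.linear_model import MultiTaskLasso as Op  # assumed binding

impl = MultiTaskLassoImpl(alpha=0.1, max_iter=1000)
impl.fit(X_train, Y_train)  # Y_train has shape (n_samples, n_tasks)
Y_hat = impl.predict(X_test)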
Example #9
def _MTLassoCV_MatchSpace(X,
                          Y,
                          v_pens=None,
                          n_v_cv=5,
                          sample_frac=1,
                          Y_col_block_size=None,
                          se_factor=None,
                          normalize=True,
                          **kwargs):  # pylint: disable=missing-param-doc, unused-argument
    # A fake MT would do Lasso on y_mean = Y.mean(axis=1)
    if sample_frac < 1:
        N = X.shape[0]
        sample = np.random.choice(N, int(sample_frac * N), replace=False)
        X = X[sample, :]
        Y = Y[sample, :]
    if Y_col_block_size is not None:
        Y = _block_summ_cols(Y, Y_col_block_size)
    varselectorfit = MultiTaskLassoCV(normalize=normalize,
                                      cv=n_v_cv,
                                      alphas=v_pens).fit(X, Y)
    best_v_pen = varselectorfit.alpha_
    if se_factor is not None:
        best_v_pen = _neg_se_rule(varselectorfit, factor=se_factor)
        varselectorfit = MultiTaskLasso(alpha=best_v_pen,
                                        normalize=normalize).fit(X, Y)
    V = np.sqrt(np.sum(np.square(varselectorfit.coef_),
                       axis=0))  # (n_tasks, n_features) -> (n_features,)
    m_sel = V != 0
    transformer = SelMatchSpace(m_sel)
    return transformer, V[m_sel], best_v_pen, (V, varselectorfit)
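For reference, the feature score V computed above is the column-wise l2 norm of the fitted coefficient matrix (coef_ has shape (n_tasks, n_features); the notation below is mine):

    V_j = \sqrt{\sum_{k=1}^{n_{\text{tasks}}} \hat{B}_{kj}^2}, \qquad j = 1, \dots, n_{\text{features}}

A feature enters m_sel exactly when at least one task assigns it a non-zero coefficient.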
Example #10
 def fit_lin_model(self, alpha=None):
     """
     fit sparse linear regression on first q variables
     alpha is penalization parameter, None triggers cross validation
     """
     if alpha is None:  # do cross validation
         self.lin_model = \
             MultiTaskLassoCV(eps=1e-3, n_alphas=50, cv=10, n_jobs=-1,
                              fit_intercept=False, normalize=False,
                              max_iter=3500)
     else:
         self.lin_model = \
             MultiTaskLasso(alpha=alpha, fit_intercept=False,
                            normalize=False)
     self.lin_model.fit(self.features_lin_model[self.mask_l_m],
                        self.delta_v[self.mask_l_m])
Example #11
def _MTLassoMixed_MatchSpace(X, Y, fit_model_wrapper, v_pens=None, n_v_cv=5, **kwargs):  # pylint: disable=missing-param-doc, unused-argument
    # Note: MultiTaskLasso(CV).path with the same alpha doesn't produce the same results as MultiTaskLasso(CV)
    mtlasso_cv_fit = MultiTaskLassoCV(normalize=True, cv=n_v_cv, alphas=v_pens).fit(X, Y)
    #V_cv = np.sqrt(np.sum(np.square(mtlasso_cv_fit.coef_), axis=0)) #n_tasks x n_features -> n_feature
    #v_pen_cv = mtlasso_cv_fit.alpha_
    #m_sel_cv = (V_cv!=0)
    #sc_fit_cv = fit_model_wrapper(SelMatchSpace(m_sel_cv), V_cv[m_sel_cv])

    v_pens = mtlasso_cv_fit.alphas_
    #fits_single = {}
    Vs_single = {}
    scores = np.zeros((len(v_pens)))
    #R2s = np.zeros((len(v_pens)))
    for i, v_pen in enumerate(v_pens):
        mtlasso_i_fit = MultiTaskLasso(alpha=v_pen, normalize=True).fit(X, Y)
        V_i = np.sqrt(np.sum(np.square(mtlasso_i_fit.coef_), axis=0))
        m_sel_i = (V_i!=0)
        sc_fit_i = fit_model_wrapper(SelMatchSpace(m_sel_i), V_i[m_sel_i])
        #fits_single[i] = sc_fit_i
        Vs_single[i] = V_i
        scores[i] = sc_fit_i.score
        #R2s[i] = sc_fit_i.score_R2

    i_best = np.argmin(scores)
    #v_pen_best = v_pens[i_best]
    #i_cv = np.where(v_pens==v_pen_cv)[0][0]
    #print("CV alpha: " + str(v_pen_cv) + " (" + str(R2s[i_cv]) + ")." + " Best alpha: " + str(v_pen_best) + " (" + str(R2s[i_best]) + ") .")
    best_v_pen = v_pens[i_best]
    V_best = Vs_single[i_best]
    m_sel_best = (V_best!=0)
    return SelMatchSpace(m_sel_best), V_best[m_sel_best], best_v_pen, V_best
Example #12
def asd_multitasklasso():
    model = MultiTaskLasso()
    f = "/home/vandal.t/repos/pydownscale/pydownscale/test_data/testdata.pkl"
    data = pickle.load(open(f, 'rb'))  # binary mode is required for pickles
    asdm = ASDMultitask(data, model, season='JJA')
    asdm.train()
    out = asdm.predict(test_set=False)
    out.to_netcdf("test_data/mtl_test.nc")
Example #13
def test_multi_task_lasso_readonly_data():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    with TempMemmap((X, Y)) as (X, Y):
        Y = np.c_[y, y]
        clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
        assert 0 < clf.dual_gap_ < 1e-5
        assert_array_almost_equal(clf.coef_[0], clf.coef_[1])
Example #14
def test_multitasklasso(gaussian_data, fit_intercept, normalize, alpha):

    X, y = gaussian_data
    X = [X[0], X[0]]
    n_samples = y.shape[1]

    Xty = np.array([xx.T.dot(yy) for xx, yy in zip(X, y)])
    alpha_max = np.linalg.norm(Xty, axis=0).max()
    alpha *= alpha_max / n_samples
    est = GroupLasso(alpha=alpha,
                     fit_intercept=fit_intercept,
                     normalize=normalize)
    est.fit(X, y)
    assert hasattr(est, 'is_fitted_')

    mtlasso = MultiTaskLasso(alpha=alpha,
                             fit_intercept=fit_intercept,
                             normalize=normalize)
    mtlasso.fit(X[0], y.T)
    assert_allclose(est.coef_, mtlasso.coef_.T, rtol=1e-2)
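The comparison makes sense because MultiTaskLasso's penalty is exactly a group lasso with one group per feature, tying that feature's coefficients across tasks. sklearn's documented objective, with W = coef_.T of shape (n_features, n_tasks), is:

    \min_W \frac{1}{2 n_{\text{samples}}} \|Y - XW\|_F^2 + \alpha \sum_{j=1}^{n_{\text{features}}} \sqrt{\sum_{k=1}^{n_{\text{tasks}}} W_{jk}^2}

so each feature is either selected for all tasks or dropped for all tasks.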
Example #15
 def test_model_multi_task_lasso(self):
     model, X = fit_regression_model(MultiTaskLasso(), n_targets=2)
     model_onnx = convert_sklearn(
         model, "multi-task lasso",
         [("input", FloatTensorType([None, X.shape[1]]))])
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(X,
                         model,
                         model_onnx,
                         verbose=False,
                         basename="SklearnMultiTaskLasso-Dec4")
Example #16
 def fit(self, X, y):
     # ensure the labels form a 2-D (one-hot) array
     X, y = copy.deepcopy(X), copy.deepcopy(y)
     self.sample_weight = None
     if len(y.shape) != 2:
         self.classes_ = np.unique(y)
         self.n_classes_ = len(self.classes_)
         y = self.__one2array(y, self.n_classes_)
     else:
         self.classes_ = np.arange(y.shape[1])
         self.n_classes_ = len(self.classes_)
     self.W = np.random.uniform(self.lower_bound,
                                self.upper_bound,
                                size=(X.shape[1], self.n_hidden))
     self.b = np.random.uniform(self.lower_bound,
                                self.upper_bound,
                                size=self.n_hidden)
     H = expit(np.dot(X, self.W) + self.b)
     self.multi_lasso = MultiTaskLasso(self.C,
                                       max_iter=self.max_iter).fit(H, y)
Example #17
def get_hyperparameters_model():
    param_dist = {}

    clf = MultiTaskLasso()

    model = {
        'multi_task_lasso': {
            'model': clf,
            'param_distributions': param_dist
        }
    }
    return model
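A minimal sketch of how such a registry entry might feed a hyper-parameter search; the distribution below is illustrative, since param_dist is left empty in the source:

from scipy.stats import loguniform  # hypothetical choice of prior
from sklearn.model_selection import RandomizedSearchCV

spec = get_hyperparameters_model()['multi_task_lasso']
spec['param_distributions'] = {'alpha': loguniform(1e-4, 1e1)}  # assumed grid
search = RandomizedSearchCV(spec['model'], spec['param_distributions'],
                            n_iter=20, cv=5)
search.fit(X, Y)  # X, Y assumed to be predefined multi-output data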
Example #18
    def __init__(self, scale=True, kfolds=4, alpha_stepsize=1.0, ncpus=None):
        """Predict motif activities using Lasso MultiTask regression

        Parameters
        ----------
        scale : boolean, optional, default True
            If ``True``, the motif scores will be scaled
            before classification

        kfolds : integer, optional, default 4
            number of kfolds for parameter search

        alpha_stepsize : float, optional, default 1.0
            stepsize for use in alpha gridsearch

        ncpus : int, optional
            Number of threads. Default is the number specified in the config.

        Attributes
        ----------
        act_ : DataFrame, shape (n_motifs, n_clusters)
            fitted motif activities

        sig_ : DataFrame, shape (n_motifs,)
            boolean values, if coefficients are higher/lower than
            the 1% threshold from random permutation
        """

        self.kfolds = kfolds
        self.act_description = "activity values: coefficients from fitted model"

        self.scale = scale
        if ncpus is None:
            ncpus = int(MotifConfig().get_default_params().get("ncpus", 2))
        self.ncpus = ncpus

        # initialize attributes
        self.act_ = None
        self.sig_ = None

        mtk = MultiTaskLasso()
        parameters = {
            "alpha": [np.exp(-x) for x in np.arange(0, 10, alpha_stepsize)]
        }
        self.clf = GridSearchCV(mtk,
                                parameters,
                                cv=kfolds,
                                n_jobs=self.ncpus,
                                scoring="r2")
        self.pref_table = "score"
        self.supported_tables = ["score", "count"]
        self.ptype = "regression"
Example #20
def main():
    rng = np.random.RandomState(42)

    # Generate some 2D coefficients with sine waves with random frequency and phase
    n_samples, n_features, n_tasks = 100, 30, 40
    n_relevant_features = 5
    coef = np.zeros((n_tasks, n_features))
    times = np.linspace(0, 2 * np.pi, n_tasks)
    for k in range(n_relevant_features):
        coef[:, k] = np.sin((1. + rng.randn(1)) * times + 3 * rng.randn(1))

    X = rng.randn(n_samples, n_features)
    Y = np.dot(X, coef.T) + rng.randn(n_samples, n_tasks)

    coef_lasso_ = np.array([Lasso(alpha=0.5).fit(X, y).coef_ for y in Y.T])
    coef_multi_task_lasso_ = MultiTaskLasso(alpha=1.).fit(X, Y).coef_

    # #############################################################################
    # Plot support and time series
    fig = plt.figure(figsize=(8, 5))
    plt.subplot(1, 2, 1)
    plt.spy(coef_lasso_)
    plt.xlabel('Feature')
    plt.ylabel('Time (or Task)')
    plt.text(10, 5, 'Lasso')
    plt.subplot(1, 2, 2)
    plt.spy(coef_multi_task_lasso_)
    plt.xlabel('Feature')
    plt.ylabel('Time (or Task)')
    plt.text(10, 5, 'MultiTaskLasso')
    fig.suptitle('Coefficient non-zero location')

    feature_to_plot = 0
    plt.figure()
    lw = 2
    plt.plot(coef[:, feature_to_plot],
             color='seagreen',
             linewidth=lw,
             label='Ground truth')
    plt.plot(coef_lasso_[:, feature_to_plot],
             color='cornflowerblue',
             linewidth=lw,
             label='Lasso')
    plt.plot(coef_multi_task_lasso_[:, feature_to_plot],
             color='gold',
             linewidth=lw,
             label='MultiTaskLasso')
    plt.legend(loc='upper center')
    plt.axis('tight')
    plt.ylim([-1.1, 1.1])
    plt.show()
Example #21
    def mtlasso_model(self, X_train, y_train, X_test, y_test):

        mtlasso_model = MultiTaskLasso(alpha=.005)

        mtlasso_model.fit(X_train, y_train)

        y_train_pred = mtlasso_model.predict(X_train)
        y_test_pred = mtlasso_model.predict(X_test)

        # Scoring the model
        print(mtlasso_model.score(X_train, y_train))
        print(mtlasso_model.score(X_test, y_test))
        print('MSE train: %.6f, MSE test: %.6f' % (mean_squared_error(
            y_train, y_train_pred), mean_squared_error(y_test, y_test_pred)))
        print('R^2 train: %.6f, R^2 test: %.6f' %
              (r2_score(y_train, y_train_pred), r2_score(y_test, y_test_pred)))
Example #22
def test_multi_task_lasso_and_enet():
    X, y, X_test, y_test = build_dataset()
    Y = np.c_[y, y]
    # Y_test = np.c_[y_test, y_test]
    clf = MultiTaskLasso(alpha=1, tol=1e-8).fit(X, Y)
    assert 0 < clf.dual_gap_ < 1e-5
    assert_array_almost_equal(clf.coef_[0], clf.coef_[1])

    clf = MultiTaskElasticNet(alpha=1, tol=1e-8).fit(X, Y)
    assert 0 < clf.dual_gap_ < 1e-5
    assert_array_almost_equal(clf.coef_[0], clf.coef_[1])

    clf = MultiTaskElasticNet(alpha=1.0, tol=1e-8, max_iter=1)
    assert_warns_message(ConvergenceWarning, 'did not converge', clf.fit, X, Y)
Example #23
    def _get_minimizer(self):
        """Return the estimator for the method"""
        # The factor 0.5 for alpha in the Lasso/LassoLars problem compensates
        # for the 1/(2 * n_samples) factor in the OLS term.
        if self.method == "multi-task":
            return MultiTaskLasso(
                alpha=self.cv_lambdas[0] / 2.0,
                fit_intercept=False,
                # normalize=False,
                # precompute=True,
                max_iter=self.max_iterations,
                tol=self.tolerance,
                copy_X=True,
                # positive=self.positive,
                random_state=None,
                warm_start=True,
                selection="random",
            )

        if self.method == "gradient_decent":
            return Lasso(
                alpha=self.cv_lambdas[0] / 2.0,
                fit_intercept=False,
                # normalize=False,
                precompute=True,
                max_iter=self.max_iterations,
                tol=self.tolerance,
                copy_X=True,
                positive=self.positive,
                random_state=None,
                warm_start=True,
                selection="random",
            )

        if self.method == "lars":
            return LassoLars(
                alpha=self.cv_lambdas[0] / 2.0,
                fit_intercept=False,
                verbose=True,
                # normalize=False,
                precompute="auto",
                max_iter=self.max_iterations,
                eps=2.220446049250313e-16,
                copy_X=True,
                fit_path=False,
                positive=self.positive,
                jitter=None,
                random_state=None,
            )
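For reference, the alpha/2 rescaling above follows from sklearn's Lasso objective, which puts a factor 1/(2n) on the quadratic term; if the cross-validated penalty lambda was chosen for a formulation without that extra 1/2, passing alpha = lambda/2 makes the two objectives agree up to an overall constant (notation mine):

    \frac{1}{2n}\|y - Xw\|_2^2 + \frac{\lambda}{2}\|w\|_1 = \frac{1}{2}\left(\frac{1}{n}\|y - Xw\|_2^2 + \lambda\|w\|_1\right)

Scaling an objective by 1/2 leaves its minimizer unchanged.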
Example #24
def get_regressors_multitask(nmodels='all'):
    """
    Return one or all of the multi-task linear regressors.
    """
    # 1. MultiTaskElasticNet
    lr1 = MultiTaskElasticNet()

    # 2. MultiTaskLasso
    lr2 = MultiTaskLasso()

    if nmodels == 'all':
        models = [lr1, lr2]
    else:
        # select a single model by its number (the original built the name
        # string 'lr<n>' instead of returning the estimator itself)
        models = [{1: lr1, 2: lr2}[int(nmodels)]]

    return models
Example #25
 def MultiTaskLasso_regression(self, X_train, y_train, X_test, y_test):
     
     alphas = np.logspace(-5, 5, 100)
     tuned_parameters = [{"alpha": alphas}]
     my_cv = RepeatedKFold(n_splits=10, n_repeats=10, random_state=42)
     model = MultiTaskLasso()
     gsearch_cv = GridSearchCV(estimator=model, param_grid=tuned_parameters,
                               scoring="neg_mean_squared_error", cv=my_cv, n_jobs=-1)
     gsearch_cv.fit(X_train, y_train)
     best_model = gsearch_cv.best_estimator_
     best_model.fit(X_train, y_train)
     y_pred = best_model.predict(X_test)
     mae = mean_absolute_error(y_test, y_pred)
     mse = mean_squared_error(y_test, y_pred)
     r2 = r2_score(y_test, y_pred)
     
     return best_model, mse, mae, r2
Example #26
def multivariate_regression(output_filename):
    regression_output = open(output_filename, 'w')

    lm = MultiTaskLasso(alpha=0.1)
    reg_name = "MTLassoRegression"

    gcvr2, gr2 = cv_regression(lm,
                               n_data,
                               Game_cols,
                               ["NormalizedLearningGain", "Presence"],
                               show=True)
    gccvr2, gcr2 = cv_regression(lm,
                                 n_data,
                                 Game_cols + Comp_cols,
                                 ["NormalizedLearningGain", "Presence"],
                                 show=True)
    gaucvr2, gaur2 = cv_regression(lm,
                                   n_data,
                                   Game_cols + AU_cols,
                                   ["NormalizedLearningGain", "Presence"],
                                   show=True)
Example #27
class LELM:
    upper_bound = 1.
    lower_bound = -1.

    def __init__(self, n_hidden, C=1., max_iter=10000):
        self.n_hidden = n_hidden
        self.C = C
        self.max_iter = max_iter

    def fit(self, X, y):
        # ensure the labels form a 2-D (one-hot) array
        X, y = copy.deepcopy(X), copy.deepcopy(y)
        self.sample_weight = None
        if len(y.shape) != 2:
            self.classes_ = np.unique(y)
            self.n_classes_ = len(self.classes_)
            y = self.__one2array(y, self.n_classes_)
        else:
            self.classes_ = np.arange(y.shape[1])
            self.n_classes_ = len(self.classes_)
        self.W = np.random.uniform(self.lower_bound,
                                   self.upper_bound,
                                   size=(X.shape[1], self.n_hidden))
        self.b = np.random.uniform(self.lower_bound,
                                   self.upper_bound,
                                   size=self.n_hidden)
        H = expit(np.dot(X, self.W) + self.b)
        self.multi_lasso = MultiTaskLasso(self.C,
                                          max_iter=self.max_iter).fit(H, y)

    def __one2array(self, y, n_dim):
        y_expected = np.zeros((y.shape[0], n_dim))
        for i in range(y.shape[0]):
            y_expected[i][y[i]] = 1
        return y_expected

    def predict(self, X):
        H = expit(np.dot(X, self.W) + self.b)
        output = self.multi_lasso.predict(H)
        return output.argmax(axis=1)
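A minimal usage sketch for this extreme-learning-machine classifier, with illustrative data (it assumes the numpy/scipy/copy/sklearn imports the class relies on; integer labels must run from 0 to n_classes-1 for the one-hot encoding):

import numpy as np
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, n_features=10, n_classes=3,
                           n_informative=5, random_state=0)
clf = LELM(n_hidden=50, C=0.01)  # C is passed to MultiTaskLasso as alpha
clf.fit(X, y)                    # labels are one-hot encoded internally
y_hat = clf.predict(X)           # argmax over the multi-output regression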
Example #28
    def __init__(self, scale=True, kfolds=5, alpha_stepsize=1 / 3.0):
        """Predict motif activities using Lasso MultiTask regression

        Parameters
        ----------
        scale : boolean, optional, default True
            If ``True``, the motif scores will be scaled 
            before classification
 
        kfolds : integer, optional, default 5
            number of kfolds for parameter search
        
        alpha_stepsize : float, optional, default 0.333
            stepsize for use in alpha gridsearch

        Attributes
        ----------
        act_ : DataFrame, shape (n_motifs, n_clusters)
            fitted motif activities
    
        sig_ : DataFrame, shape (n_motifs,)
            boolean values, if coefficients are higher/lower than
            the 1% threshold from random permutation
        """

        self.kfolds = kfolds
        self.act_description = ("activity values: coefficients from "
                                "fitted model")

        # initialize attributes
        self.act_ = None
        self.sig_ = None

        mtk = MultiTaskLasso()
        parameters = {
            "alpha": [np.exp(-x) for x in np.arange(0, 10, alpha_stepsize)],
        }
        self.clf = GridSearchCV(mtk, parameters, cv=kfolds, n_jobs=4)
Example #29
def run_one_configuration(
    full_train_covariate_matrix,
    complete_target,
    new_valid_covariate_data_frames,
    new_valid_target_data_frame,
    std_data_frame,
    target_clusters,
    featurizer,
    model_name,
    parameters,
    log_file,
):
    model_baseline = dict()
    model_baseline["type"] = model_name
    model_baseline["target_clusters"] = target_clusters

    if model_name == "multi_task_lasso":
        model = MultiTaskLasso(max_iter=5000, **parameters)
    elif model_name == "xgboost":
        model = MultiOutputRegressor(
            XGBRegressor(n_jobs=10,
                         objective="reg:squarederror",
                         verbosity=0,
                         **parameters))

    model.fit(featurizer(full_train_covariate_matrix),
              complete_target.to_numpy(copy=True))
    model_baseline["model"] = lambda x: model.predict(featurizer(x))

    skill, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "skill",
    )
    cos_sim, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "cosine-sim",
    )
    with open(log_file, "a") as f:
        f.write(f"{len(target_clusters)} {parameters} {skill} {cos_sim}\n")
Example #30
def multi_task_lasso(df):
    X = df[['X0', 'X1']]
    # X = df[['X0', 'X1', 'X2', 'X3']]
    Y = df[['y1', 'y2', 'y3']]

    mtl_scorer = make_scorer(mtl_roc_auc, greater_is_better=True)
    mtl_parameters = {
        'alpha': uniform(0, 10)
    }

    grid_search = RandomizedSearchCV(
        MultiTaskLasso(fit_intercept=False, alpha=0.05),
        mtl_parameters,
        n_iter=200,
        scoring=mtl_scorer,
        verbose=10,
        n_jobs=1,
        cv=5
    )
    grid_search.fit(X, Y)
    print(grid_search.best_params_)
    print(grid_search.best_score_)
    print(grid_search.best_estimator_.coef_)
Example #31
def constrained_multiclass_solve(w, psi, alpha=1.0, quiet=False, **lasso_kws):
    """
    Solve

    .. math::

        \\text{argmin}_s \\|s\\|_0 \\\\
        \\text{subject to} \\|w - \\psi s\\|_2^2 \\leq tol
    """
    model = MultiTaskLasso(alpha=alpha, **lasso_kws)

    if quiet:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=RuntimeWarning)
            warnings.filterwarnings("ignore", category=UserWarning)
            model.fit(psi, w)
    else:
        model.fit(psi, w)

    return model.coef_.T
Example #32
combined_features.fit(X_train_scaled, train_labels.ravel())
#print(pca.explained_variance_ratio_) 
X_train_reduced = combined_features.transform(X_train_scaled)
X_test_reduced = combined_features.transform(X_test_scaled)

## Create K folds
k_fold = KFold(n_splits=10)
for train, test in k_fold.split(X_train_reduced):
    X1 = X_train_reduced[train]
    Y1 = Y_train_raw[train]
    
    X2 = X_train_reduced[test]
    Y2 = Y_train_raw[test]    

    ## Train Classifiers on fold
    mcl_clf = MultiTaskLasso(alpha=.3)
    mcl_clf.fit(X1, Y1)


    ## Score Classifiers on fold

    mcl_clf_score = mcl_clf.score(X2, Y2)

    print "MultiTaskLasso:  ", mcl_clf_score



## Lasso CV for parameter optimization
t1 = time.time()
clf = MultiTaskLasso(alpha=.3).fit(X_train_reduced, Y_train_raw)
t_lasso_cv = time.time() - t1
Example #33
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import MultiTaskLasso
from sklearn.model_selection import train_test_split

#split dataset to get the necessary sub-datasets
features_train, features_test, labels_train, labels_test = train_test_split(features_sc, label_scm, test_size=0.33, random_state=42)

#pre-process: dimensionality reduction (SVD); fit on the training set and
#reuse the same transform for the test set so both share one basis
svd1 = TruncatedSVD(n_components=9, random_state=1).fit(features_train)
features_train = svd1.transform(features_train)
features_test = svd1.transform(features_test)

#do regression
mtl = MultiTaskLasso(alpha=1e-9, random_state=1)
mtl.fit(features_train, labels_train)
print("MultiTaskLasso", mtl.score(features_test, labels_test))

######################################################################
#this part computes the Multi-Task Elastic-Net score with the optimal hyper-parameter

#load necessary libs 
from sklearn.feature_selection import SelectKBest
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import MultiTaskElasticNet
from sklearn.model_selection import train_test_split

#split dataset to get the necessary sub-datasets
features_train, features_test, labels_train, labels_test = train_test_split(features_sc, label_scm, test_size=0.33, random_state=42)
Example #34
    def fit(self, X, Y):
        self.sparsed_X = list()
        # First, translate points to the origin
        main_centroid = [np.mean(x) for x in np.transpose(X)]
        print('Main centroid:', main_centroid)
        X = X - main_centroid

        byClassDict = defaultdict(list)
        for i in range(len(Y)):
            byClassDict[Y[i]].append(X[i])


        class_centroids = dict()

        centroids_matrix = list()
        kindexmap = dict()

        _i = 0
        for k in byClassDict:
            class_centroid = [np.mean(x) for x in np.transpose(byClassDict[k])]
            _norm = np.linalg.norm(class_centroid)
            # Play with this using _norm, i and any other function/constant
            _scaling_factor = _norm ** 2
            _centroid = np.array(class_centroid)  # optionally scale by _scaling_factor
            print('*** Class centroid:', _centroid)
            class_centroids[k] = _centroid
            centroids_matrix.append(_centroid)
            kindexmap[k] = _i
            _i += 1

        centroids_matrix = np.array(centroids_matrix)
        ortho_centroids_matrix = np.array(gram_schmidt.gs(centroids_matrix))
        ortho_centroids_matrix = normalize(ortho_centroids_matrix)

        print('*Centroids matrix', centroids_matrix)
        print('*Ortho centroids matrix', ortho_centroids_matrix)


        newX, newY = list(), list()
        ks = list()
        for k in byClassDict:
            #byClassDict[k] = np.array(byClassDict[k]) - centroids_matrix[kindexmap[k]] + np.array(ortho_centroids_matrix[kindexmap[k]]) #class_centroids[k]

            #this is the basis vector corresponding to current class
            classvector = np.array(ortho_centroids_matrix[kindexmap[k]])
            kScalingFactor = self.support

            #This section tries to get a good scaling factor for each orthonormal vector
            maxks = list()
            for _k in ks:
                projs = [scalarProjection(x,classvector) for x in byClassDict[_k]]
                maxk = max(projs)
                maxks.append(maxk)

                maxownk = max([scalarProjection(x,classvector) for x in byClassDict[k]])

            if len(ks):
                kScalingFactor = max(maxks) + abs(maxownk) + self.support


            for v in byClassDict[k]:
                vv = np.array(v) - centroids_matrix[kindexmap[k]] + classvector*kScalingFactor
                self.sparsed_X.append(vv)
                newX.append(v)
                newY.append(k)
                ks.append(k)

        self.sparsed_X = np.array(self.sparsed_X)

        if self.projectOnSubspace:
            # Project onto the new subspace spanned by the class vectors
            self.sparsed_X = np.dot(self.sparsed_X, np.transpose(centroids_matrix))


        if self.mapperType == 'PIMP':
            #self.scaler = preprocessing.StandardScaler().fit(self.sparsed_X)
            #self.sparsed_X = self.scaler.transform(self.sparsed_X)

            self.transformation_matrix = self.sparsed_X * np.transpose(np.linalg.pinv(X))
            #self.transformation_matrix = X * np.transpose(np.linalg.pinv(self.sparsed_X))

        if self.mapperType == 'Regressor':
            self.Regressor = MultiTaskLasso(alpha=1e-8, max_iter=2000)
            self.Regressor.fit(newX, self.sparsed_X)

        return self.sparsed_X, newY