Esempio n. 1
0
def findLassoAlpha(alpha, y, X, returnPred=False):
    """Walk-forward evaluation of a MultiTaskLasso for one ``alpha``.

    The first model is trained on 2013-10-01..2015-04-01.  For every date in
    the 2015-05-01..2016-04-01 test window a fresh model is fitted on the
    current training window, the feature row of that date is predicted, and
    the training window is then extended up to (and including) that date
    before the next step.

    Parameters
    ----------
    alpha : float
        Regularisation strength passed to ``MultiTaskLasso``.
    y : pandas.DataFrame
        Targets; must support ``.loc`` slicing with date strings.
    X : pandas.DataFrame
        Features; sliced with the same date labels as ``y``.
    returnPred : bool, optional
        When true, return the predictions instead of the error.

    Returns
    -------
    float or tuple
        ``mean_squared_error(y_test, prediction)`` by default, or the pair
        ``(y_test, prediction)`` when ``returnPred`` is true.

    Raises
    ------
    IndexError
        If the test window contains no rows.
    """
    train_start, first_train_end = '2013-10-01', '2015-04-01'
    test_start, test_end = '2015-05-01', '2016-04-01'

    X_test = X.loc[test_start:test_end]
    y_test = y.loc[test_start:test_end]
    test_dates = y_test.index
    if len(test_dates) == 0:
        raise IndexError('no observations in the test window')

    step_predictions = []
    train_end = first_train_end
    for dt in test_dates:
        # max_iter has to be an integer; a float such as 1e5 is rejected by
        # the parameter validation of current scikit-learn releases.
        model = MultiTaskLasso(alpha=alpha, max_iter=100000)
        model.fit(X.loc[train_start:train_end], y.loc[train_start:train_end])
        # Series.reshape is gone from pandas, so reshape the raw values.
        features = X_test.loc[dt].values.reshape(1, -1)
        step_predictions.append(
            pd.DataFrame(model.predict(features), columns=y.columns))
        # The date just predicted becomes part of the next training window.
        train_end = dt

    prediction = pd.concat(step_predictions)
    prediction.index = test_dates
    if returnPred:
        return (y_test, prediction)
    return mean_squared_error(y_test, prediction)
Esempio n. 2
0
 def test_dml(self):
     """Fit the DML estimators on pandas inputs and check name propagation.

     Covers LinearDML, SparseLinearDML and ForestDML with a single
     treatment/outcome, then LinearDML and SparseLinearDML with multiple
     treatments and outcomes.
     """
     cls = TestPandasIntegration
     frame = cls.df

     # ---- single treatment and outcome ----
     feats = frame[cls.features]
     ctrls = frame[cls.controls]
     outcome = frame[cls.outcome]
     treatment = frame[cls.cont_treat]

     # LinearDML with statsmodels inference
     linear = LinearDML(model_y=LassoCV(), model_t=LassoCV())
     linear.fit(outcome, treatment, X=feats, W=ctrls,
                inference='statsmodels')
     effects = linear.effect(feats)
     lower, upper = linear.effect_interval(feats, alpha=0.05)
     # names must propagate into the summary
     self._check_input_names(linear.summary())

     # Re-fit the same estimator on renamed feature columns
     renamed = feats.rename(
         columns={c: "{}_1".format(c) for c in feats.columns})
     linear.fit(outcome, treatment, X=renamed, W=ctrls,
                inference='statsmodels')
     self._check_input_names(linear.summary(), feat_comp=renamed.columns)

     # SparseLinearDML with debiased-lasso inference
     sparse = SparseLinearDML(model_y=LassoCV(), model_t=LassoCV())
     sparse.fit(outcome, treatment, X=feats, W=ctrls,
                inference='debiasedlasso')
     effects = sparse.effect(feats)
     lower, upper = sparse.effect_interval(feats, alpha=0.05)
     self._check_input_names(sparse.summary())

     # ForestDML with 'blb' inference
     forest = ForestDML(model_y=GradientBoostingRegressor(),
                        model_t=GradientBoostingRegressor())
     forest.fit(outcome, treatment, X=feats, W=ctrls, inference='blb')
     effects = forest.effect(feats)
     lower, upper = forest.effect_interval(feats, alpha=0.05)

     # ---- multiple treatments and outcomes ----
     outcome = frame[cls.outcome_multi]
     treatment = frame[cls.cont_treat_multi]

     # LinearDML: statsmodels first, then bootstrap inference
     linear = LinearDML(model_y=MultiTaskLasso(), model_t=MultiTaskLasso())
     linear.fit(outcome, treatment, X=feats, W=ctrls,
                inference='statsmodels')
     self._check_input_names(linear.summary(), True, True)
     self._check_popsum_names(
         linear.effect_inference(feats).population_summary(), True)
     linear.fit(outcome, treatment, X=feats, W=ctrls,
                inference='bootstrap')
     self._check_input_names(linear.summary(), True, True)
     self._check_popsum_names(
         linear.effect_inference(feats).population_summary(), True)

     # SparseLinearDML
     sparse = SparseLinearDML(model_y=MultiTaskLasso(),
                              model_t=MultiTaskLasso())
     sparse.fit(outcome, treatment, X=feats, W=ctrls,
                inference='debiasedlasso')
     effects = sparse.effect(feats)
     lower, upper = sparse.effect_interval(feats, alpha=0.05)
     self._check_input_names(sparse.summary(), True, True)
     self._check_popsum_names(
         sparse.effect_inference(feats).population_summary(), True)
Esempio n. 3
0
def test_warm_start_multitask_lasso():
    """Two warm-started 5-iteration fits must match one 10-iteration fit."""
    X, y, _, _ = build_dataset()
    targets = np.c_[y, y]

    warm = MultiTaskLasso(alpha=0.1, max_iter=5, warm_start=True)
    # Second pass continues from the coefficients of the first one.
    for _attempt in range(2):
        ignore_warnings(warm.fit)(X, targets)

    reference = MultiTaskLasso(alpha=0.1, max_iter=10)
    ignore_warnings(reference.fit)(X, targets)
    assert_array_almost_equal(reference.coef_, warm.coef_)
Esempio n. 4
0
def main():
    """Train a default MultiTaskLasso on a pickled dataset and report MAE.

    The dataset path is taken from ``sys.argv[1]`` and loaded through
    ``qmDL().load``; the result is read as a mapping with the keys ``'XX'``
    (features), ``'T'`` (targets) and ``'names'`` (one label per target
    column).  2211 samples are held out for testing.

    The print calls use ``%`` formatting with a single argument so that they
    emit the same text on Python 2 and Python 3.
    """
    pickledname = sys.argv[1]
    dataset = qmDL().load(pickledname=pickledname)

    X, Y, labels = dataset['XX'], dataset['T'], dataset['names']

    # Hold out a fixed number of samples; the seed keeps the split repeatable.
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=2211,
                                                        random_state=42)
    print('Len X train , test: %s %s' % (len(X_train), len(X_test)))

    regressor = MultiTaskLasso().fit(X_train, Y_train)
    Y_pred = regressor.predict(X_test)

    print(Y_pred)
    print('Y_pred %s' % (Y_pred.shape,))

    # One mean-absolute-error line per target column.
    for i, label in enumerate(labels):
        mae = mean_absolute_error(Y_test[:, i], Y_pred[:, i])
        print('*** MAE  %s %s' % (label, mae))
Esempio n. 5
0
def get_signature_genes(X, n, lda=10):
    """Return indices of about ``n`` rows of ``X`` kept by a MultiTaskLasso.

    ``X.T`` is regressed onto itself, and the penalty is tuned in three
    stages until the number of rows with any non-zero coefficient reaches
    ``n``:

    1. divide the penalty by 10 until at least ``n`` rows are selected;
    2. double it while more than ``1.2 * n`` rows are selected;
    3. grow it by 10% while more than ``n`` rows are selected.

    Coordinate selection is random and unseeded, so results can vary
    between runs.
    """
    samples = X.T

    def n_active(weights):
        # rows carrying at least one non-zero coefficient
        return (abs(weights).sum(1) > 0).sum()

    W = np.zeros((X.shape[0], X.shape[0]))

    # Stage 1: coarse search from the bottom (new estimator per step).
    while n_active(W) < n:
        lda /= 10.
        model = MultiTaskLasso(alpha=lda,
                               max_iter=100,
                               tol=.001,
                               selection='random',
                               warm_start=True)
        model.fit(samples, samples)
        W = model.coef_.T

    # Stage 2: fine search from the top, reusing the last estimator.
    while n_active(W) > n * 1.2:
        lda *= 2.
        model.set_params(alpha=lda)
        model.fit(samples, samples)
        W = model.coef_.T

    # Stage 3: finer search.
    while n_active(W) > n:
        lda *= 1.1
        model.set_params(alpha=lda)
        model.fit(samples, samples)
        W = model.coef_.T

    return np.nonzero(abs(W).sum(1))[0]
Esempio n. 6
0
def _MTLassoMixed_MatchSpace(X, Y, fit_model_wrapper, v_pens=None, n_v_cv = 5, **kwargs): #pylint: disable=missing-param-doc, unused-argument
    """Pick a MultiTaskLasso penalty by scoring each candidate downstream.

    A ``MultiTaskLassoCV`` fit is used only to obtain the grid of penalties
    (``alphas_``).  For every penalty a plain ``MultiTaskLasso`` is fitted,
    its coefficients are collapsed to one weight per feature (L2 norm over
    tasks), and ``fit_model_wrapper`` is called with the resulting
    ``SelMatchSpace`` and the non-zero weights.  The penalty whose result has
    the smallest ``.score`` wins.

    Returns ``(SelMatchSpace, selected weights, best penalty, all weights)``.
    """
    # Note that MultiTaskLasso(CV).path with the same alpha doesn't produce
    # same results as MultiTaskLasso(CV)
    cv_fit = MultiTaskLassoCV(normalize=True, cv=n_v_cv, alphas=v_pens).fit(X, Y)
    penalties = cv_fit.alphas_

    weights_per_penalty = []
    scores = np.zeros((len(penalties)))
    for idx, penalty in enumerate(penalties):
        coefs = MultiTaskLasso(alpha=penalty, normalize=True).fit(X, Y).coef_
        # n_tasks x n_features -> n_features
        weights = np.sqrt(np.sum(np.square(coefs), axis=0))
        selected = (weights != 0)
        downstream = fit_model_wrapper(SelMatchSpace(selected), weights[selected])
        weights_per_penalty.append(weights)
        scores[idx] = downstream.score

    winner = np.argmin(scores)
    best_v_pen = penalties[winner]
    V_best = weights_per_penalty[winner]
    keep = (V_best != 0)
    return SelMatchSpace(keep), V_best[keep], best_v_pen, V_best
Esempio n. 7
0
def _MTLassoCV_MatchSpace(X,
                          Y,
                          v_pens=None,
                          n_v_cv=5,
                          sample_frac=1,
                          Y_col_block_size=None,
                          se_factor=None,
                          normalize=True,
                          **kwargs):  # pylint: disable=missing-param-doc, unused-argument
    """Select features with a cross-validated MultiTaskLasso.

    Optionally sub-samples rows (``sample_frac < 1``, without replacement)
    and passes ``Y`` through ``_block_summ_cols`` when ``Y_col_block_size``
    is given.  The penalty is the CV optimum unless ``se_factor`` is set, in
    which case ``_neg_se_rule`` chooses it and a plain ``MultiTaskLasso`` is
    refitted with that value.

    Returns ``(SelMatchSpace, selected weights, penalty, (weights, fit))``
    where the weights are the per-feature L2 norms of the coefficients.
    """
    # A fake MT would do Lasso on y_mean = Y.mean(axis=1)
    if sample_frac < 1:
        n_rows = X.shape[0]
        rows = np.random.choice(n_rows, int(sample_frac * n_rows), replace=False)
        X, Y = X[rows, :], Y[rows, :]
    if Y_col_block_size is not None:
        Y = _block_summ_cols(Y, Y_col_block_size)

    selector = MultiTaskLassoCV(normalize=normalize,
                                cv=n_v_cv,
                                alphas=v_pens).fit(X, Y)
    if se_factor is None:
        best_v_pen = selector.alpha_
    else:
        best_v_pen = _neg_se_rule(selector, factor=se_factor)
        selector = MultiTaskLasso(alpha=best_v_pen,
                                  normalize=normalize).fit(X, Y)

    # n_tasks x n_features -> n_features
    V = np.sqrt(np.sum(np.square(selector.coef_), axis=0))
    nonzero = V != 0
    return SelMatchSpace(nonzero), V[nonzero], best_v_pen, (V, selector)
Esempio n. 8
0
def asd_multitasklasso():
    """Train an ``ASDMultitask`` with a default MultiTaskLasso on test data.

    Loads the pickled test data set from a hard-coded path, trains for the
    'JJA' season, predicts with ``test_set=False`` and writes the result to
    ``test_data/mtl_test.nc``.
    """
    model = MultiTaskLasso()
    f = "/home/vandal.t/repos/pydownscale/pydownscale/test_data/testdata.pkl"
    # Pickles are binary data: open in 'rb' (text mode fails on Python 3)
    # and let the context manager close the handle.
    with open(f, 'rb') as handle:
        data = pickle.load(handle)
    asdm = ASDMultitask(data, model, season='JJA')
    asdm.train()
    out = asdm.predict(test_set=False)
    out.to_netcdf("test_data/mtl_test.nc")
Esempio n. 9
0
def test_multi_task_lasso_readonly_data():
    """MultiTaskLasso must fit on data handed out by ``TempMemmap``."""
    X, y, _, _ = build_dataset()
    Y = np.c_[y, y]
    with TempMemmap((X, Y)) as (X, Y):
        # NOTE(review): Y is rebuilt from y right here, so the Y yielded by
        # TempMemmap is discarded and presumably only X is memmap-backed.
        Y = np.c_[y, y]
        model = MultiTaskLasso(alpha=1, tol=1e-8)
        model.fit(X, Y)
        assert 0 < model.dual_gap_ < 1e-5
        # both target columns are identical, so their coefficients must agree
        assert_array_almost_equal(model.coef_[0], model.coef_[1])
Esempio n. 10
0
def constrained_multiclass_solve(w, psi, alpha=1.0, **lasso_kws):
    r"""
    Fit a ``MultiTaskLasso`` of ``w`` on ``psi`` and return its coefficients.

    The problem this is meant to address is

    .. math::

        \text{argmin}_s \|s\|_0 \\
        \text{subject to} \|w - \psi s\|_2^2 \leq tol

    ``alpha`` and any extra keyword arguments are forwarded to
    ``MultiTaskLasso``; the return value is ``coef_`` transposed.
    """
    return MultiTaskLasso(alpha=alpha, **lasso_kws).fit(psi, w).coef_.T
Esempio n. 11
0
 def test_model_multi_task_lasso(self):
     """Convert a fitted two-target MultiTaskLasso to ONNX and dump it."""
     fitted, data = fit_regression_model(MultiTaskLasso(), n_targets=2)
     # single float input with a free batch dimension
     input_spec = [("input", FloatTensorType([None, data.shape[1]]))]
     onnx_model = convert_sklearn(fitted, "multi-task lasso", input_spec)
     self.assertIsNotNone(onnx_model)
     dump_data_and_model(data,
                         fitted,
                         onnx_model,
                         verbose=False,
                         basename="SklearnMultiTaskLasso-Dec4")
Esempio n. 12
0
def get_hyperparameters_model():
    """Return the search configuration for a default MultiTaskLasso.

    The result maps ``'multi_task_lasso'`` to a dict holding the estimator
    (``'model'``) and an empty ``'param_distributions'`` dict.
    """
    search_space = {}
    estimator = MultiTaskLasso()
    return {
        'multi_task_lasso': {
            'model': estimator,
            'param_distributions': search_space,
        },
    }
Esempio n. 13
0
    def __init__(self, scale=True, kfolds=4, alpha_stepsize=1.0, ncpus=None):
        """Predict motif activities using Lasso MultiTask regression

        Parameters
        ----------
        scale : boolean, optional, default True
            If ``True``, the motif scores will be scaled
            before classification

        kfolds : integer, optional, default 4
            number of kfolds for parameter search

        alpha_stepsize : float, optional, default 1.0
            stepsize for use in alpha gridsearch

        ncpus : int, optional
            Number of threads. Default is the number specified in the config.

        Attributes
        ----------
        act_ : DataFrame, shape (n_motifs, n_clusters)
            fitted motif activities

        sig_ : DataFrame, shape (n_motifs,)
            boolean values, if coefficients are higher/lower than
            the 1% cutoff from random permutation
        """
        # Fall back to the configured thread count (2 if the config has none).
        if ncpus is None:
            ncpus = int(MotifConfig().get_default_params().get("ncpus", 2))

        self.scale = scale
        self.kfolds = kfolds
        self.ncpus = ncpus
        self.act_description = "activity values: coefficients from fitted model"

        # results, filled in after fitting
        self.act_ = None
        self.sig_ = None

        # alpha grid: exp(-x) for x = 0, stepsize, 2 * stepsize, ... < 10
        alpha_grid = [
            np.exp(-exponent) for exponent in np.arange(0, 10, alpha_stepsize)
        ]
        self.clf = GridSearchCV(MultiTaskLasso(),
                                {"alpha": alpha_grid},
                                cv=kfolds,
                                n_jobs=self.ncpus,
                                scoring="r2")

        self.pref_table = "score"
        self.supported_tables = ["score", "count"]
        self.ptype = "regression"
def make_dictionary(X,
                    n_components=20,
                    alpha=5.,
                    write_dir='/tmp/',
                    contrasts=None,
                    method='multitask',
                    l1_ratio=.5,
                    n_subjects=13):
    """Create dictionary + encoding.

    An initial dictionary is computed through a joblib-style ``Memory`` cache
    in ``write_dir`` and saved to ``<write_dir>/dictionary.npz`` together
    with ``contrasts``.  The encoding is then obtained with one of:

    * ``'online'``    -- ``dict_learning_online`` started from the initial
      dictionary (this also replaces the dictionary and re-saves it);
    * ``'sparse'``    -- ``sparse_encode`` against the initial dictionary;
    * ``'multitask'`` -- one ``MultiTaskElasticNet`` fit per voxel, with the
      subjects as tasks.  Assumes ``X.shape[1] == n_subjects * n_voxels``
      with columns ordered subject-major -- TODO confirm against callers.

    Parameters
    ----------
    X : array
        Data; used as ``X.T`` by the 'online' and 'sparse' encoders.
    n_components : int
        Number of dictionary atoms.
    alpha : float
        Sparsity penalty passed to the chosen encoder.
    write_dir : str
        Directory for the cache and the saved dictionary.
    contrasts : list, optional
        Stored alongside the dictionary; defaults to an empty list.
    method : {'online', 'sparse', 'multitask'}
        Encoding strategy.
    l1_ratio : float
        Only used by 'multitask'.
    n_subjects : int
        Only used by 'multitask'.

    Returns
    -------
    tuple
        ``(dictionary, components)``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported strategies.
    """
    from sklearn.decomposition import dict_learning_online, sparse_encode
    from sklearn.linear_model import MultiTaskElasticNet

    # Fail before any work is done instead of hitting an unbound
    # ``components`` at the return statement.
    if method not in ('online', 'sparse', 'multitask'):
        raise ValueError(
            "method must be 'online', 'sparse' or 'multitask', got %r"
            % (method,))
    if contrasts is None:
        contrasts = []

    dictionary_path = os.path.join(write_dir, 'dictionary.npz')
    mem = Memory(write_dir, verbose=0)
    dictionary = mem.cache(initial_dictionary)(n_components, X)
    np.savez(dictionary_path, loadings=dictionary, contrasts=contrasts)

    if method == 'online':
        components, dictionary = dict_learning_online(X.T,
                                                      n_components,
                                                      alpha=alpha,
                                                      dict_init=dictionary,
                                                      batch_size=200,
                                                      method='cd',
                                                      return_code=True,
                                                      shuffle=True,
                                                      n_jobs=1,
                                                      positive_code=True)
        # the dictionary was refined, so overwrite the saved copy
        np.savez(dictionary_path, loadings=dictionary, contrasts=contrasts)
    elif method == 'sparse':
        components = sparse_encode(X.T,
                                   dictionary,
                                   alpha=alpha,
                                   max_iter=10,
                                   n_jobs=1,
                                   check_input=True,
                                   verbose=0,
                                   positive=True)
    else:  # 'multitask'
        n_voxels = X.shape[1] // n_subjects
        components = np.zeros((X.shape[1], n_components))
        clf = MultiTaskElasticNet(alpha=alpha, l1_ratio=l1_ratio)
        for i in range(n_voxels):
            # columns of voxel i across all subjects
            voxel_cols = slice(i, i + n_subjects * n_voxels, n_voxels)
            components[voxel_cols] = \
                clf.fit(dictionary.T, X[:, voxel_cols]).coef_
    return dictionary, components
def main():
    """Compare per-task Lasso with MultiTaskLasso on synthetic data and plot.

    Builds 40 tasks over 30 features, of which the first 5 carry sine-wave
    coefficients with random frequency and phase, fits both estimators and
    shows (1) the sparsity pattern of each coefficient matrix and (2) the
    recovered coefficients of feature 0 against the ground truth.
    """
    rng = np.random.RandomState(42)

    # Generate some 2D coefficients with sine waves with random frequency
    # and phase
    n_samples, n_features, n_tasks = 100, 30, 40
    n_relevant_features = 5
    times = np.linspace(0, 2 * np.pi, n_tasks)
    coef = np.zeros((n_tasks, n_features))
    for k in range(n_relevant_features):
        frequency = 1. + rng.randn(1)
        phase = 3 * rng.randn(1)
        coef[:, k] = np.sin(frequency * times + phase)

    X = rng.randn(n_samples, n_features)
    noise = rng.randn(n_samples, n_tasks)
    Y = np.dot(X, coef.T) + noise

    coef_lasso_ = np.array([Lasso(alpha=0.5).fit(X, y).coef_ for y in Y.T])
    coef_multi_task_lasso_ = MultiTaskLasso(alpha=1.).fit(X, Y).coef_

    # Sparsity pattern of each estimator, side by side
    fig = plt.figure(figsize=(8, 5))
    panels = [(coef_lasso_, 'Lasso'),
              (coef_multi_task_lasso_, 'MultiTaskLasso')]
    for position, (weights, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.spy(weights)
        plt.xlabel('Feature')
        plt.ylabel('Time (or Task)')
        plt.text(10, 5, title)
    fig.suptitle('Coefficient non-zero location')

    # Coefficients of one feature across tasks
    feature_to_plot = 0
    plt.figure()
    lw = 2
    curves = [(coef, 'seagreen', 'Ground truth'),
              (coef_lasso_, 'cornflowerblue', 'Lasso'),
              (coef_multi_task_lasso_, 'gold', 'MultiTaskLasso')]
    for weights, colour, label in curves:
        plt.plot(weights[:, feature_to_plot],
                 color=colour,
                 linewidth=lw,
                 label=label)
    plt.legend(loc='upper center')
    plt.axis('tight')
    plt.ylim([-1.1, 1.1])
    plt.show()
 def fit_force_params(self, alpha=None):
     """
     fit sparse linear regression on remaining n_variables-q variables
     alpha is penalization parameter, None triggers cross validation

     The fitted estimator is stored in ``self.force_model``; it is trained
     on ``self.features_forcing[self.mask_f]`` against ``self.eps``.
     """
     # ``normalize`` is intentionally not passed: False has always been the
     # default, and the argument was removed in scikit-learn 1.2, where
     # passing it (even as False) raises a TypeError.
     if alpha is None:  # do cross validation
         self.force_model = \
             MultiTaskLassoCV(eps=1e-3, n_alphas=50, cv=10, n_jobs=-1,
                              fit_intercept=False)
     else:
         self.force_model = \
             MultiTaskLasso(alpha=alpha, fit_intercept=False)
     self.force_model.fit(self.features_forcing[self.mask_f], self.eps)
Esempio n. 17
0
def test_multi_task_lasso_and_enet():
    """Check convergence and task symmetry for both multi-task estimators."""
    X, y, _, _ = build_dataset()
    Y = np.c_[y, y]

    # Identical target columns must yield identical coefficient rows.
    for estimator_cls in (MultiTaskLasso, MultiTaskElasticNet):
        fitted = estimator_cls(alpha=1, tol=1e-8).fit(X, Y)
        assert 0 < fitted.dual_gap_ < 1e-5
        assert_array_almost_equal(fitted.coef_[0], fitted.coef_[1])

    # A single iteration cannot converge and has to warn about it.
    single_pass = MultiTaskElasticNet(alpha=1.0, tol=1e-8, max_iter=1)
    assert_warns_message(ConvergenceWarning, 'did not converge',
                         single_pass.fit, X, Y)
Esempio n. 18
0
    def _get_minimizer(self):
        """Build the estimator matching ``self.method``.

        Supported methods are ``"multi-task"`` (MultiTaskLasso),
        ``"gradient_decent"`` (Lasso) and ``"lars"`` (LassoLars).  Any other
        value yields ``None``.
        """
        method = self.method
        if method not in ("multi-task", "gradient_decent", "lars"):
            return None

        # The factor 0.5 for alpha in the Lasso/LassoLars problem is to
        # compensate 1/(2 * n_sample) factor in OLS term.
        half_lambda = self.cv_lambdas[0] / 2.0

        if method == "multi-task":
            return MultiTaskLasso(
                alpha=half_lambda,
                fit_intercept=False,
                max_iter=self.max_iterations,
                tol=self.tolerance,
                copy_X=True,
                random_state=None,
                warm_start=True,
                selection="random",
            )

        if method == "gradient_decent":
            return Lasso(
                alpha=half_lambda,
                fit_intercept=False,
                precompute=True,
                max_iter=self.max_iterations,
                tol=self.tolerance,
                copy_X=True,
                positive=self.positive,
                random_state=None,
                warm_start=True,
                selection="random",
            )

        # method == "lars"
        return LassoLars(
            alpha=half_lambda,
            fit_intercept=False,
            verbose=True,
            precompute="auto",
            max_iter=self.max_iterations,
            eps=2.220446049250313e-16,
            copy_X=True,
            fit_path=False,
            positive=self.positive,
            jitter=None,
            random_state=None,
        )
 def fit_lin_model(self, alpha=None):
     """
     fit sparse linear regression on first q variables
     alpha is penalization parameter, None triggers cross validation

     The fitted estimator is stored in ``self.lin_model``; it is trained on
     ``self.features_lin_model[self.mask_l_m]`` against
     ``self.delta_v[self.mask_l_m]``.
     """
     # ``normalize`` is intentionally not passed: False has always been the
     # default, and the argument was removed in scikit-learn 1.2, where
     # passing it (even as False) raises a TypeError.
     if alpha is None:  # do cross validation
         self.lin_model = \
             MultiTaskLassoCV(eps=1e-3, n_alphas=50, cv=10, n_jobs=-1,
                              fit_intercept=False, max_iter=3500)
     else:
         self.lin_model = \
             MultiTaskLasso(alpha=alpha, fit_intercept=False)
     self.lin_model.fit(self.features_lin_model[self.mask_l_m],
                        self.delta_v[self.mask_l_m])
def get_regressors_multitask(nmodels='all'):
    """Return one or all of the multi-task linear regressors.

    Parameters
    ----------
    nmodels : 'all' or int
        ``'all'`` returns every regressor; ``1`` selects
        ``MultiTaskElasticNet`` and ``2`` selects ``MultiTaskLasso``.

    Returns
    -------
    list
        Freshly constructed estimator instances (a one-element list when a
        single model is requested).

    Raises
    ------
    ValueError
        If ``nmodels`` is neither ``'all'`` nor a valid model number.
    """
    # Position in this tuple + 1 is the public model number.
    factories = (MultiTaskElasticNet, MultiTaskLasso)

    if nmodels == 'all':
        return [factory() for factory in factories]

    try:
        position = int(nmodels)
    except (TypeError, ValueError):
        position = 0
    if not 1 <= position <= len(factories):
        raise ValueError(
            "nmodels must be 'all' or an integer from 1 to %d, got %r"
            % (len(factories), nmodels))
    # Return the estimator itself, not the string 'lr<N>'.
    return [factories[position - 1]()]
Esempio n. 21
0
    def mtlasso_model(self, X_train, y_train, X_test, y_test):
        """Fit a MultiTaskLasso (alpha=0.005) and print train/test metrics.

        Prints the estimator's own score on both splits, followed by the
        mean squared error and R^2 of the predictions.
        """
        regressor = MultiTaskLasso(alpha=.005)
        regressor.fit(X_train, y_train)

        train_pred = regressor.predict(X_train)
        test_pred = regressor.predict(X_test)

        # Scoring the model
        for features, targets in ((X_train, y_train), (X_test, y_test)):
            print(regressor.score(features, targets))

        mse_train = mean_squared_error(y_train, train_pred)
        mse_test = mean_squared_error(y_test, test_pred)
        print('MSE train: %.6f, MSE test: %.6f' % (mse_train, mse_test))

        r2_train = r2_score(y_train, train_pred)
        r2_test = r2_score(y_test, test_pred)
        print('R^2 train: %.6f, R^2 test: %.6f' % (r2_train, r2_test))
Esempio n. 22
0
 def MultiTaskLasso_regression(self, X_train, y_train, X_test, y_test):
     """Grid-search ``alpha`` for a MultiTaskLasso and score it on test data.

     ``alpha`` is searched over 100 log-spaced values in [1e-5, 1e5] with
     10x10 repeated K-fold cross-validation, scored by negative MSE.

     Returns
     -------
     tuple
         ``(best_model, mse, mae, r2)`` where the metrics are computed on
         ``(X_test, y_test)``.
     """
     param_grid = [{"alpha": np.logspace(-5, 5, 100)}]
     folds = RepeatedKFold(n_splits=10, n_repeats=10, random_state=42)
     search = GridSearchCV(estimator=MultiTaskLasso(),
                           param_grid=param_grid,
                           scoring="neg_mean_squared_error",
                           cv=folds,
                           n_jobs=-1)
     search.fit(X_train, y_train)

     # With the default refit=True, GridSearchCV has already retrained the
     # best configuration on the full training set, so fitting it again
     # would only repeat the same work.
     best_model = search.best_estimator_

     y_pred = best_model.predict(X_test)
     mae = mean_absolute_error(y_test, y_pred)
     mse = mean_squared_error(y_test, y_pred)
     r2 = r2_score(y_test, y_pred)

     return best_model, mse, mae, r2
Esempio n. 23
0
def run_one_configuration(
    full_train_covariate_matrix,
    complete_target,
    new_valid_covariate_data_frames,
    new_valid_target_data_frame,
    std_data_frame,
    target_clusters,
    featurizer,
    model_name,
    parameters,
    log_file,
):
    """Train one model configuration and append its validation scores to a log.

    The model is fitted on ``featurizer(full_train_covariate_matrix)``
    against ``complete_target``, evaluated through ``location_wise_metric``
    for the ``"skill"`` and ``"cosine-sim"`` metrics, and one line
    ``"<n clusters> <parameters> <skill> <cos_sim>"`` is appended to
    ``log_file``.

    Parameters
    ----------
    model_name : {"multi_task_lasso", "xgboost"}
        Which estimator to build.
    parameters : dict
        Extra keyword arguments for the estimator.

    Raises
    ------
    ValueError
        If ``model_name`` is not a supported model.
    """
    if model_name == "multi_task_lasso":
        model = MultiTaskLasso(max_iter=5000, **parameters)
    elif model_name == "xgboost":
        model = MultiOutputRegressor(
            XGBRegressor(n_jobs=10,
                         objective="reg:squarederror",
                         verbosity=0,
                         **parameters))
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError(f"unknown model_name: {model_name!r}")

    model.fit(featurizer(full_train_covariate_matrix),
              complete_target.to_numpy(copy=True))

    model_baseline = {
        "type": model_name,
        "target_clusters": target_clusters,
        # prediction callable applying the same featurisation as in training
        "model": lambda x: model.predict(featurizer(x)),
    }

    skill, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "skill",
    )
    cos_sim, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "cosine-sim",
    )
    with open(log_file, "a") as f:
        f.write(f"{len(target_clusters)} {parameters} {skill} {cos_sim}\n")
Esempio n. 24
0
def test_multitasklasso(gaussian_data, fit_intercept, normalize, alpha):
    """GroupLasso with a shared design must agree with MultiTaskLasso."""
    X, y = gaussian_data
    design = X[0]
    # every task uses the same design matrix
    X = [design, design]
    n_samples = y.shape[1]

    # scale alpha relative to the largest per-feature correlation norm
    Xty = np.array([xx.T.dot(yy) for xx, yy in zip(X, y)])
    alpha_max = np.linalg.norm(Xty, axis=0).max()
    alpha *= alpha_max / n_samples

    shared = dict(alpha=alpha,
                  fit_intercept=fit_intercept,
                  normalize=normalize)

    est = GroupLasso(**shared)
    est.fit(X, y)
    assert hasattr(est, 'is_fitted_')

    mtlasso = MultiTaskLasso(**shared)
    mtlasso.fit(design, y.T)
    assert_allclose(est.coef_, mtlasso.coef_.T, rtol=1e-2)
Esempio n. 25
0
def constrained_multiclass_solve(w, psi, alpha=1.0, quiet=False, **lasso_kws):
    r"""
    Fit a ``MultiTaskLasso`` of ``w`` on ``psi`` and return its coefficients.

    The problem this is meant to address is

    .. math::

        \text{argmin}_s \|s\|_0 \\
        \text{subject to} \|w - \psi s\|_2^2 \leq tol

    With ``quiet=True`` any ``RuntimeWarning`` and ``UserWarning`` raised
    during the fit are suppressed.  The return value is ``coef_`` transposed.
    """
    model = MultiTaskLasso(alpha=alpha, **lasso_kws)

    if not quiet:
        model.fit(psi, w)
        return model.coef_.T

    with warnings.catch_warnings():
        for category in (RuntimeWarning, UserWarning):
            warnings.filterwarnings("ignore", category=category)
        model.fit(psi, w)
    return model.coef_.T
Esempio n. 26
0
 def fit(self, X, y):
     """Draw a random hidden layer and fit a MultiTaskLasso on its output.

     Labels that are not already two-dimensional are converted with
     ``__one2array``.  Hidden weights and biases are sampled uniformly from
     ``[lower_bound, upper_bound)``; the sigmoid of the hidden activations
     is regressed onto the labels with ``MultiTaskLasso(self.C)``, which is
     stored in ``self.multi_lasso``.
     """
     # work on private copies of the inputs
     X = copy.deepcopy(X)
     y = copy.deepcopy(y)
     self.sample_weight = None

     # check label has form of 2-dim array
     if len(y.shape) == 2:
         self.classes_ = np.arange(y.shape[1])
         self.n_classes_ = len(self.classes_)
     else:
         self.classes_ = np.unique(y)
         self.n_classes_ = len(self.classes_)
         y = self.__one2array(y, self.n_classes_)

     bounds = (self.lower_bound, self.upper_bound)
     self.W = np.random.uniform(bounds[0],
                                bounds[1],
                                size=(X.shape[1], self.n_hidden))
     self.b = np.random.uniform(bounds[0],
                                bounds[1],
                                size=self.n_hidden)

     hidden = expit(np.dot(X, self.W) + self.b)
     lasso = MultiTaskLasso(self.C, max_iter=self.max_iter)
     self.multi_lasso = lasso.fit(hidden, y)
def multivariate_regression(output_filename):
    """Cross-validate a MultiTaskLasso on three feature sets.

    ``cv_regression`` is run for the game features alone, game + comp
    features, and game + AU features, always predicting
    ``NormalizedLearningGain`` and ``Presence``.  ``output_filename`` is
    created (or truncated) for the results.

    NOTE(review): nothing is written to the file and the scores are not
    used in the code visible here -- confirm whether reporting logic is
    missing.
    """
    lm = MultiTaskLasso(alpha=0.1)
    reg_name = "MTLassoRegression"

    # The context manager guarantees the handle is closed, including when a
    # cv_regression call raises.
    with open(output_filename, 'w') as regression_output:
        gcvr2, gr2 = cv_regression(lm,
                                   n_data,
                                   Game_cols,
                                   ["NormalizedLearningGain", "Presence"],
                                   show=True)
        gccvr2, gcr2 = cv_regression(lm,
                                     n_data,
                                     Game_cols + Comp_cols,
                                     ["NormalizedLearningGain", "Presence"],
                                     show=True)
        gaucvr2, gaur2 = cv_regression(lm,
                                       n_data,
                                       Game_cols + AU_cols,
                                       ["NormalizedLearningGain", "Presence"],
                                       show=True)
Esempio n. 28
0
    def __init__(self, scale=True, kfolds=5, alpha_stepsize=1 / 3.0):
        """Predict motif activities using Lasso MultiTask regression

        Parameters
        ----------
        scale : boolean, optional, default True
            If ``True``, the motif scores will be scaled
            before classification

        kfolds : integer, optional, default 5
            number of kfolds for parameter search

        alpha_stepsize : float, optional, default 0.333
            stepsize for use in alpha gridsearch

        Attributes
        ----------
        act_ : DataFrame, shape (n_motifs, n_clusters)
            fitted motif activities

        sig_ : DataFrame, shape (n_motifs,)
            boolean values, if coefficients are higher/lower than
            the 1% cutoff from random permutation
        """

        self.kfolds = kfolds
        self.act_description = ("activity values: coefficients from "
                                "fitted model")

        # Keep the documented ``scale`` option on the instance; it was
        # previously accepted but silently discarded.
        self.scale = scale

        # initialize attributes
        self.act_ = None
        self.sig_ = None

        mtk = MultiTaskLasso()
        # alpha grid: exp(-x) for x = 0, stepsize, 2 * stepsize, ... < 10
        parameters = {
            "alpha": [np.exp(-x) for x in np.arange(0, 10, alpha_stepsize)],
        }
        self.clf = GridSearchCV(mtk, parameters, cv=kfolds, n_jobs=4)
Esempio n. 29
0
def multi_task_lasso(df):
    """Randomized search over ``alpha`` for a ``MultiTaskLasso`` model.

    The ``X0``/``X1`` columns of ``df`` are used as features and the
    ``y1``/``y2``/``y3`` columns as targets. 200 candidate ``alpha`` values
    are drawn from ``uniform(0, 10)`` and scored with a scorer built from
    ``mtl_roc_auc`` (higher is better) using ``cv=5``.

    The best parameters, the best score and the coefficients of the best
    estimator are printed; the function returns ``None``.
    """
    # Disabled alternative feature set: ['X0', 'X1', 'X2', 'X3']
    features = df[['X0', 'X1']]
    targets = df[['y1', 'y2', 'y3']]

    # The alpha given here is only the estimator's starting value; the
    # search replaces it with each sampled candidate.
    base_model = MultiTaskLasso(fit_intercept=False, alpha=0.05)
    alpha_space = {'alpha': uniform(0, 10)}
    scorer = make_scorer(mtl_roc_auc, greater_is_better=True)

    search = RandomizedSearchCV(
        base_model,
        alpha_space,
        n_iter=200,
        scoring=scorer,
        verbose=10,
        n_jobs=1,
        cv=5,
    )
    search.fit(features, targets)

    for outcome in (
        search.best_params_,
        search.best_score_,
        search.best_estimator_.coef_,
    ):
        print(outcome)
Esempio n. 30
0
    def predict(
        self,
        forecast_length: int,
        future_regressor=[],
        just_point_forecast: bool = False,
    ):
        """Generates forecast data immediately following dates of index supplied to .fit()

        A regressor is trained on tsfresh features extracted from rolling
        windows of ``self.df_train`` and then applied one step at a time:
        each predicted row is appended to the working data and used to build
        the features for the next step.

        Args:
            forecast_length (int): Number of periods of data to forecast ahead
            future_regressor: additional regressor; accepted but not used by
                this method
            just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

        Returns:
            Either a PredictionObject of forecasts and metadata, or
            if just_point_forecast == True, a dataframe of point forecasts

        Raises:
            ImportError: if the module-level ``_has_tsfresh`` flag is false
        """
        if not _has_tsfresh:
            raise ImportError("Package tsfresh is required")
        predictStartTime = datetime.datetime.now()

        # ``extract_features`` and ``EfficientFCParameters`` are not imported
        # here; they are resolved from the enclosing module scope.
        from tsfresh.utilities.dataframe_functions import make_forecasting_frame
        from tsfresh.utilities.dataframe_functions import impute as tsfresh_impute

        max_timeshift = self.max_timeshift
        regression_model = self.regression_model
        feature_selection = self.feature_selection

        def _rolling_features(frame):
            """Build (features, targets) from ``frame``, one column at a time."""
            features = pd.DataFrame()
            targets = pd.DataFrame()
            for counter, column in enumerate(frame.columns):
                df_shift, current_y = make_forecasting_frame(
                    frame[column],
                    kind="time_series",
                    max_timeshift=max_timeshift,
                    rolling_direction=1,
                )
                current_X = extract_features(
                    df_shift,
                    column_id="id",
                    column_sort="time",
                    column_value="value",
                    impute_function=tsfresh_impute,
                    show_warnings=False,
                    default_fc_parameters=EfficientFCParameters(),
                    n_jobs=1,
                )
                current_X["feature_last_value"] = current_y.shift(1)
                # Prefix with the column position so features extracted from
                # different series do not collide after concatenation.
                prefix = str(counter) + '_'
                current_X.rename(columns=lambda name: prefix + name,
                                 inplace=True)

                features = pd.concat([features, current_X], axis=1)
                targets = pd.concat([targets, current_y], axis=1)
            return features, targets

        sktraindata = self.df_train.copy()
        X, y = _rolling_features(sktraindata)

        # drop constant features
        X = X.loc[:, X.apply(pd.Series.nunique) != 1]
        X = X.replace([np.inf, -np.inf], np.nan)
        X = X.fillna(0)
        y = y.ffill().bfill()

        # Optional feature selection. Columns are picked via get_support()
        # so X stays a labelled DataFrame: the forecasting loop below
        # re-selects the same features by name with ``x_dat[X.columns]``,
        # which cannot work on a bare array or a relabelled frame.
        selector = None
        if feature_selection == 'Variance':
            from sklearn.feature_selection import VarianceThreshold

            selector = VarianceThreshold(threshold=(0.15)).fit(X)
        elif feature_selection == 'Percentile':
            from sklearn.feature_selection import SelectPercentile, chi2

            # Only the first target column is used for scoring.
            # NOTE(review): chi2 expects non-negative features -- confirm
            # the extracted features satisfy this.
            selector = SelectPercentile(chi2, percentile=20).fit(
                X, y[y.columns[0]])
        elif feature_selection == 'DecisionTree':
            from sklearn.tree import DecisionTreeRegressor
            from sklearn.feature_selection import SelectFromModel

            clf = DecisionTreeRegressor()
            clf = clf.fit(X, y)
            selector = SelectFromModel(clf, prefit=True)
        elif feature_selection == 'Lasso':
            from sklearn.linear_model import MultiTaskLasso
            from sklearn.feature_selection import SelectFromModel

            clf = MultiTaskLasso(max_iter=2000)
            clf = clf.fit(X, y)
            selector = SelectFromModel(clf, prefit=True)
        if selector is not None:
            X = X.loc[:, selector.get_support()]

        # Drop first line (presumably because the shifted
        # ``feature_last_value`` has no predecessor there -- confirm).
        X = X.iloc[1:, ]
        y = y.iloc[1:]

        index = self.create_forecast_index(forecast_length=forecast_length)

        if regression_model == 'ElasticNet':
            from sklearn.linear_model import MultiTaskElasticNet

            regr = MultiTaskElasticNet(alpha=1.0,
                                       random_state=self.random_seed)
        elif regression_model == 'DecisionTree':
            from sklearn.tree import DecisionTreeRegressor

            regr = DecisionTreeRegressor(random_state=self.random_seed)
        elif regression_model == 'MLP':
            from sklearn.neural_network import MLPRegressor

            # relu/tanh lbfgs/adam layer_sizes (100) (10)
            regr = MLPRegressor(
                hidden_layer_sizes=(10, 25, 10),
                verbose=self.verbose_bool,
                max_iter=200,
                activation='tanh',
                solver='lbfgs',
                random_state=self.random_seed,
            )
        elif regression_model == 'KNN':
            from sklearn.multioutput import MultiOutputRegressor
            from sklearn.neighbors import KNeighborsRegressor

            # KNeighborsRegressor has no ``random_state`` parameter;
            # passing one raises TypeError.
            regr = MultiOutputRegressor(KNeighborsRegressor())
        elif regression_model == 'Adaboost':
            from sklearn.multioutput import MultiOutputRegressor
            from sklearn.ensemble import AdaBoostRegressor

            # random_state is not passed here (it was left disabled).
            regr = MultiOutputRegressor(AdaBoostRegressor(n_estimators=200))
        else:
            # Any other value falls back to a random forest.
            from sklearn.ensemble import RandomForestRegressor

            regr = RandomForestRegressor(random_state=self.random_seed,
                                         n_estimators=1000,
                                         verbose=self.verbose)

        regr.fit(X, y)

        combined_index = self.df_train.index.append(index)
        forecast = pd.DataFrame()
        # Relabel columns 0..n-1 so each predicted row, which gets default
        # integer column labels, lines up when appended below.
        sktraindata.columns = list(range(len(sktraindata.columns)))

        for _step in range(forecast_length):
            # Only the most recent ``max_timeshift`` rows feed the next step.
            x_dat, _targets = _rolling_features(
                sktraindata.tail(max_timeshift))

            # Keep exactly the features the regressor was trained on.
            x_dat = x_dat[X.columns]
            rfPred = pd.DataFrame(regr.predict(x_dat.tail(1).values))

            forecast = pd.concat([forecast, rfPred], axis=0, ignore_index=True)
            # Feed the prediction back in as the newest observation.
            sktraindata = pd.concat([sktraindata, rfPred],
                                    axis=0,
                                    ignore_index=True)
            sktraindata.index = combined_index[:len(sktraindata.index)]

        forecast.columns = self.column_names
        forecast.index = index

        if just_point_forecast:
            return forecast
        else:
            upper_forecast, lower_forecast = Point_to_Probability(
                self.df_train,
                forecast,
                prediction_interval=self.prediction_interval)

            predict_runtime = datetime.datetime.now() - predictStartTime
            prediction = PredictionObject(
                model_name=self.name,
                forecast_length=forecast_length,
                forecast_index=forecast.index,
                forecast_columns=forecast.columns,
                lower_forecast=lower_forecast,
                forecast=forecast,
                upper_forecast=upper_forecast,
                prediction_interval=self.prediction_interval,
                predict_runtime=predict_runtime,
                fit_runtime=self.fit_runtime,
                model_parameters=self.get_params(),
            )
            return prediction