Esempio n. 1
0
def l1_enet(ratio):
    """Fit ElasticNetCV at a fixed l1 ratio on the module-level training data.

    Returns the fitted model, the selected alpha, the count of effectively
    non-zero coefficients, and the test/train RMSE.
    """
    model = ElasticNetCV(cv=rkf,
                         l1_ratio=ratio,
                         max_iter=1e7,
                         tol=0.001,
                         fit_intercept=fit_int_flag,
                         random_state=rs)
    model.fit(X_train, y_train)

    # Optimal regularization strength chosen by the inner CV.
    best_alpha = model.alpha_
    # Coefficients below 1e-7 in magnitude are treated as exactly zero.
    n_nonzero = int(np.count_nonzero(np.abs(model.coef_) >= 1e-7))

    # Train/test root-mean-squared errors of the final refit model.
    rmse_test = np.sqrt(mean_squared_error(y_test, model.predict(X_test)))
    rmse_train = np.sqrt(mean_squared_error(y_train, model.predict(X_train)))

    return model, best_alpha, n_nonzero, rmse_test, rmse_train
Esempio n. 2
0
def _elasticnetcv(*,
                  train,
                  test,
                  x_predict=None,
                  metrics,
                  l1_ratio=0.5,
                  eps=0.001,
                  n_alphas=100,
                  alphas=None,
                  fit_intercept=True,
                  normalize=False,
                  precompute='auto',
                  max_iter=1000,
                  tol=0.0001,
                  cv=None,
                  copy_X=True,
                  verbose=0,
                  n_jobs=None,
                  positive=False,
                  random_state=None,
                  selection='cyclic'):
    """Fit ElasticNetCV on `train`, score it on `test`, optionally predict.

    Parameters
    ----------
    train, test : (X, y) tuples.
    x_predict : optional feature matrix to predict after scoring.
    metrics : one of 'mse', 'rmse', 'mae' — which error metric to report.
    All remaining keyword arguments are forwarded to sklearn's ElasticNetCV;
    for details visit:
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNetCV.html#sklearn.linear_model.ElasticNetCV

    Returns
    -------
    (model_name, accuracy, y_predict) — y_predict is None if x_predict is.

    Raises
    ------
    ValueError if `metrics` is not one of the supported names.
    """

    model = ElasticNetCV(l1_ratio=l1_ratio,
                         eps=eps,
                         n_alphas=n_alphas,
                         alphas=alphas,
                         fit_intercept=fit_intercept,
                         normalize=normalize,
                         precompute=precompute,
                         max_iter=max_iter,
                         tol=tol,
                         cv=cv,
                         copy_X=copy_X,
                         verbose=verbose,
                         n_jobs=n_jobs,
                         positive=positive,
                         random_state=random_state,
                         selection=selection)
    model.fit(train[0], train[1])
    model_name = 'ElasticNetCV'
    y_hat = model.predict(test[0])

    # Bug fix: the original independent if-chain left `accuracy` unbound
    # (NameError) for an unrecognized metric; fail fast with a clear message.
    if metrics == 'mse':
        accuracy = _mse(test[1], y_hat)
    elif metrics == 'rmse':
        accuracy = _rmse(test[1], y_hat)
    elif metrics == 'mae':
        accuracy = _mae(test[1], y_hat)
    else:
        raise ValueError("metrics must be 'mse', 'rmse' or 'mae', got %r" % (metrics,))

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
def Regularized_Smap(abund, target_otu, theta, l_grid, iteration, cv, train_len):
    """Regularized S-map for one OTU: locally-weighted elastic-net fits.

    For every time point, the remaining points are weighted by their distance
    to the target state (S-map weighting), ElasticNetCV is fit over a grid of
    l1 ratios, and per-point coefficients plus fit statistics are written to
    CSV files under ../Output/test/0/.

    Parameters (as used below):
      abund      : time-series abundance matrix (rows = time, cols = OTUs)
      target_otu : column index of the response OTU
      theta      : S-map locality parameter passed to make_weights
      l_grid     : spacing of the l1_ratio grid in (0, 1]
      iteration  : max_iter for ElasticNetCV
      cv         : number of CV folds
      train_len  : number of points held out as the test set each round
    """
    print('Process data for otu No. %s' % str(target_otu+1))
    # Build the elastic-net input: column 0 is the response at t+1, the
    # remaining columns are all OTUs at time t.
    block = np.append(abund[1:, target_otu], abund[0:-1, ], axis=1)
    # Delete the discontinuous states (boundaries between concatenated
    # series). Bug fix: np.delete requires integer indices — the original
    # used true division (`/ 3`), which yields floats in Python 3.
    block = np.delete(block, [abund.shape[0] // 3 - 1, abund.shape[0] // 3 * 2 - 1], axis=0)
    # Each time series is normalized to mean 0 / std 1 before S-map analysis.
    block = (block - np.average(block, axis=0)) / np.std(block, axis=0)

    ##Select data and fitting
    print('Start fitting.')
    lib = range(block.shape[0])
    coefs = np.empty(shape=(block.shape[0], block.shape[1] - 1))
    fit_results = np.empty(shape=(block.shape[0], 13))

    for ipred in lib:
        print('\r', 'Complete percentage: %.2f%%' % (ipred / len(lib) * 100), end="", flush=True)
        sub_block = np.delete(block, ipred, axis=0)
        q = block[lib[ipred], :]
        # Euclidean distance of every remaining state to the target state
        # (assumes block is 2-D matrix-like so q[:, 1:] indexes — TODO confirm).
        E_dist = np.sqrt(np.sum(np.array(sub_block[:, 1:] - q[:, 1:]) ** 2, axis=1))
        w = make_weights(E_dist, theta)
        ###Weighted predictors and responses
        X_wp = weight_data(sub_block[:, 1:], w)
        Y_wp = np.ravel(weight_data(sub_block[:, 0], w))
        X_target = block[ipred, 1:]
        Y_target = block[ipred, 0]

        # Split training and test data; the target point itself is always
        # appended to the training set. NOTE: `pick_test` has size train_len.
        pick_test = np.random.choice(range(X_wp.shape[0]), size=train_len, replace=False)
        X_train = np.append(np.delete(X_wp, pick_test, axis=0), X_target, axis=0)
        X_test = X_wp[pick_test, :]
        Y_train = np.append(np.delete(Y_wp, pick_test, axis=0), Y_target)
        Y_test = Y_wp[pick_test]

        # Fit over an evenly spaced grid of l1 ratios.
        regr = ElasticNetCV(cv=cv, random_state=0, max_iter=iteration,
                            l1_ratio=[(i + 1) * l_grid for i in range(int(1 / l_grid))])
        regr.fit(X_train, Y_train)
        rmse = np.sqrt(np.mean((regr.predict(X_train) - Y_train) ** 2))
        rmse_o = np.sqrt(np.mean((regr.predict(X_test) - Y_test) ** 2))
        coefs[ipred, :] = regr.coef_
        fit_results[ipred, :] = regr.intercept_, regr.alpha_, regr.l1_ratio_, rmse, np.std(Y_train), rmse_o, np.std(
            Y_test), regr.score(X_test, Y_test), regr.score(X_train, Y_train), max(Y_train), min(Y_train), max(
            Y_test), min(Y_test)
        print('\r', 'Complete percentage: %.2f%%' % ((ipred + 1) / len(lib) * 100), end="", flush=True)

    # Output results
    coefs = pd.DataFrame(data=coefs)
    coefs.to_csv('../Output/test/0/coefs/%s_%s_coefs.csv' % (target_otu, theta))
    fit_results = pd.DataFrame(
        columns=['Intercept', 'Best alpha', 'Best l1_ratio', 'RMSE', 'Std', 'RMSE_o', 'Std_o', 'Test set score',
                 'Test set score_train', 'ymax_train', 'ymin_train', 'ymax_test', 'ymin_test'],
        data=fit_results)
    fit_results.to_csv('../Output/test/0/fit_result/%s_%s_fit_results.csv' % (target_otu, theta))
Esempio n. 4
0
    def train_elasticnet_model(self, mode, ffm):
        """Grid-search the elastic-net l1 ratio for one FFM target.

        Tries 10 l1 ratios with inner 10-fold CV, picks the one with the
        lowest validation MSE, refits at that ratio, caches the model under
        ``tuple(mode + [ffm])`` and returns it.

        NOTE(review): `mode` only participates in the cache key — the
        commented-out lines suggest features were once selected per mode.
        """
        # X_train = np.array(self.X_train[mode])
        X_train = np.array(self.X_train2)
        y_train = np.array(self.y_train[ffm])

        # X_val = np.array(self.X_val[mode])
        X_val = np.array(self.X_val2)
        y_val = np.array(self.y_val[ffm])

        l1ratios = np.linspace(0.1, 1, 10)

        mses = []
        alps = []
        verr = []

        for l1 in l1ratios:
            print(l1)
            enet = ElasticNetCV(l1_ratio=l1, cv=10)
            enet.fit(X_train, y_train)
            y_pred = enet.predict(X_val)
            mse = mean_squared_error(y_val, y_pred)
            v = enet.score(X_val, y_val)

            mses.append(mse)
            alps.append(enet.alpha_)
            verr.append(v)

        i_opt = np.argmin(mses)
        l1_opt = l1ratios[i_opt]
        alpha_opt = alps[i_opt]

        print("optimal l1", l1_opt)
        print("optimal alpha", alpha_opt)

        # Refit at the best l1 ratio; alpha is re-selected by the inner CV
        # (alpha_opt above is reported but not pinned — TODO confirm intent).
        enet2 = ElasticNetCV(l1_ratio=l1_opt)
        enet2.fit(X_train, y_train)
        y_pred = enet2.predict(X_val)
        y_pred_train = enet2.predict(X_train)

        print("Training MSE", mean_squared_error(y_train, y_pred_train))
        print("Validation MSE", mean_squared_error(y_val, y_pred))

        print("Training Pearson R", pearsonr(y_train, y_pred_train))
        print("Validation Pearson R", pearsonr(y_val, y_pred))

        # Bug fix: these previously scored `enet` (the model from the *last*
        # l1 ratio in the loop) rather than the refit `enet2`.
        print("Training R2 score:", enet2.score(X_train, y_train))
        print("Validation R2 score:", enet2.score(X_val, y_val))

        key = tuple(mode + [ffm])
        self.elasticnet[key] = enet2

        return self.elasticnet[key]
    def run_repeated(self, feature_prefix, n_trials=10, kfold_num=5):
        """Evaluate ElasticNetCV with repeated k-fold CV for every target.

        Builds the feature matrix for `feature_prefix`, then for each of
        n_trials * kfold_num splits fits one ElasticNetCV per target column
        in Y. Returns a DataFrame with one row per (split, target) holding
        train/test R^2 and the hyper-parameters the inner CV selected.
        """
        if isinstance(feature_prefix, str):
            feature_prefix = [feature_prefix]
        X, Y = self.build(feature_prefix)
        X = X.values

        splitter = RepeatedKFold(n_splits=kfold_num,
                                 n_repeats=n_trials,
                                 random_state=self.seed)
        desc = "+".join(feature_prefix)
        progress = tqdm(splitter.split(X),
                        total=n_trials * kfold_num,
                        ncols=50,
                        desc=desc)
        records = []
        for train_idx, test_idx in progress:
            for target in Y.columns:
                y = Y[target].values

                estimator = ElasticNetCV(
                    random_state=self.seed,
                    n_alphas=50,
                    cv=10,
                    n_jobs=4,
                    l1_ratio=[.01, .1, 0.3, .5, 0.7, 0.9, 0.99],
                    selection='random',
                    tol=5e-3,
                    verbose=0)
                estimator.fit(X[train_idx], y[train_idx])

                train_r2 = r2_score(y[train_idx], estimator.predict(X[train_idx]))
                test_r2 = r2_score(y[test_idx], estimator.predict(X[test_idx]))
                # Surface the latest score in the progress bar.
                progress.set_description("{} {}: {:.2f}".format(
                    desc, target, test_r2))
                progress.refresh()
                sleep(0.01)

                records.append({
                    "foundation": target,
                    "test_r2": test_r2,
                    "train_r2": train_r2,
                    "alpha": estimator.alpha_,
                    "l1_ratio": estimator.l1_ratio_
                })
        return pd.DataFrame(records)
def elastic_net_reg(X_train_scaled, X_test_scaled, y_train, y_test):
    """Fit ElasticNetCV on pre-scaled features; return [accuracy, MSE, R^2]
    computed on the test split.

    NOTE(review): accuracy_score on np.round of regression output only makes
    sense when the target is integer-coded — confirm against the caller.
    """
    from sklearn.linear_model import ElasticNetCV
    # n_alphas (int): number of alphas along the regularization path,
    # used for each l1_ratio
    n_alphas = 300
    # float between 0 and 1 passed to ElasticNet (scaling between l1 and l2 penalties)
    l1_ratio = [.1, .3, .5, .7, .9]
    # cv: number of k-folds for the cross-validation
    rr = ElasticNetCV(n_alphas=n_alphas, l1_ratio=l1_ratio, cv=10, random_state=0)
    rr.fit(X_train_scaled, y_train)
    y_pred_train = rr.predict(X_train_scaled)
    y_pred_test = rr.predict(X_test_scaled)
    metrics_en = [accuracy_score(y_test, np.round(y_pred_test)), mean_squared_error(y_test, y_pred_test), r2_score(y_test, y_pred_test)]
    return metrics_en
Esempio n. 7
0
def elasticnet():
    """Fit ElasticNetCV on the module-level PCA features, print validation
    MSE, and write test-set predictions to data/elasticnet.txt.
    """
    elasticnet = ElasticNetCV()
    X_train, X_test, Y_train, Y_test = train_test_split(train_pca_value,
                                                        train_pro,
                                                        test_size=0.1,
                                                        random_state=9)
    elasticnet.fit(X_train, Y_train)
    pre = elasticnet.predict(X_test)
    loss = mean_squared_error(pre, Y_test)
    print(loss)
    pre = elasticnet.predict(test_pca_data)
    # Bug fix: use a context manager so the file is closed even if a write
    # fails (the original open()/close() pair leaked on error).
    with open('data/elasticnet.txt', 'w') as write:
        for i in range(len(pre)):
            write.write("%f\r" % pre[i])
def learn_for(reviews, i):
    """Predict user i's ratings from all other users via 4-fold elastic net.

    Returns a vector shaped like the full movie axis; movies the user never
    rated keep prediction 0 so the shape matches the other models.
    """
    reg = ElasticNetCV(fit_intercept=True,
                       alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    u = reviews[i]
    # All user indices except the target user.
    others = np.delete(np.arange(reviews.shape[0]), i)
    # Columns (movies) the target user actually rated.
    ps, = np.where(u.toarray().ravel() > 0)
    x = reviews[others][:, ps].T
    y = u.data
    kf = KFold(n_splits=4)
    predictions = np.zeros(len(u.toarray().ravel()))

    def _center(mat):
        """Subtract each row's mean over its positive entries, in place;
        returns the per-row means (NaN -> 0 for all-zero rows)."""
        means = np.nan_to_num(np.array([row[row > 0].mean() for row in mat]))
        for r in range(mat.shape[0]):
            mat[r] -= (mat[r] > 0) * means[r]
        return means

    for train, test in kf.split(y):
        xc = x[train].copy().toarray()
        x1 = _center(xc)
        reg.fit(xc, y[train] - x1)

        xc = x[test].copy().toarray()
        x1 = _center(xc)
        predictions[test] = np.array(reg.predict(xc)).ravel()
    return predictions
Esempio n. 9
0
def main(file_name):
    """Train ElasticNetCV on saved train/test CSVs, report RMSE, and pickle
    the fitted model to ../output/.

    Side effects: reads ../datasets/* files and writes the model file.
    """
    df_X_train = pd.read_csv("../datasets/{}_X_train.csv".format(file_name),
                             index_col=0)
    df_X_test = pd.read_csv("../datasets/{}_X_test.csv".format(file_name),
                            index_col=0)
    df_y_train = pd.read_csv("../datasets/{}_y_train.csv".format(file_name),
                             index_col=0)
    df_y_test = pd.read_csv(
        "../datasets/{}_y_test.csv".format(file_name),
        index_col=0,
    )
    # Bug fix: the gene-list files were opened without ever being closed;
    # use context managers.
    with open(os.path.join("../datasets/", "bg_genes.txt")) as fh:
        bg_genes = fh.read().split("\n")
    with open(os.path.join("../datasets/", "affecting_genes.txt")) as fh:
        affecting_genes = fh.read().split("\n")
    if affecting_genes[0] == '':
        affecting_genes = []
    # Restrict both frames to background genes + affecting genes + treatment.
    fs = bg_genes + affecting_genes + ['T']
    df_X_train = df_X_train.loc[:, fs]
    df_X_test = df_X_test.loc[:, fs]

    model = ElasticNetCV().fit(df_X_train, df_y_train)
    y_hat = model.predict(df_X_test)
    loss = np.sqrt(mean_squared_error(y_hat, df_y_test))
    # Bug fix: the Python 2 `file()` builtin and text mode cannot hold pickle
    # bytes — open in binary mode, and use print() (Python 3).
    with open("../output/elastic_net_{}".format(file_name), 'wb') as fh:
        pickle.dump(model, fh)
    print("loss_elastic_net: {}".format(loss))
Esempio n. 10
0
def regression_NumMosquitos(Xtr, ytr, Xte):
    """Fit a default cross-validated elastic net on (Xtr, ytr) and return
    predictions for Xte."""
    from sklearn.linear_model import ElasticNetCV
    estimator = ElasticNetCV()
    estimator.fit(Xtr, ytr)
    return estimator.predict(Xte)
Esempio n. 11
0
def predict(X_train, X_test, y_train, y_test, features, pic_name, dir):
    """
    Fit an ElasticNetCV model on the training split, print its error metrics
    and selected hyper-parameters, and plot the coefficients/predictions.
    :param X_train: training features
    :param X_test: test features
    :param y_train: training targets
    :param y_test: test targets
    :param features: feature names aligned with the model coefficients
    :param pic_name: name for the output plots
    :param dir: directory the plots are written to
    :return: None
    """
    model = ElasticNetCV(cv=4)
    model.fit(X_train, y_train)

    # Renamed from `predict`, which shadowed this function's own name.
    y_pred = model.predict(X_test)
    print("mean absolute error: ", mean_absolute_error(y_test, y_pred))
    print("r2 error: ", sklearn.metrics.r2_score(y_test, y_pred))
    print("alpha: ", model.alpha_)
    print("alphas: ", model.alphas_)
    print("iter: ", model.n_iter_)

    coefficients_str = "".join("{}: {:.4f}\n".format(name, coef)
                               for name, coef in zip(features, model.coef_))
    # Bug fix: strip only the trailing newline. The original sliced [:-2],
    # which also chopped the last digit of the final coefficient.
    coefficients_str = coefficients_str.rstrip("\n")

    print("coef: ", coefficients_str)

    Plot_output.plot_coefficients(coefficients_str, pic_name=pic_name, dir=dir)
    Plot_output.plot_graph(X_test, y_test, y_pred, pic_name, dir)
Esempio n. 12
0
def GLM(X_train, X_test, y_train):
    """Fit a cross-validated elastic net and return predictions for X_test."""
    estimator = ElasticNetCV(random_state=0, tol=0.01, cv=5, max_iter=20000)
    estimator.fit(X_train, y_train)
    return estimator.predict(X_test)
Esempio n. 13
0
def learn_for(reviews, i):
    """Predict user i's ratings for their rated movies via elastic net over
    all other users' ratings, with 4-fold CV.

    Returns a length-nmovies vector; movies user i never rated stay 0.

    NOTE(review): ``KFold(len(ps), n_folds=4)`` is the pre-0.18
    sklearn.cross_validation API — confirm which sklearn version this
    snippet targets before modernizing.
    """
    reg = ElasticNetCV(fit_intercept=True,
                       alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    nusers, nmovies = reviews.shape
    u = reviews[i]
    # All user indices except the target user.
    us = np.arange(reviews.shape[0])
    us = np.delete(us, i)
    # Movies the target user actually rated.
    ps, = np.where(u.ravel() > 0)
    x = reviews[us][:, ps].T
    kf = KFold(len(ps), n_folds=4)
    predictions = np.zeros(len(ps))
    for train, test in kf:
        xc = x[train].copy()
        # Per-row mean over positive (observed) entries; NaN -> 0 for rows
        # with no observations.
        x1 = np.array([xi[xi > 0].mean() for xi in xc])
        x1 = np.nan_to_num(x1)

        # Center observed entries only.
        # NOTE: this loop reuses `i`, shadowing the user-index parameter
        # (harmless here because `i` is not read again afterwards).
        for i in range(xc.shape[0]):
            xc[i] -= (xc[i] > 0) * x1[i]

        reg.fit(xc, u[train] - x1)

        xc = x[test].copy()
        x1 = np.array([xi[xi > 0].mean() for xi in xc])
        x1 = np.nan_to_num(x1)

        for i in range(xc.shape[0]):
            xc[i] -= (xc[i] > 0) * x1[i]

        p = reg.predict(xc).ravel()
        predictions[test] = p
    # Scatter the fold predictions back onto the full movie axis.
    fill_preds = np.zeros(nmovies)
    fill_preds[ps] = predictions
    return fill_preds
Esempio n. 14
0
def Model(Encoding, Scores, Run_name, step_size, loop_dict, var_dict,
          round_data, ElasticNet_dict, l1_ratios, All_data):
    """5-fold CV pipeline: amplitude-tune the score dictionary until the
    error stops improving, then fit ElasticNetCV on the tuned scores and
    record the Pearson correlation of the held-out predictions.

    Returns the mean Pearson correlation over the 5 folds. Side effects:
    mutates loop_dict / round_data / ElasticNet_dict / All_data in place and
    saves "All Data.npy".
    """
    Pearson_correlations = []
    Data = Encoding.copy()  # copy, so the caller's frame is not mutated
    Data_sets = CV_split(Data, 5)  # the 5 CV folds
    for cv_round in range(len(Data_sets)):
        score_dict = Scores.copy()  # randomized scores at the start each round
        Test_set = Data_sets[cv_round]
        Train_set = exclude(Data_sets,
                            cv_round)  # keeps every fold except cv_round
        Train_set = pd.concat(Train_set)  # all training folds into one dataframe
        X = Train_set.iloc[:, :Train_set.shape[1] - 1]  # features
        X['Intercept'] = 1  # add intercept column
        y = pd.DataFrame(Train_set['pMeas'])  # targets
        AM_EndOfLoopError = []
        AM_EndOfLoopError.append(Get_Error(
            X, y, score_dict))  # the error before AM tuning
        """AM Tuning Looping Starts Here and Adds a value to End of Loop Error"""
        Loop_num = 1  # first tuning pass
        AM_EndOfLoopError.append(
            Amplitude_Tuning(X, y, step_size, score_dict, Loop_num, Run_name,
                             cv_round, loop_dict, var_dict))
        round_data[cv_round] = loop_dict
        # Keep tuning while the relative error improvement exceeds 0.1%.
        while ((AM_EndOfLoopError[-1] - AM_EndOfLoopError[-2]) /
               (AM_EndOfLoopError[-2])) < -0.001:
            Loop_num += 1
            AM_EndOfLoopError.append(
                Amplitude_Tuning(X, y, step_size, score_dict, Loop_num,
                                 Run_name, cv_round, loop_dict, var_dict))
            round_data[cv_round] = loop_dict
        loop_dict['AM Time Series Data'] = AM_EndOfLoopError
        loop_dict['Final Scores'] = score_dict
        """  AM Tuning is now Finished for the CV_split, Elastic Net is Next """
        EN = ElasticNetCV(l1_ratio=l1_ratios,
                          cv=5,
                          copy_X=True,
                          normalize=True,
                          random_state=23)
        # Map categorical entries to their tuned numeric scores.
        X_train = X.copy()
        X_train.replace(score_dict, inplace=True)
        y_train = y.copy()
        X_test = Test_set.iloc[:, :Test_set.shape[1] - 1]
        X_test.replace(score_dict, inplace=True)
        X_test['Intercept'] = 1
        y_test = pd.DataFrame(Test_set['pMeas'])
        EN.fit(X_train, y_train)
        y_pred = pd.DataFrame(EN.predict(X_test))
        # Pearson correlation between held-out truth and prediction.
        Pearson_correlations.append(np.corrcoef(y_test.T, y_pred.T)[0][1])
        """Save Everything """
        ElasticNet_dict["y_pred"] = y_pred
        ElasticNet_dict['y_test'] = y_test
        ElasticNet_dict['Alpha'] = EN.alpha_
        ElasticNet_dict['l1_ratio'] = EN.l1_ratio_
        ElasticNet_dict['Parameters'] = EN.get_params()
        ElasticNet_dict["AlphaSpace"] = EN.alphas_
        loop_dict['ElasticNet'] = ElasticNet_dict
        round_data[cv_round] = loop_dict
    All_data[Run_name] = round_data
    np.save("All Data.npy", All_data)
    return np.mean(Pearson_correlations)
Esempio n. 15
0
def elastic_net(name, cv=5):
    """Fit an ElasticNetCV model on a standardized subset and report metrics.

    Inputs are standardized via ``split.standardize``. l1 ratios are spread
    on a log scale as recommended by the package authors; ``n_jobs=-1`` uses
    all local processors.

    :param name: dataset key understood by ``split.split_subset`` /
        ``ds.get_names``.
    :param cv: number of cross-validation folds (default 5).
    :return: (model, performance) — ``performance`` also records the
        selected alpha and l1 ratio.
    """
    display_name = ds.get_names()[name]
    X_train, X_test, y_train, y_test, train = split.split_subset(name)
    X_train, X_test = split.standardize(X_train, X_test)
    l1_ratios = np.geomspace(1e-8, 1, 50)
    # Bug fix: honor the caller's `cv` argument — it was previously ignored
    # in favor of a hard-coded cv=5.
    model = ElasticNetCV(l1_ratio=l1_ratios,
                         n_alphas=50,
                         cv=cv,
                         verbose=0,
                         n_jobs=-1,
                         random_state=18).fit(X_train, y_train)

    performance = metrics.apply_metrics('{} Elastic Net'.format(display_name),
                                        y_test, model.predict(X_test), y_train)
    performance['Tuning Parameters'] = [{
        'Alpha': model.alpha_,
        'L1 Ratio': model.l1_ratio_
    }]
    return model, performance
Esempio n. 16
0
def calculateAccuracyWithModel(indbest, X_train, y_train, X_test, y_test):
    """Fit an elastic net on the evaluated individuals, print the test R^2,
    and print a human-readable model string built from the sorted
    coefficients. Prints only; does not return the score.
    """
    indbest = list(indbest)
    evalTrain = evaluatedMatrix(indbest, X_train)
    evalTest = evaluatedMatrix(indbest, X_test)

    # Linear regression with elastic net
    regr = ElasticNetCV(random_state=0)
    regr.fit(evalTrain, y_train)
    y_pred = regr.predict(evalTest)
    print(r2_score(y_test, y_pred))
    # Sort terms and coefficients together so the model string is stable.
    indbest, regr.coef_ = sortCoef(indbest, regr.coef_)
    model = ""
    i = 0
    # `not in [0, -0]` is equivalent to `!= 0` (0 == -0 in Python).
    if regr.intercept_ not in [0, -0]:
        model = str(coefStr(regr.intercept_))
    for ind in indbest:
        if regr.coef_[i] not in [0, -0]:
            # Negative coefficients already carry their sign; positive ones
            # get a '+' only when the string is non-empty so far.
            if "-" in str(regr.coef_[i]):
                indCoef = str(coefStr(regr.coef_[i])) + "*" + str(ind)
            elif len(model) > 0:
                indCoef = "+" + str(coefStr(regr.coef_[i])) + "*" + ind
            else:
                indCoef = str(coefStr(regr.coef_[i])) + "*" + ind
            model = model + indCoef
        i = i + 1
    print(model)
Esempio n. 17
0
def algor_ElasticNetCV():
    """Flask view: fit ElasticNetCV with hyper-parameters from the posted
    form on the session's CSV and render the R^2 of the training fit.
    """
    request_content = request.form.to_dict()
    df = pd.read_csv(session.get('file'))
    X_train,Y_train = onehot(df)
    params = request_content
    if params['alpha'] != 'None':
        params['alpha'] = [float(params['alpha'])]
    else:
        params['alpha'] = None

    def _as_bool(text):
        # Bug fix: bool() on any non-empty string is True (bool('False') is
        # True), so the form's boolean fields were always parsed as True.
        return str(text).strip().lower() in ('true', '1', 'yes', 'on')

    # NOTE(review): the form field is spelled 'l1_rotio' — it must match the
    # template, so the key is kept as-is.
    model = ElasticNetCV(alphas=params['alpha'],
                               l1_ratio=float(params['l1_rotio']),
                               fit_intercept=_as_bool(params['fit_intercept']),
                               normalize=_as_bool(params['normalize']),
                               max_iter=int(params['max_iter']),
                               tol=float(params['tol'])
                               )
    model.fit(X_train, Y_train)
    y_pred = model.predict(X_train)

    context = {
        'algor': '弹性网回归',
        'roc_AUC': 'None(仅用于分类器)',
        'ACC': 'None(仅用于分类器)',
        'Recall': 'None(仅用于分类器)',
        'F1_score': 'None(仅用于分类器)',
        'Precesion':  'None(仅用于分类器)',
        'R_2' : round(metrics.r2_score(Y_train,y_pred),2)
    }
    return render_template('ElasticNetCV.html', **context)
Esempio n. 18
0
 def elasticnet_cv(self, nsplits: int, lam: float = None, l1_ratio: float = None):
     """
     Cross-validate an elastic net on (self.x, self.y) and report mean MSE.

     Missing hyper-parameters are first chosen by an ElasticNetCV search.
     :param nsplits: number of CV folds
     :param lam: regularization strength; selected by CV when None
     :param l1_ratio: l1/l2 mix, 0 means ridge and 1 means lasso; selected
         by CV when None
     :return: the cross-validated mse
     """
     if lam is None or l1_ratio is None:
         tuner = ElasticNetCV(cv=nsplits, l1_ratio=[0.1, 0.3, 0.5, 0.7, 0.95, 0.99, 1]).fit(self.x, self.y)
         lam = tuner.alpha_ if lam is None else lam
         l1_ratio = tuner.l1_ratio_ if l1_ratio is None else l1_ratio
     fold_errors = []
     for train_idx, test_idx in KFold(n_splits=nsplits).split(self.x):
         fitted = ElasticNet(alpha=lam, l1_ratio=l1_ratio).fit(
             self.x[train_idx, :], self.y[train_idx])
         fold_errors.append(mse(self.y[test_idx], fitted.predict(self.x[test_idx, :])))
     return np.mean(fold_errors)
Esempio n. 19
0
    def learn_for(self, i):
        """Predict user i's ratings for their rated movies via elastic net
        over all other users, with 4-fold CV; returns a length-nmovies
        vector that is zero for movies user i never rated.

        NOTE(review): ``KFold(len(ps), n_folds=4)`` is the pre-0.18
        sklearn.cross_validation API — confirm the targeted sklearn version.
        """
        reviews        = AbstractEstimateBase.reviews
        reg            = ElasticNetCV(fit_intercept=True, alphas=[
                           0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
        nusers,nmovies = reviews.shape
        u              = reviews[i]
        us             = np.arange(reviews.shape[0])
        us             = np.delete(us, i)
        ps,            = np.where(u.ravel() > 0)
        x              = reviews[us][:, ps].T
        kf             = KFold(len(ps), n_folds=4)
        predictions    = np.zeros(len(ps))
        for train, test in kf:
            xc = x[train].copy()
            # Per-row mean over positive (observed) entries; NaN -> 0 for
            # rows with no observations.
            x1 = np.array([xi[xi > 0].mean() for xi in xc])
            x1 = np.nan_to_num(x1)

            # Center observed entries only.
            # NOTE: this loop reuses `i`, shadowing the user-index parameter
            # (harmless because `i` is not read again afterwards).
            for i in range(xc.shape[0]):
                xc[i] -= (xc[i] > 0) * x1[i]

            reg.fit(xc, u[train] - x1)

            xc = x[test].copy()
            x1 = np.array([xi[xi > 0].mean() for xi in xc])
            x1 = np.nan_to_num(x1)

            for i in range(xc.shape[0]):
                xc[i] -= (xc[i] > 0) * x1[i]

            p = reg.predict(xc).ravel()
            predictions[test] = p
        # Scatter fold predictions back onto the full movie axis.
        fill_preds = np.zeros(nmovies)
        fill_preds[ps] = predictions

        return fill_preds
Esempio n. 20
0
    def predict(self, X):
        """Fill unrated entries of the ratings matrix X via per-user
        elastic-net regression.

        For each user, regress that user's observed ratings on all other
        users' ratings of the same movies, then predict the movies the user
        has not rated. Observed entries are kept unchanged.

        :param X: users x movies rating matrix; 0 marks "unrated".
        :return: dense matrix with the unrated entries predicted.
        """
        binary = X > 0
        # Idiom fix: plain truthiness instead of `== True` comparisons.
        if self.normalize:
            X = self.norm.fit_transform(X)

        num_users, num_movies = X.shape
        clf = ElasticNetCV(alphas=[0.1])
        predicted = X.copy()

        for user in range(num_users):
            # Boolean mask of movies rated by this user.
            movie_user = binary[user]
            # Use every *other* user's ratings as regression features.
            neighbors = np.ones((num_users), dtype=bool)
            neighbors[user] = False
            X_train_user = X[neighbors]
            X_train_user = X_train_user[:, movie_user].T
            y_train_user = X[user, movie_user]
            clf.fit(X_train_user, y_train_user)
            X_test_user = X[neighbors]
            X_test_user = X_test_user[:, ~movie_user].T
            predicted[user, ~movie_user] = clf.predict(X_test_user)

        if self.normalize:
            predicted = self.norm.inverse_transform(predicted)

        return predicted
Esempio n. 21
0
def Elastic_net_fitting(block, target_otu, interest_otu, theta, train_len, cv, iteration, l_grid, output_dir):
    """Locally-weighted (S-map style) elastic-net fits for one OTU block.

    For each time point, the remaining rows are weighted by their distance
    to the target state, ElasticNetCV is fit over a grid of l1 ratios, and
    per-point coefficients plus fit statistics are written as CSVs under
    output_dir.
    """
    ##Select data and fitting
    print('Start fitting.')
    lib = range(block.shape[0])
    coefs = np.empty(shape=(block.shape[0], block.shape[1] - 1))
    fit_results = np.empty(shape=(block.shape[0], 13))

    for ipred in lib:
        print('\r', 'Complete percentage: %.2f%%' % (ipred / len(lib) * 100), end="", flush=True)
        sub_block = np.delete(block, ipred, axis=0)
        q = block[lib[ipred], :]
        ### Distance of every remaining state to the target state
        ### (assumes block is 2-D matrix-like so q[:, 1:] indexes — TODO confirm)
        E_dist = np.sqrt(np.sum(np.array(sub_block[:, 1:] - q[:, 1:]) ** 2, axis=1))
        w = make_weights(E_dist, theta)
        ###Weighted predictors and responses
        X_wp = weight_data(sub_block[:, 1:], w)
        Y_wp = np.ravel(weight_data(sub_block[:, 0], w))
        X_target = block[ipred, 1:]
        Y_target = block[ipred, 0]

        ## Split training and test data; the target point itself is always
        ## appended to the training set. NOTE: `pick_test` has size train_len.
        pick_test = np.random.choice(range(X_wp.shape[0]), size=train_len, replace=False)
        X_train = np.append(np.delete(X_wp, pick_test, axis=0), X_target, axis=0)
        X_test = X_wp[pick_test, :]
        Y_train = np.append(np.delete(Y_wp, pick_test, axis=0), Y_target)
        Y_test = Y_wp[pick_test]

        ###Fit over an evenly spaced grid of l1 ratios
        regr = ElasticNetCV(cv=cv, random_state=0, max_iter=iteration,
                            l1_ratio=[(i + 1) * l_grid for i in range(int(1 / l_grid))])
        regr.fit(X_train, Y_train)
        rmse = np.sqrt(np.mean((regr.predict(X_train) - Y_train) ** 2))
        rmse_o = np.sqrt(np.mean((regr.predict(X_test) - Y_test) ** 2))
        coefs[ipred, :] = regr.coef_
        fit_results[ipred, :] = regr.intercept_, regr.alpha_, regr.l1_ratio_, rmse, np.std(Y_train), rmse_o, np.std(
            Y_test), regr.score(X_test, Y_test), regr.score(X_train, Y_train), max(Y_train), min(Y_train), max(
            Y_test), min(Y_test)
        print('\r', 'Complete percentage: %.2f%%' % ((ipred + 1) / len(lib) * 100), end="", flush=True)

    # Output results
    # NOTE(review): the coefficient file is named '*_fit_results.csv' —
    # likely meant to be '*_coefs.csv'; confirm downstream readers before
    # changing the path.
    coefs = pd.DataFrame(data=coefs)
    coefs.to_csv('/'.join([output_dir,'coefs/%s_%s_%s_fit_results.csv' % (interest_otu, target_otu, theta)]))
    fit_results = pd.DataFrame(
        columns=['Intercept', 'Best alpha', 'Best l1_ratio', 'RMSE', 'Std', 'RMSE_o', 'Std_o', 'Test set score',
                 'Test set score_train', 'ymax_train', 'ymin_train', 'ymax_test', 'ymin_test'],
        data=fit_results)
    fit_results.to_csv('/'.join([output_dir,'fit_result/%s_%s_%s_fit_results.csv' % (interest_otu, target_otu, theta)]))
Esempio n. 22
0
class LinearModel:
    """Elastic-net regressor wrapped to emit classifier-style probability
    vectors from ``predict``."""

    def fit(self, X, y):
        """Fit an ElasticNetCV regressor on (X, y)."""
        self.clf = ElasticNetCV(cv=5, random_state=0).fit(X, y)

    def predict(self, X):
        """Return (None, None, prob_vec) where prob_vec[i] = [p_i, 1 - p_i].

        Bug fix: the original ``return _, _, y_pred_prob_vec`` raised
        NameError because ``_`` was never bound; None placeholders keep the
        3-tuple shape callers unpack.
        """
        y_pred_prob = self.clf.predict(X)
        y_pred_prob_vec = np.array([[i, 1 - i] for i in y_pred_prob])
        return None, None, y_pred_prob_vec
Esempio n. 23
0
def enetCV():
    """Fit ElasticNetCV on the module-level base data with a shuffle-split
    CV, print the training score, and write test predictions to CSV."""
    print("Doing elastic net")
    splitter = cross_validation.ShuffleSplit(len(base_X), n_iter=5, test_size=0.2, random_state=0)
    model = ElasticNetCV(cv=splitter)
    model.fit(base_X, base_Y)
    print("Score = %f" % model.score(base_X, base_Y))
    write_to_file("elasticCV.csv", model.predict(X_test))
Esempio n. 24
0
def train_test_en(input_data, output_data, train_key, test_key, n_cv=3):
    """
    Train and predict with cross-validated elastic-net regression.

    Both features and target are standardized before fitting; predictions
    are mapped back to the original target scale.

    Returns (y_pred, p) where p is the coefficient vector with the intercept
    appended as the last element.
    """
    # set parameter
    #alphas = 10 ** np.arange(-2, 1, 0.1)
    
    # Guard: if there are fewer training points than folds, shrink the folds.
    if len(train_key) < n_cv:
        n_cv = len(train_key)
    
    #-------------
    # Training
    #-------------
    x = input_data[train_key,:]
    y = output_data[train_key]
    
    # Scalers for standardization
    x_scaler = StandardScaler()
    y_scaler = StandardScaler()
    
    clf = ElasticNetCV(l1_ratio=[.05, .15, .5, .7, .9, .95, .99, 1], n_jobs=8, n_alphas=20, cv=n_cv)
    
    # Fit the scalers (target reshaped to a column for StandardScaler)
    x_scaler.fit(x)
    y_scaler.fit(y.reshape(-1,1))
    
    y_ = y_scaler.transform(y.reshape(-1,1))
    y_ = y_.reshape(-1)
    
    #import pdb; pdb.set_trace()
    
    # Fit the model (warnings, e.g. convergence warnings, are suppressed)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        clf.fit(x_scaler.transform(x), y_)
    
    # Extract model parameters
    #alpha = clf.alpha_  # selected hyper-parameter
    a = clf.coef_  # coefficients
    b = clf.intercept_  # intercept
    p = np.append(a, b)
    
    #-------------
    # Prediction
    #-------------
    x = input_data[test_key,:]
    
    # Edge case: a single test point comes back 1-D — reshape to one row.
    if x.ndim == 1:
        x = x.reshape(1,-1)
    
    # Predict in scaled space, then invert the target scaling.
    # NOTE(review): newer sklearn expects 2-D input to inverse_transform —
    # confirm the installed version accepts this 1-D prediction vector.
    tmp = clf.predict(x_scaler.transform(x))
    y_pred = y_scaler.inverse_transform(tmp)
    return y_pred, p
Esempio n. 25
0
File: lm.py Progetto: billpun/kaggle
    def train(self, y, folds, l1_ratio=0.8, normalize=True):
        """Train one model per fold, persist each, and return the mean RMSE.

        :param y: name of the target column present in each fold's frames.
        :param folds: list of {'train': DataFrame, 'valid': DataFrame} dicts.
        :param l1_ratio: elastic-net mixing parameter (used only when
            ``self.model == 'en'``).
        :param normalize: forwarded to the sklearn estimator.
        :raises ValueError: if ``self.model`` is neither 'en' nor 'lm'.

        Side effects: pickles each fold's model under ``self.model_path``
        and writes the concatenated out-of-fold predictions to
        ``self.output_path``.
        """
        K = len(folds)
        yhats = []
        rmses = []
        for k in range(K):
            start = time.time()

            train = folds[k]['train']
            valid = folds[k]['valid']

            X_train = train.drop(columns=[y]).values
            y_train = train[y].values
            X_valid = valid.drop(columns=[y]).values
            y_valid = valid[y].values

            if self.model == 'en':
                # Inner 10-fold CV (seeded per outer fold) selects alpha.
                kf = KFold(n_splits=10, random_state=100 + k, shuffle=True)
                lm = ElasticNetCV(cv=kf,
                                  random_state=k,
                                  normalize=normalize,
                                  max_iter=5000,
                                  l1_ratio=l1_ratio)
            elif self.model == 'lm':
                lm = LinearRegression(normalize=normalize)
            else:
                # Bug fix: any other value previously fell through and
                # raised NameError at lm.fit; fail fast instead.
                raise ValueError('unknown model type: {!r}'.format(self.model))

            lm.fit(X_train, y_train)
            y_pred = lm.predict(X_valid)
            yhats.append(
                pd.DataFrame({
                    'y': valid[y],
                    'yhat': y_pred
                },
                             index=valid.index))
            # Bug fix: close the pickle file deterministically (the original
            # open() call leaked the handle).
            with open(
                    os.path.join(self.model_path,
                                 '{}_{}.tar'.format(self.model, k)),
                    'wb') as fh:
                pickle.dump(lm, fh)
            rmse = np.sqrt(mean_squared_error(y_valid, y_pred))
            rmses.append(rmse)

            if self.verbose:
                print(
                    'done training with {} fold and rmse={}. took {}s'.format(
                        k, round(rmse, 4), int(time.time() - start)))

        rmse = round(np.mean(rmses), 4)
        yhats = pd.concat(yhats).sort_index()
        yhats.to_csv(
            os.path.join(self.output_path, '{}_{}'.format(self.model, rmse)))

        if self.verbose:
            print('avg rmse: {}'.format(rmse))

        return rmse
Esempio n. 26
0
def calculateAccuracy(indbest, X_train, y_train, X_test, y_test):
    """Fit ElasticNetCV on the features selected by *indbest* and return the
    test-set R^2 score.

    The selected feature indices are expanded into design matrices via
    evaluatedMatrix() for both splits before fitting.
    """
    indbest = list(indbest)
    evalTrain = evaluatedMatrix(indbest, X_train)
    evalTest = evaluatedMatrix(indbest, X_test)

    # Linear regression with elastic net
    regr = ElasticNetCV(random_state=0)
    regr.fit(evalTrain, y_train)
    y_pred = regr.predict(evalTest)
    # Compute the score once; the original called r2_score twice (once for
    # the print, once again for the return).
    score = r2_score(y_test, y_pred)
    print("Test Accuracy: ", score)
    return score
Esempio n. 27
0
def local_elasticnet(dataset, cid_input):
    """Fit an elastic net on the cid3 feature schema and attach a
    non-promotional baseline column.

    Returns the dataset restricted to cid_input.LOCAL_SCHEMA_CID3 with an
    added 'initial_base_line' column.
    """
    x_df = dataset[cid_input.X_SCHEMA_CID3]
    y_df = dataset[cid_input.Y_SCHEMA_CID3]
    # NOTE: do NOT min-max normalize here, e.g.
    #   x_df = (x_df - x_df.mean()) / (x_df.max() - x_df.min())
    # the original author explicitly warns elastic net must not use that
    # normalization on this data.
    lm = ElasticNetCV()
    lm.fit(x_df.values, y_df.values.ravel())
    # Predict on the feature matrix with promotion features neutralized.
    # Predictions are exponentiated, so the target is presumably on a log
    # scale — TODO confirm against the caller.
    x_non_promo = non_promo_inputs(x_df,
                                   cid_input.cid3_promotion_features).values
    s_initial_fitting = np.exp(lm.predict(x_non_promo))
    # Removed dead locals from the original (coefficients, intercept, and the
    # unused full-feature prediction) — they were never read.
    current_dt = dataset.copy()
    current_dt['initial_base_line'] = s_initial_fitting
    return current_dt[cid_input.LOCAL_SCHEMA_CID3]
def elastic_regression(X, y, X_test):
    """Cross-validated elastic net: fit on (X, y), then predict X_test.

    Returns a tuple (predictions as a DataFrame, chosen alpha, chosen
    l1 ratio), printing the selected hyperparameters along the way.
    """
    model = ElasticNetCV(alphas=[0.5, 1.0, 5.0, 10],
                         l1_ratio=[.1, .5, .9, 0.95, 0.99],
                         tol=0.001, max_iter=5000)
    model.fit(X, y)
    predictions = model.predict(X_test)
    pred_elastic = pd.DataFrame(predictions)
    best_alpha = model.alpha_
    best_l1_ratio = model.l1_ratio_
    print ("Best Alpha for ElasticNet:", best_alpha)
    print ("L1 Ratio for ElasticNet:", best_l1_ratio)
    return pred_elastic, best_alpha, best_l1_ratio
Esempio n. 29
0
def eNetModel(data, labels, featureNames, texts, documents, nFolds):
    """Cross-validate an elastic net over pre-extracted features (Python 2).

    Returns (accuracy as a Decimal fraction, mean coefficient vector averaged
    over each fold's regularization path).  The scalar comparisons on
    label_test suggest one test sample per fold and labels in {0, 1} —
    TODO confirm against the caller.
    """
    # run SVM with grid search for parameters and leave-one-out cross validation
    # (legacy sklearn API: KFold(n, n_folds=...) yields index-array pairs)
    kf = KFold(len(texts), n_folds=nFolds)
    acc = 0
    mean_coefs = []  # one mean-over-path coefficient vector per fold
    for train, test in kf:

        # test_docs = {}
        label_train = labels[train]
        #selected_feats = getSelectedFeatures(train, test, texts, featureNames, documents, label_train, nFeats)

        full_train_data, full_test_data, label_train, label_test = data[train], data[test], labels[train], labels[test]

        #data_train = sortBySelected(full_train_data, selected_feats, featureNames)
        #data_test = sortBySelected(full_test_data, selected_feats, featureNames)

        data_train = full_train_data
        data_test = full_test_data

        enet = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1],n_alphas=1000,alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])

        enet.fit(data_train, label_train)

        data_train = np.asarray(data_train,dtype=float)
        label_train = np.asarray(label_train,dtype=float)

        # Regularization path: vals[1] is the coefficient array; average it
        # over the alphas axis for this fold.
        vals = enet.path(data_train, label_train)
        mean_coefs.append(np.mean(vals[1],axis=1))

        # Threshold the regression output at 0.5 to score classification
        # accuracy; predictions exactly equal to 0.5 count as wrong.
        if label_test == 1 and enet.predict(data_test) > 0.5:
            acc += 1
        elif label_test == 0 and enet.predict(data_test) < 0.5:
            acc += 1

        # Progress report every 10 folds (Python 2 print statement).
        if len(mean_coefs) % 10 == 0:
            print str(len(mean_coefs)), 'out of %s subs finished' %(str(len(data)))

    # Average the per-fold mean coefficient vectors across folds.
    mean_coefs = np.mean(np.array(mean_coefs), axis=0)

    return Decimal(acc)/Decimal(len(data)), mean_coefs
Esempio n. 30
0
def regtsls(data, opts):
    """Regularized two-stage least squares.

    data unpacks as (T_test, Z, T, Y): held-out treatments, instruments,
    treatments and outcome.  Stage one predicts polynomial treatment
    features from Z; stage two regresses Y on the fitted treatments.
    Returns second-stage predictions for T_test, reshaped to match Y.
    """
    T_test, Z, T, Y = data
    feature_map = PolynomialFeatures(degree=_get(opts, 'lin_degree', 1),
                                     include_bias=False)
    treat_poly = feature_map.fit_transform(T)
    # Stage 1: instruments -> (polynomial) treatments.
    stage_one = Pipeline([('poly',
                           PolynomialFeatures(degree=_get(opts, 'lin_degree', 1))),
                          ('elasticnet', MultiTaskElasticNetCV(cv=3))])
    stage_one.fit(Z, treat_poly)
    # Stage 2: fitted treatments -> outcome.
    stage_two = ElasticNetCV(cv=3)
    stage_two.fit(stage_one.predict(Z), Y.ravel())
    fitted = stage_two.predict(feature_map.fit_transform(T_test))
    return fitted.reshape(T_test.shape[:1] + Y.shape[1:])
Esempio n. 31
0
    def train_elasticNetCV(self, data):
        """Fit an ElasticNetCV on the training split, report validation MAE,
        pickle the fitted model, and return predictions for self.x_test.

        data unpacks as ((x_tr, y_tr), (x_val, y_val)); targets are
        inverse-transformed with expm1 before scoring (assumes a log1p
        transform upstream — TODO confirm).
        """
        train, validacion = data
        x_tr, y_tr = train
        x_val, y_val = validacion

        print('Start training ElasticNetCV...')
        start_time = self.timer()

        model = ElasticNetCV(normalize=True,
                             n_alphas=2000,
                             max_iter=2000,
                             cv=10)
        model.fit(x_tr, y_tr)
        print("The R2 is: {}".format(model.score(x_tr, y_tr)))
        print("The alpha choose by CV is:{}".format(model.alpha_))
        self.timer(start_time)

        print("Making prediction on validation data")
        actual = np.expm1(y_val)
        predicted = np.expm1(model.predict(x_val))
        mae = mean_absolute_error(actual, predicted)
        print("El mean absolute error de es {}".format(mae))

        print('Saving model into a pickle')
        # Best-effort mkdir: ignore failure if the directory already exists.
        try:
            os.mkdir('pickles')
        except:
            pass

        with open('pickles/enetCV.pkl', 'wb') as f:
            pickle.dump(model, f)

        print('Making prediction and saving into a csv')
        return model.predict(self.x_test)
Esempio n. 32
0
def elastic_net_reg():
    """Cross-validated elastic net on the pre-scaled train/test globals.

    Side effect: stores [accuracy (rounded preds), MSE, R2] for the test set
    in the global metrics_en.  Returns scores_results(...) over the train and
    test predictions.
    """
    from sklearn.linear_model import ElasticNetCV
    global metrics_en
    model = ElasticNetCV(n_alphas=300,
                         l1_ratio=[.1, .3, .5, .7, .9],
                         cv=10,
                         random_state=0)
    model.fit(X_train_scaled, y_train)
    y_pred_train = model.predict(X_train_scaled)
    y_pred_test = model.predict(X_test_scaled)
    print(model.alpha_, model.l1_ratio_)
    print(model.score(X_test_scaled, y_test))
    metrics_en = [
        accuracy_score(y_test, np.round(y_pred_test)),
        mean_squared_error(y_test, y_pred_test),
        r2_score(y_test, y_pred_test)
    ]
    return scores_results(y_train, y_test, y_pred_train, y_pred_test)
Esempio n. 33
0
File: lccb.py Progetto: jmmcd/PODI
def LCCB_coevo(fitness_fn, pop):
    """Co-evolutionary fitness assignment via an elastic net (Python 2).

    Builds a design matrix with one column per individual's cached semantics,
    fits ElasticNetCV against the training targets, then rebuilds the
    population with fitness = -|coefficient|.
    NOTE(review): `pop` is rebound to a new list and sorted locally, but the
    result is never returned — callers cannot observe it; confirm intended.
    """
    y = fitness_fn.train_y
    # Make a new array composed of pop[i].semantics for all i
    # (pop[i].semantics has already been calculated)
    X = None
    for ind in pop:
        if (ind.phenotype and ind.fitness != sys.maxint
            and all(np.isfinite(ind.semantics))):
            col = ind.semantics
        else:
            # Invalid or failed individuals contribute an all-zero column so
            # the matrix keeps exactly one column per population member.
            print("Omitting a column")
            col = np.zeros(len(y))
        if X is None:
            X = col
        else:
            X = np.c_[X, col]

    eps = 5e-3  # NOTE(review): unused below — presumably a leftover path parameter

    # FIXME FFX processes the data so that has zero mean and unit
    # variance before applying the LR... should we do that?

    # Use ElasticNet with cross-validation, which will automatically
    # get a good value for regularisation
    model = ElasticNetCV()
    model.fit(X, y)
    coefs = model.coef_
    output = model.predict(X)
    rmse = fitness_fn.rmse(y, output)
    print("rmse", rmse)

    # Assign the magnitude of coefficients as individual fitness
    # values. Have to construct a new individual because tuples are
    # immutable. FIXME this is not a great method -- it's likely that
    # the population will converge on one or a few basis functions,
    # and then the performance of the ENet will decrease because there
    # won't be enough independent basis functions to work with.
    pop = [variga.Individual(genome=pop[i].genome,
                             used_codons=pop[i].used_codons,
                             fitness=-abs(coefs[i]),
                             phenotype=pop[i].phenotype,
                             readable_phenotype=pop[i].readable_phenotype,
                             semantics=pop[i].semantics)
           for i in range(len(pop))]

    pop.sort(key=variga.ind_compare)
Esempio n. 34
0
def predict(train):
    """Fill unobserved ratings by regressing each user on all other users.

    A user with more than five observed entries gets the missing entries
    predicted by an elastic net trained on the co-rated columns of the other
    users; work happens in normalized space and is inverted on return.
    """
    observed = (train > 0)
    reg = ElasticNetCV(fit_intercept=True, alphas=[
                       0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    norm = NormalizePositive()
    train = norm.fit_transform(train)

    filled = train.copy()
    for user in range(train.shape[0]):
        # Train on every row except the current user's own.
        others = np.delete(train, user, axis=0)
        mask = observed[user]
        if np.sum(mask) > 5:
            reg.fit(others[:, mask].T, train[user, mask])
            # Only entries that were missing get filled in.
            filled[user, ~mask] = reg.predict(others[:, ~mask].T)
    return norm.inverse_transform(filled)
Esempio n. 35
0
def regress(x, y, title):
    clf = ElasticNetCV(max_iter=200, cv=10, l1_ratio = [.1, .5, .7, .9, .95, .99, 1])

    clf.fit(x, y)
    print "Score", clf.score(x, y)

    pred = clf.predict(x)
    plt.title("Scatter plot of prediction and " + title)
    plt.xlabel("Prediction")
    plt.ylabel("Target")
    plt.scatter(y, pred)

    # Show perfect fit line
    if "Boston" in title:
        plt.plot(y, y, label="Perfect Fit")
        plt.legend()

    plt.grid(True)
    plt.show()
Esempio n. 36
0
def predict(train):
    """Per-user collaborative-filtering fill-in via elastic net.

    For each user (row) with more than 5 observed entries, regress the user's
    known ratings on the other users' ratings of the same items, then predict
    the unrated items.  Returns the filled matrix transformed back to the
    original scale.
    """
    binary = (train > 0)  # mask of observed (positive) entries
    reg = ElasticNetCV(fit_intercept=True, alphas=[
                       0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    norm = NormalizePositive()
    train = norm.fit_transform(train)

    filled = train.copy()
    # iterate over all users
    for u in range(train.shape[0]):
        # remove the current user from the training rows
        curtrain = np.delete(train, u, axis=0)
        bu = binary[u]
        if np.sum(bu) > 5:
            reg.fit(curtrain[:,bu].T, train[u, bu])

            # fill in the values that were previously missing
            filled[u, ~bu] = reg.predict(curtrain[:,~bu].T)
    return norm.inverse_transform(filled)
Esempio n. 37
0
def build_regression(dat, start, n):
	"""Fit an ElasticNetCV on engineered quarterly features and roll forward
	week-by-week predictions for the next quarter and the quarter after.

	Each prediction is written back into the test feature matrix so later
	rows see it as a lag feature.  Returns (pred_next, pred_afternext) as
	numpy arrays.
	NOTE(review): relies on the long-removed pandas .ix indexer — this only
	runs on very old pandas versions.
	"""
	print('Building linear regression...')
	from sklearn import datasets, linear_model
	from sklearn.linear_model import ElasticNetCV
	from sklearn.metrics import r2_score
	from sklearn.metrics import mean_absolute_error

	dat = dat.drop(dat.columns[1:-5], axis=1)
	df = feature_engineer(dat,start,n)
	# # Split the targets into training/testing sets
	train = df[df['train']==1]
	test = df[df['train']==0]
	df_x_train = train.ix[:,1:-1] 
	df_x_test = test.ix[:,1:-1] 
	df_y_train = train.ix[:,0] 
	df_y_test = test.ix[:,0] 

	##### The parameter l1_ratio corresponds to alpha in the glmnet R package
	# while alpha corresponds to the lambda parameter in glmnet. Specifically, 
	# l1_ratio = 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable, 
	# unless you supply your own sequence of alpha.

	df_x_test= df_x_test.reset_index(drop=True)
	# Row counts for the next quarter (lenn) and the quarter after (lena).
	lenn = df_x_test[df_x_test['qindex']==max(df_x_test['qindex'])-1].shape[0]
	lena = df_x_test[df_x_test['qindex']==max(df_x_test['qindex'])].shape[0]
	############### elasticnet cv ##########
	temp=[]
	enetcv = ElasticNetCV(l1_ratio=[.01, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95, .99, 1])
	enetcv.fit(df_x_train, df_y_train)

	########## next quarter ###########
	# Predict row by row; feed each prediction back into the lag/average
	# features of the following week and quarter before predicting them.
	pred_next=[]
	for i in range(lenn):
		y_pre = enetcv.predict(df_x_test.iloc[i,:].values.reshape(1,-1))[0] 
		pred_next.append(y_pre)
		if i < lenn:
			week = df_x_test.ix[i,'time']
			quarter = df_x_test.ix[i,'qindex']
			nextq = np.where((df_x_test['time']==week) & (df_x_test['qindex']==quarter+1))[0][0]
			df_x_test.ix[nextq,'lastq'] = y_pre
			df_x_test.ix[nextq,'avepnl'] = df_x_test.ix[(nextq-n+1):nextq,'lolastweek'].mean(axis=0)
			df_x_test.ix[nextq,'prop'] = df_x_test.ix[nextq,'avepnw']/df_x_test.ix[nextq,'avepnl']*df_x_test.ix[nextq,'lastq']
			if i < lenn-1:
				nextw = np.where((df_x_test['time']==week+1) & (df_x_test['qindex']==quarter))[0][0]
				nextwq = np.where((df_x_test['time']==week+1) & (df_x_test['qindex']==quarter+1))[0][0]
				df_x_test.ix[nextw,'lastw'] = y_pre
				df_x_test.ix[nextwq,'lolastweek'] = y_pre
				df_x_test.ix[nextw,'avepnw'] = df_x_test.ix[max(nextw-n+1,1):nextw,'lastw'].mean(axis=0)
				df_x_test.ix[nextw,'prop'] = df_x_test.ix[nextw,'avepnw']/df_x_test.ix[nextw,'avepnl']*df_x_test.ix[nextw,'lastq']

	############ quarter after next ##########
	pred_afternext = []

	for i in range(lena):
		y_pre = enetcv.predict(df_x_test.iloc[(i+lenn),:].values.reshape(1,-1))[0]
		pred_afternext.append(y_pre)
		if i < lena-1:  
			week = df_x_test.ix[i+lenn,'time']
			quarter = df_x_test.ix[i+lenn,'qindex']
			nextw = np.where((df_x_test['time']==week+1) & (df_x_test['qindex']==quarter))[0][0]
			df_x_test.ix[nextw,'lastw'] = y_pre
			df_x_test.ix[nextw,'avepnw'] = df_x_test.ix[max(nextw-n+1,1):nextw,'lastw'].mean(axis=0)
			df_x_test.ix[nextw, 'prop'] = df_x_test.ix[nextw,'avepnw']/df_x_test.ix[nextw,'avepnl']*df_x_test.ix[nextw,'lastq']
	y_pred_enetcv = pred_next + pred_afternext
	#print(mean_absolute_error(df_y_test, y_pred_enetcv, sample_weight=None, multioutput='uniform_average'))
	r2_score_enetcv = r2_score(df_y_test, y_pred_enetcv)
	# print("r^2 on test data : %f" % r2_score_enetcv)


	return np.array(pred_next), np.array(pred_afternext)
Esempio n. 38
0
#%%
# Elastic net with a joint grid over the mixing ratio and regularization
# strength, selected by 3-fold CV.

# alpha in sklearn equals lambda in the glmnet convention
lambda_grid = [0.01, 0.1 , 1, 10,100]
l1_ratio_grid = [0.1,0.3,0.5,0.7,0.9]

enet_CV = ElasticNetCV(l1_ratio=l1_ratio_grid,alphas=lambda_grid,cv=3,n_jobs=-1,verbose=True)

enet_CV.fit(train_X,train_Y)

#%%
# Show the test-set score and a predicted-vs-actual scatter plot.
enet_CV.score(test_X,test_Y)
plt.plot(enet_CV.predict(test_X),test_Y,'o')
#%%
# SVR experiment.
# NOTE(review): svr_rs (the randomized search) is constructed but never
# fitted; the plain svr is fitted instead, on a column subset — confirm.

svr = SVR(kernel = 'rbf',C=1,cache_size=2000)

SVR_params = { 'C' : [1e-1,1.0,1e2,1e3,1e4] }
svr_rs = grid_search.RandomizedSearchCV(svr,SVR_params,verbose=True,n_jobs=-1)

svr.fit(train_X[:,whichones[0]],train_Y)

#%%
#try bagging/boosting etc
#rfr = RandomForestRegressor(n_estimators = 30,n_jobs = 2)

#rfr.fit(train_X,train_Y)
Esempio n. 39
0
# Cross-validate a geometric negative-binomial model over a grid of
# regularization strengths; record mean CV MSE per alpha.
performance_negativebinomial = []
for x in [0.01,0.1,1,5,10]:
    cost = []
    for a,b in cross_validation_object:
        resultingmodel = sm.NegativeBinomial(Y[a],X[a],loglike_method = 'geometric')
        #res = resultingmodel.fit(disp=False, maxiter = 200)
        res2 = resultingmodel.fit_regularized(alpha = x, maxiter = 200)
        cost.append(mean_squared_error(res2.predict(X[b]), Y[b]))
    performance_negativebinomial.append(np.mean(cost))


##### Log linear model ########## not even close. 
# Baseline comparison: elastic net on log(Y + 1), scored after undoing the
# log transform.  (Author's verdict above: "not even close".)
from sklearn.linear_model import ElasticNetCV
linear_fit = ElasticNetCV(cv = cross_validation_object, alphas = [0.01,0.1,1,5,10])
linear_fit.fit(X,np.log(Y+1))
mean_squared_error(np.exp(linear_fit.predict(X)) - 1, Y)


########## creating final model using train data + test data


X_test,Y_test,junk = prepare_for_model('Dogs_Final_Test.csv',1)
X,Y,junk = prepare_for_model('Dogs_Final_Train.csv',1)
scaler = MinMaxScaler([0,1])
X_all = scaler.fit_transform(np.vstack((X_test,X)))
Y_all = np.hstack((Y_test,Y))
# Cap the target at 30 before the final fit.
Y_all = np.array([30 if i > 30 else i for i in Y_all])
final_model = sm.NegativeBinomial(Y_all,X_all,loglike_method = 'geometric')
res2 = final_model.fit_regularized( alpha = 5, maxiter = 200)

Esempio n. 40
0
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.cross_validation import train_test_split

__doc__ = "See newcomparison.m"

# Experiment settings.
l1_ratio = 0.5
k_fold = 10
test_frac = 0.5
data_root = path.expanduser('~/data')

# Load MNIST data
mnist = fetch_mldata('MNIST original', data_home=data_root)
X = mnist.data
y = mnist.target

# Split into train/test_frac
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_frac, random_state=0)

# Construct and fit model.
# NOTE(review): the l1_ratio defined above is never passed to the model, so
# the sklearn default is used — confirm intended.
en = ElasticNetCV(cv=k_fold, n_jobs=-1, random_state=0)
en.fit(X_train, y_train)

# Evaluate performance: round the regression output to the nearest digit
# label before computing classification metrics.
y_pred = np.round(en.predict(X_test))
conf_mat = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

print(acc)
Esempio n. 41
0
predicted=lr.predict(X)
'''validation'''
# 5-fold out-of-fold predictions for the linear model, scored by RMSE
# (Python 2 print statements throughout).
kf=KFold(len(X),n_folds=5)
p=np.zeros_like(y)
for train,test in kf:
    lr.fit(X[train],y[train])
    p[test]=lr.predict(X[test])
rmse_cv=np.sqrt(mean_squared_error(p,y))
print "RMSE of 5-fold cv {:.2}".format(rmse_cv)
'''ElasticNet'''
# Same protocol with ElasticNetCV, scored by R2.
from sklearn.linear_model import ElasticNetCV
met=ElasticNetCV(n_jobs=-1)
p=np.zeros_like(y)
for t,tst in kf:
    met.fit(X[t],y[t])
    p[tst]=met.predict(X[tst])
p2=r2_score(y,p)
print met.score(X,y)
print p2,"Elastic"




# NOTE(review): the plotting code below is unreachable — exit() runs first.
exit()
plt.scatter(predicted,y)
plt.xlabel("Predicted")
plt.ylabel("Actual ")
plt.plot([y.min(),y.max()],[[y.min()],[y.max()]])
plt.show()
# Cross-validated scores for each pre-built estimator: ROC-AUC for the
# classifiers (forest, logistic), the generic Scoring metric for the
# regressors (lasso, elastic).  Python 2 syntax throughout.
forest_cv_score = cross_validation.cross_val_score(coef_path_forest_cv, X, binary_y, n_jobs=2, cv=CV, scoring='roc_auc')
lasso_cv_score = cross_validation.cross_val_score(coef_path_lasso_cv, X, y, n_jobs=2, cv=CV, scoring=Scoring)
elastic_cv_score = cross_validation.cross_val_score(coef_path_elastic_cv, X, y, n_jobs=2, cv=CV, scoring=Scoring)
logistic_cv_score = cross_validation.cross_val_score(coef_path_logistic_cv, X, binary_y, n_jobs=2, cv=CV, scoring='roc_auc')
binary_x_logistic_cv_score = cross_validation.cross_val_score(coef_path_binary_x_logistic_cv, binary_X, binary_y, n_jobs=2, cv=CV, scoring='roc_auc')

# Bundle in-sample predictions, parameters and model attributes per method.
forest_results_parameters = [ coef_path_forest_cv.predict(X), coef_path_forest_cv.get_params, coef_path_forest_cv.feature_importances_, 
				coef_path_forest_cv.classes_, coef_path_forest_cv.n_classes_]
forest_scores = [forest_cv_score, classification_report(binary_y, forest_results_parameters[0]), 'forest']

lasso_results_parameters = [coef_path_lasso_cv.predict(X), coef_path_lasso_cv.get_params, coef_path_lasso_cv.alphas_, coef_path_lasso_cv.coef_]  

lasso_scores = [lasso_cv_score, r2_score(y,lasso_results_parameters[0]), 'lasso']

elastic_results_parameters = [ coef_path_elastic_cv.predict(X), coef_path_elastic_cv.get_params, coef_path_elastic_cv.alphas_ ,
				coef_path_elastic_cv.coef_]
elastic_scores = [elastic_cv_score, r2_score(y,elastic_results_parameters[0]), 'elastic']

logistic_results_parameters = [coef_path_logistic_cv.predict(X), coef_path_logistic_cv.get_params, coef_path_logistic_cv.coef_]

logistic_scores = [logistic_cv_score, classification_report(binary_y, logistic_results_parameters[0]), 'logistic']

binary_x_logistic_results_parameters = [coef_path_binary_x_logistic_cv.predict(X), coef_path_binary_x_logistic_cv.get_params, coef_path_binary_x_logistic_cv.coef_]

binary_x_logistic_scores = [binary_x_logistic_cv_score, classification_report(binary_y, binary_x_logistic_results_parameters[0]), 'binary_logistic']

##LINEAR REGRESSION METHOD BEGIN
reduced_feature_matrix_logistic = []
print "list of features from logistic regression:%d" % len(logistic_results_parameters[2][0])
print len(X[0])
# NOTE(review): this bare ElasticNetCV is immediately replaced by the
# configured one below — confirm the first assignment is intentional.
met = ElasticNetCV()

features = sales_merged[['PMI_Portfolio_AVB_Boost', 'PMI_Portfolio_PFP_Boost',
       'PMI_Portfolio_PPRP', 'PMI_Portfolio_SA', 'SubFam_Hostess',
       'SubFam_PFP_Boost', 'SubFam_RAP', 'SubFam_SA', 'Fam_AVB_Boost',
       'Fam_Hostess', 'Fam_PFP_Boost', 'Fam_RAP', 't', 'Affinity',
       'Brand Character', 'Functional Performance']].as_matrix()
target = sales_merged['Volume_Sales'].as_matrix()

met = ElasticNetCV(n_jobs=-1, l1_ratio=[.01, .05, .25, .5, .75, .95, .99])

# 5-fold out-of-fold predictions, then aggregate RMSE / R2.
kf = KFold(len(target), n_folds=5)
pred = np.zeros_like(target)
for train, test in kf:
    met.fit(features[train], target[train])
    pred[test] = met.predict(features[test])

print('[EN CV] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
print('[EN CV] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))
print('')

    
    
    
    
    

    
    
    
    
# Grid-searched SVM classifier baseline (Python 2).
svr = svm.SVC()
clf = grid_search.GridSearchCV(svr, param_grid)
clf.fit(train, train_label)

predictions = clf.predict(test)
correct = isCorrect(predictions, False)
# NOTE(review): under Python 2 this is integer division if `correct` is an
# int — the accuracy would truncate to 0 or 1; confirm isCorrect's return type.
acc = correct/len(predictions)
print 'SVM acc:', acc

#### eNet ####

enet = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1],n_alphas=1000,alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
enet.fit(train, train_label)

predictions = enet.predict(test)
correct = isCorrect(predictions, True)
acc = correct/len(predictions)
print 'eNet acc:', acc











#
Esempio n. 45
0
# Per-subject, per-finger ECoG decoding (Python 2): regress continuous finger
# position with an elastic net on pre-selected channels, gate it with a
# logistic movement detector, and smooth/filter the gated trace.
l = []
with h5py.File("ECoG_big_data.h5", "r+") as f1:
    with h5py.File("selected.h5", "r+") as f2:
        for i in range(1, 4):
            sid = "sub" + str(i)
            X = f1[sid]["train_data"][:]
            Y = f1[sid]["train_clabel"][:]
            Yb = f1[sid]["train_blabel"][:]
            Xt = f1[sid]["test_data"][:]
            Yt = f1[sid]["test_clabel"][:]
            Ytb = f1[sid]["test_blabel"][:]
            for finger in range(5):
                for method in ["l1", "mcp", "scad"]:
                    # Selection files store 1-based indices; shift to 0-based.
                    idxc = f2[sid]["finger" + str(finger + 1)][method][:] - 1
                    idxb = f2[sid]["finger" + str(finger + 1)]["l1_l"][:] - 1
                    en = ElasticNetCV()
                    en.fit(X[:, idxc].astype("float64"), Y[:, finger])
                    yp = en.predict(Xt[:, idxc])
                    corr = np.corrcoef(yp, Yt[:, finger])[0, 1]
                    # Skip remaining methods for this finger when the first
                    # correlation is weak.
                    if corr < 0.3:
                        break
                    else:
                        l.append([sid + "//" + "finger" + str(finger + 1), corr])
                        # Gate the regression output with a binary movement
                        # classifier, then smooth with a moving average and a
                        # 2nd-order low-pass Butterworth filter.
                        lr = LogisticRegressionCV()
                        lr.fit(X[:, idxc], Yb[:, finger])
                        tp = yp * fun(lr.predict(Xt[:, idxc]))
                        m = np.where(np.convolve(tp, np.ones((40,)) / 40, mode="same") < 0.5, 0, 1)
                        b, a = butter(2, 9.0 / 25, "low")
                        yy = relu(filtfilt(b, a, tp * m))
                        print corr, np.corrcoef(Yt[:, finger], yy)[0, 1]
# It is made available under the MIT License

import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.cross_validation import KFold
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import mean_squared_error, r2_score

# E2006 regression benchmark in svmlight format.
data, target = load_svmlight_file('data/E2006.train')

# Edit the lines below if you want to switch method:
# met = LinearRegression(fit_intercept=True)
met = ElasticNetCV()

# Out-of-fold predictions over 5 folds, then aggregate RMSE / R2.
kf = KFold(len(target), n_folds=5)
pred = np.zeros_like(target)
for train, test in kf:
    met.fit(data[train], target[train])
    pred[test] = met.predict(data[test])

print('[EN 0.1] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
print('[EN 0.1] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))
print('')

# Refit on the full dataset to contrast in-sample with CV performance.
met.fit(data, target)
pred = met.predict(data)
print('[EN 0.1] RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
print('[EN 0.1] R2 on training, {:.2}'.format(r2_score(target, pred)))


        p[test] = met.predict(x[test])

    r2_cv = r2_score(y, p)
    print('Method: {}'.format(name))
    print('R2 on training: {}'.format(r2_train))
    print('R2 on 5-fold CV: {}'.format(r2_cv))
    print()

# Construct an ElasticNetCV object (use all available CPUs)
met = ElasticNetCV(n_jobs=-1, l1_ratio=[.01, .05, .25, .5, .75, .95, .99])

# Collect out-of-fold predictions over 5 folds.
kf = KFold(len(x), n_folds=5)
pred = np.zeros_like(y)
for train, test in kf:
    met.fit(x[train], y[train])
    pred[test] = met.predict(x[test])


# BUG FIX: report metrics on the out-of-fold predictions just computed
# ('pred'); the original printed metrics on the stale 'p' array left over
# from the previous experiment.
print('[EN CV l1_ratio] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(y, pred))))
print('[EN CV l1_ratio] R2 on testing (5 fold), {:.2}'.format(r2_score(y, pred)))
print('')


'''
# unit version
from time import time
import numpy as np
from step3_vectorize_text import preprocess_4
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
Esempio n. 48
0
# Report/plot section for the pre-fitted CV models (Python 2): 2D histograms
# of truth vs prediction, saved as PNGs.
print lasso_cv_score
plt.figure()
plt.hist2d(y, lasso_prediction)
plt.ylabel("Predicted Values")
plt.xlabel("Truth Values")
plt.title("Lasso Linear Regression")
plt.savefig("figures/lasso_predicted_truth.png")
print "#######ELASTIC#####"
# Fit the elastic-net path model and dump its parameters/coefficients.
coef_path_elastic_cv.fit(X,y)
print coef_path_elastic_cv.get_params
print "alphas:" 
print  coef_path_elastic_cv.alphas_
print "coef_:"
print coef_path_elastic_cv.coef_
print "length of elastic terms:%d" % len(coef_path_elastic_cv.coef_)
elastic_predict = coef_path_elastic_cv.predict(X)
elastic_score = coef_path_elastic_cv.score(X,y)
print "elastic_score:%.3g" % elastic_score
elastic_cv_score = cross_validation.cross_val_score(coef_path_elastic_cv, X, y, n_jobs=2, cv=5)
print elastic_cv_score
#print "elastic precision:%.3g" %  precision_score(y, elastic_predict, average='macro') 
plt.figure()
plt.hist2d(y, elastic_predict)
plt.ylabel("Predicted Values")
plt.xlabel("Truth Values")
plt.title("Elastic Linear Regression")
plt.savefig("figures/elastic_predicted_truth.png")
print "#######Logistic#####"
# Logistic model against the binarized target.
coef_path_logistic_cv.fit(X,binary_y)
print coef_path_logistic_cv.get_params
print "coef_:"
Esempio n. 49
0
def do_validation(data_path, steps=10):
    """Compare many regressors/classifiers by mean ROC-AUC over *steps*
    drivers (Python 2).

    For each driver, builds a merged dataset, shuffles it, trains each model
    on the first 100 rows and scores AUC on the next 300, accumulating the
    AUC per model; prints the per-model averages at the end.
    """
    allfiles = initialize(data_path)
    # Model zoo: tree ensembles, neighbors, and linear models.
    gbm = GradientBoostingRegressor(n_estimators=100, learning_rate=0.05, max_depth=6, min_samples_leaf=5, subsample=0.5)
    ada = AdaBoostRegressor(n_estimators=200, learning_rate=1)
    etree = ExtraTreesRegressor(n_estimators=200, n_jobs=-1, min_samples_leaf=5)
    rf = RandomForestRegressor(n_estimators=200, max_features=4, min_samples_leaf=5)
    kn = KNeighborsRegressor(n_neighbors=25)
    logit = LogisticRegression(tol=0.05)
    enet = ElasticNetCV(l1_ratio=0.75, max_iter=1000, tol=0.05)
    # NOTE(review): SVR has no `probability` parameter (that belongs to SVC)
    # — confirm this constructor call actually runs on the target sklearn.
    svr = SVR(kernel="linear", probability=True)
    ridge = Ridge(alpha=18)
    bridge = BayesianRidge(n_iter=500)

    # Running AUC totals, one per model; divided by `steps` at the end.
    gbm_metrics = 0.0
    ada_metrics = 0.0
    etree_metrics = 0.0
    rf_metrics = 0.0
    kn_metrics = 0.0
    logit_metrics = 0.0
    svr_metrics = 0.0
    ridge_metrics = 0.0
    bridge_metrics = 0.0
    enet_metrics = 0.0
    nnet_metrics = 0.0

    # RBM + logistic pipeline stands in for a small neural network.
    logistic = LogisticRegression()
    rbm = BernoulliRBM(random_state=0, verbose=True)
    classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

    for i in xrange(steps):
        driver = allfiles[i]
        df, Y = create_merged_dataset(driver)
        df['label'] = Y        
        # Shuffle DF.
        df = df.reindex(np.random.permutation(df.index))

        # First 100 shuffled rows train; the next 300 evaluate.
        train = df[:100]
        label = train['label']
        del train['label']

        test = df[100:400]
        Y = test['label']
        del test['label']

        #to_drop = ['driver', 'trip', 'speed1', 'speed2', 'speed3', 'speed4', 'speed5', 'speed6', 'speed7', 'speed8', 'speed9', 
        #        'speed10', 'speed11', 'speed12', 'speed13', 'speed14', 'speed15', 'speed16', 'speed17', 'speed18', 'speed19', 
        #        'speed20', 'speed21', 'speed22', 'speed23', 'speed24', 'speed25', 'speed26', 'speed27', 'speed28', 'speed29', 
        #        'speed30', 'speed31', 'speed32', 'speed33', 'speed34', 'speed35', 'speed36', 'speed37', 'speed38', 'speed39', 
        #        'speed40', 'speed41', 'speed42', 'speed43', 'speed44', 'speed45', 'speed46', 'speed47', 'speed48', 'speed49', 
        #        'speed50', 'speed51', 'speed52', 'speed53', 'speed54', 'speed55', 'speed56', 'speed57', 'speed58', 'speed59', 
        #        'speed60', 'speed61', 'speed62', 'speed63', 'speed64', 'speed65', 'speed66', 'speed67', 'speed68', 'speed69', 
        #        'speed70', 'speed71', 'speed72', 'speed73', 'speed74', 'speed75', 'speed76', 'speed77', 'speed78', 'speed79', 'speed80']
        to_drop = ['driver', 'trip']

        X_train = train.drop(to_drop, 1)
        X_test = test.drop(to_drop, 1)
        
        gbm.fit(X_train, label)
        Y_hat = gbm.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        gbm_metrics += metrics.auc(fpr, tpr) 
        
        ada.fit(X_train, label)
        Y_hat = ada.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        ada_metrics += metrics.auc(fpr, tpr)
    
        etree.fit(X_train, label)
        Y_hat = etree.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        etree_metrics += metrics.auc(fpr, tpr)
        
        rf.fit(X_train, label)
        Y_hat = rf.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        rf_metrics += metrics.auc(fpr, tpr)
        
        kn.fit(X_train, label)
        Y_hat = kn.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        kn_metrics += metrics.auc(fpr, tpr)

        # Linear models get a reduced feature set (drop collinear columns).
        to_drop = ['driver', 'trip', 'distance', 'sd_acceleration', 'final_angle', 'mean_acceleration', 'mean_avg_speed', 'sd_inst_speed',
                'sd_avg_speed', 'mean_inst_speed', 'points']

        X_train = train.drop(to_drop, 1)
        X_test = test.drop(to_drop, 1)
        
        logit.fit(X_train, label)
        # Use the positive-class probability as the ranking score for AUC.
        Y_hat = [i[1] for i in logit.predict_proba(X_test)]
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        logit_metrics += metrics.auc(fpr, tpr)

        svr.fit(X_train, label)
        Y_hat = svr.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        svr_metrics += metrics.auc(fpr, tpr)
        
        ridge.fit(X_train, label)
        Y_hat = ridge.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        ridge_metrics += metrics.auc(fpr, tpr)

        bridge.fit(X_train, label)
        Y_hat = bridge.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        bridge_metrics += metrics.auc(fpr, tpr)

        enet.fit(X_train, label)
        Y_hat = enet.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        enet_metrics += metrics.auc(fpr, tpr)

        classifier.fit(X_train, label)
        Y_hat = classifier.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        nnet_metrics += metrics.auc(fpr, tpr)

    # Report the average AUC per model (Python 2 print statements).
    print ""
    print "GBM:", gbm_metrics/steps
    print "AdaBoost:", ada_metrics/steps
    print "Extra Trees:", etree_metrics/steps
    print "RF:", rf_metrics/steps
    print "KN:", kn_metrics/steps
    print ""
    print "Logit:", logit_metrics/steps
    print "SVR:", svr_metrics/steps
    print "Ridge:", ridge_metrics/steps
    print "BayesianRidge:", bridge_metrics/steps
    print "Elastic Net:", enet_metrics/steps
    print "Neural Networks:", nnet_metrics/steps
    print ""
Esempio n. 50
0
#%%
# Elastic net with CV over the mixing ratio only.

# alpha in sklearn equals lambda in the glmnet convention
lambda_grid = [0.01, 0.1 , 1, 10,100]
l1_ratio_grid = [0.1,0.3,0.5,0.7,0.9]

# NOTE(review): lambda_grid is defined but not passed here, so ElasticNetCV
# falls back to its default alpha grid — confirm intended.
enet_CV = ElasticNetCV(l1_ratio=l1_ratio_grid,cv=3,n_jobs=-1,verbose=True)

enet_CV.fit(train_X,train_Y)

#%%
# Show the test-set score and a predicted-vs-actual scatter plot.
enet_CV.score(test_X,test_Y)
plt.plot(enet_CV.predict(test_X),test_Y,'o')
#%%
# SVR experiment.
# NOTE(review): svr_rs (the randomized search) is constructed but never
# fitted; the plain svr is fitted instead, on a column subset — confirm.

svr = SVR(kernel = 'rbf',C=1,cache_size=2000)

SVR_params = { 'C' : [1e-1,1.0,1e2,1e3,1e4] }
svr_rs = grid_search.RandomizedSearchCV(svr,SVR_params,verbose=True,n_jobs=-1)

svr.fit(train_X[:,whichones[0]],train_Y)

#%%
#try bagging/boosting etc
#rfr = RandomForestRegressor(n_estimators = 30,n_jobs = 2)

#rfr.fit(train_X,train_Y)
print '\n------------------------------------------------------------------------------'

#############################################################################################################
# 2. Elastic Net combines both L1 (Lasso) and L2 (Ridge) penalty estimators
############################################################################################################# 
# Like Lasso, Elastic Net can be used for dimensionality reduction

from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# CV over a wide logarithmic alpha grid with 10-fold cross-validation.
elastic = ElasticNetCV(alphas=np.logspace(-10, 10, 10), normalize=True, cv=10)  
elastic.fit(xtrain, ytrain)

# Train dataset performance
elastic_train_pred = elastic.predict(xtrain)
elastic_train_r2 = r2_score((ytrain), elastic_train_pred)
elastic_train_error = np.sqrt(mean_squared_error(ytrain, elastic_train_pred))

# Test dataset performance
elastic_test_pred = elastic.predict(xtest)
elastic_test_r2 = r2_score((ytest), elastic_test_pred)
elastic_test_error = np.sqrt(mean_squared_error(ytest, elastic_test_pred))

# Build coefficients table
from pandas import DataFrame
elasticcoeff = DataFrame(data.columns, columns = ['Features'])
elasticcoeff['Coefficients'] = elastic.coef_

print 'ELASTIC NET  -------------------------------------------------------------------'
# NOTE(review): alpha_ is the overall regularization strength, not an
# L1-specific level as the printed label suggests.
print '\nThe alpha (L1) level selected: {}' .format(elastic.alpha_)
# use same code as before 
r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
print('R2 score (binary movie neighbors): {:.1%}'.format(r2))


from sklearn.linear_model import ElasticNetCV # NOT IN BOOK

# Per-user regression fill-in: for each user, regress their observed ratings
# on the other users' ratings of the same items and predict the rest.
reg = ElasticNetCV(alphas=[
                       0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
filled = train.copy()
# iterate over all users:
for u in range(train.shape[0]):
    curtrain = np.delete(train, u, axis=0)
    bu = binary[u]
    # NOTE(review): unlike the function variants of this loop elsewhere,
    # there is no "more than 5 observed entries" guard here, so users with
    # zero observed columns would make fit() fail — confirm data guarantees.
    reg.fit(curtrain[:,bu].T, train[u, bu])
    filled[u, ~bu] = reg.predict(curtrain[:,~bu].T)
predicted = norm.inverse_transform(filled)
r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
print('R2 score (user regression): {:.1%}'.format(r2))


# SHOPPING BASKET ANALYSIS
# This is the slow version of the code, which will take a long time to
# complete.


from collections import defaultdict
from itertools import chain

# File is downloaded as a compressed file
import gzip