Code Example #1
File: chap8.py, Project: haisland0909/python_practice
    def learn_for(self, i):
        reviews        = AbstractEstimateBase.reviews
        reg            = ElasticNetCV(fit_intercept=True, alphas=[
                           0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
        nusers,nmovies = reviews.shape
        u              = reviews[i]
        us             = np.arange(reviews.shape[0])
        us             = np.delete(us, i)
        ps,            = np.where(u.ravel() > 0)
        x              = reviews[us][:, ps].T
        kf             = KFold(len(ps), n_folds=4)
        predictions    = np.zeros(len(ps))
        for train, test in kf:
            xc = x[train].copy()
            x1 = np.array([xi[xi > 0].mean() for xi in xc])
            x1 = np.nan_to_num(x1)

            for j in range(xc.shape[0]):  # j, not i: avoid shadowing the user index
                xc[j] -= (xc[j] > 0) * x1[j]

            reg.fit(xc, u[ps][train] - x1)  # index into the rated movies, not the raw row

            xc = x[test].copy()
            x1 = np.array([xi[xi > 0].mean() for xi in xc])
            x1 = np.nan_to_num(x1)

            for j in range(xc.shape[0]):
                xc[j] -= (xc[j] > 0) * x1[j]

            p = reg.predict(xc).ravel()
            predictions[test] = p
        fill_preds = np.zeros(nmovies)
        fill_preds[ps] = predictions

        return fill_preds
Code Example #2
def train_elasticnet(train_features, train_labels, test_features, num_alphas,
                     skip_cross_validation, alpha, l1_ratio, num_jobs):
    """
  Performs the cross validation, and returns the trained model
  """

    if not skip_cross_validation:
        # use 5 fold cross validation
        model = ElasticNetCV(
            l1_ratio=[0.5, 0.7, 0.9, 0.95, 0.99, 0.995, 0.9995, 1],
            max_iter=30000,
            cv=5,
            n_alphas=num_alphas,
            n_jobs=num_jobs,
            normalize=True,
            tol=0.005)
    else:
        model = ElasticNet(alpha=alpha,
                           l1_ratio=l1_ratio,
                           normalize=True,
                           max_iter=30000,
                           tol=0.005)
    model.fit(train_features, train_labels)

    if not skip_cross_validation:
        print("Optimal alpha is {}".format(model.alpha_))
        print("Optimal l1_ratio is {}".format(model.l1_ratio_))
        print("number of iterations were {}".format(model.n_iter_))

    return model
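A quick way to exercise this function (a hedged sketch on synthetic data, not part of the original project; note that normalize= was removed from scikit-learn in 1.2, so this assumes an older release):

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=200, n_features=20, noise=0.5, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=0)
# with skip_cross_validation=False, the alpha and l1_ratio arguments are unused
model = train_elasticnet(X_tr, y_tr, X_te, num_alphas=50,
                         skip_cross_validation=False, alpha=None,
                         l1_ratio=None, num_jobs=1)
print(model.score(X_te, y_te))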
Code Example #3
def data_preprocessing(X, Y):
    # scaling of data

    Y[Y == 0] = -1
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
    X_train = X_train.reshape((3840, 68 * 2))
    X_test = X_test.reshape((960, 68 * 2))
    scaler = StandardScaler()
    # Don't cheat - fit only on training data
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    # apply same transformation to test data
    X_test = scaler.transform(X_test)

    # PCA analysis

    pca = PCA(n_components=68)
    pca.fit(X_train)
    X_train = pca.transform(X_train)
    X_test = pca.transform(X_test)

    # Feature selection using lasso and ridge regression

    enet_cv = ElasticNetCV(cv=10, random_state=0)  # renamed from 'ElasticNet', which shadowed the sklearn class
    enet_cv.fit(X_train, Y_train)
    all_features = enet_cv.coef_
    not_important_features_indices = np.where(all_features == 0)[0]
    X_train = np.delete(X_train, not_important_features_indices, axis=1)
    X_test = np.delete(X_test, not_important_features_indices, axis=1)
    return X_train, X_test, Y_train, Y_test
Code Example #4
File: selectors.py, Project: hpi-dhc/morpher-toolkit
    def fit(self, X, y):
        if self.cross_validate_:
            cv_model = ElasticNetCV(
                l1_ratio=[.1, .5, .7, .9, .95, .99, .995, 1],
                eps=0.001,
                n_alphas=100,
                fit_intercept=True,
                normalize=True,
                precompute='auto',
                max_iter=2000,
                tol=0.0001,
                cv=5,
                copy_X=True,
                verbose=0,
                n_jobs=-1,
                positive=False,
                selection='cyclic')

            cv_model.fit(X, y)

            if self.verbose:
                print('Optimal alpha: %.8f' % cv_model.alpha_)
                print('Optimal l1_ratio: %.3f' % cv_model.l1_ratio_)
                print('Number of iterations %d' % cv_model.n_iter_)

            self.classifier_ = ElasticNet(l1_ratio=cv_model.l1_ratio_,
                                          alpha=cv_model.alpha_,
                                          max_iter=cv_model.n_iter_,
                                          fit_intercept=True,
                                          normalize=True)

        self.classifier_.fit(X, y)
Code Example #5
def learn_for(reviews, i):
    reg = ElasticNetCV(fit_intercept=True, alphas=[
                       0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    u = reviews[i]
    us = list(range(reviews.shape[0]))  # Python 3: range objects don't support del
    del us[i]
    ps, = np.where(u.toarray().ravel() > 0)
    x = reviews[us][:, ps].T
    y = u.data
    kf = KFold(len(y), n_folds=4)
    predictions = np.zeros(len(ps))
    for train, test in kf:
        xc = x[train].copy().toarray()
        x1 = np.array([xi[xi > 0].mean() for xi in xc])
        x1 = np.nan_to_num(x1)

        for j in range(xc.shape[0]):  # j, not i: avoid shadowing the user index
            xc[j] -= (xc[j] > 0) * x1[j]

        reg.fit(xc, y[train] - x1)

        xc = x[test].copy().toarray()
        x1 = np.array([xi[xi > 0].mean() for xi in xc])
        x1 = np.nan_to_num(x1)

        for j in range(xc.shape[0]):
            xc[j] -= (xc[j] > 0) * x1[j]

        p = reg.predict(xc).ravel()  # predict the whole fold at once instead of mapping per row
        predictions[test] = p
    return predictions
Code Example #6
File: app.py, Project: paradoxn/flask_sklearn_demo
def algor_ElasticNetCV():
    request_content = request.form.to_dict()
    df = pd.read_csv(session.get('file'))
    X_train,Y_train = onehot(df)
    params = request_content
    if params['alpha'] != 'None':
        params['alpha'] = [float(params['alpha'])]
    else:
        params['alpha'] = None
    # print(type(params['max_depth']))
    # print(params['max_iter'])
    # elif params['class_weight'] == 'l1':
    # max_iter = int(round(float(params['max_iter'])))
    model = ElasticNetCV(alphas=params['alpha'],
                         l1_ratio=float(params['l1_rotio']),  # key spelling matches the HTML form field
                         fit_intercept=params['fit_intercept'] == 'True',  # bool() of a non-empty string is always True
                         normalize=params['normalize'] == 'True',
                         max_iter=int(params['max_iter']),
                         tol=float(params['tol']))
    model.fit(X_train, Y_train)
    y_pred = model.predict(X_train)

    context = {
        'algor': 'Elastic Net regression',
        'roc_AUC': 'None (classifiers only)',
        'ACC': 'None (classifiers only)',
        'Recall': 'None (classifiers only)',
        'F1_score': 'None (classifiers only)',
        'Precesion': 'None (classifiers only)',
        'R_2': round(metrics.r2_score(Y_train, y_pred), 2)
    }
    return render_template('ElasticNetCV.html', **context)
Code Example #7
def l1_enet(ratio):
    '''
    input l1 ratio and return the model, non zero coefficients and cv scores
    training elastic net properly
    '''
    enet_cv = ElasticNetCV(cv=rkf,
                           l1_ratio=ratio,
                           max_iter=1e7,
                           tol=0.001,
                           fit_intercept=fit_int_flag,
                           random_state=rs)
    enet_cv.fit(X_train, y_train)

    # the optimal alpha
    enet_alpha = enet_cv.alpha_
    enet_coefs = enet_cv.coef_
    n_nonzero = len(np.where(abs(enet_coefs) >= 1e-7)[0])
    # Access the errors
    y_predict_test = enet_cv.predict(X_test)
    y_predict_train = enet_cv.predict(X_train)

    # error per cluster
    enet_RMSE_test = np.sqrt(mean_squared_error(y_test, y_predict_test))
    enet_RMSE_train = np.sqrt(mean_squared_error(y_train, y_predict_train))

    return enet_cv, enet_alpha, n_nonzero, enet_RMSE_test, enet_RMSE_train
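The function above reads rkf, fit_int_flag, rs, X_train, X_test, y_train and y_test from the enclosing module. A minimal sketch of plausible definitions (assumed, not from the original project), using a RepeatedKFold splitter as the cv argument:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import mean_squared_error  # needed in module scope by l1_enet
from sklearn.model_selection import RepeatedKFold, train_test_split

X, y = make_regression(n_samples=150, n_features=30, noise=1.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
rs = 0                # random_state passed to ElasticNetCV
fit_int_flag = True   # fit_intercept flag
rkf = RepeatedKFold(n_splits=5, n_repeats=2, random_state=rs)

enet_cv, enet_alpha, n_nonzero, rmse_test, rmse_train = l1_enet(ratio=0.5)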
Code Example #8
def calculateAccuracyWithModel(indbest, X_train, y_train, X_test, y_test):
    indbest = list(indbest)
    evalTrain = evaluatedMatrix(indbest, X_train)
    evalTest = evaluatedMatrix(indbest, X_test)

    # Linear regression with elastic net
    regr = ElasticNetCV(random_state=0)
    regr.fit(evalTrain, y_train)
    y_pred = regr.predict(evalTest)
    print(r2_score(y_test, y_pred))
    indbest, regr.coef_ = sortCoef(indbest, regr.coef_)
    model = ""
    i = 0
    if regr.intercept_ not in [0, -0]:
        model = str(coefStr(regr.intercept_))
    for ind in indbest:
        if regr.coef_[i] not in [0, -0]:
            if "-" in str(regr.coef_[i]):
                indCoef = str(coefStr(regr.coef_[i])) + "*" + str(ind)
            elif len(model) > 0:
                indCoef = "+" + str(coefStr(regr.coef_[i])) + "*" + ind
            else:
                indCoef = str(coefStr(regr.coef_[i])) + "*" + ind
            model = model + indCoef
        i = i + 1
    print(model)
Code Example #9
File: project_pandas.py, Project: mshariqa/capstone
def score(inEval, X, y):
    indMatrix = pd.DataFrame()
    i = 0
    listEval = list(inEval)
    for ele in listEval:
        evalString = updatedEvalString(ele)
        #Exception handling against log(0)
        try:
            indMatrix[str.format('col{0}', i)] = eval(evalString)
        except ZeroDivisionError:
            continue
        i = i + 1
    # Remove inf with 1
    indMatrix = indMatrix.replace([np.inf, -np.inf], 1)

    # Linear regression with elastic net
    """
    regr = ElasticNet(random_state=0, l1_ratio = 0.1)
    regr.fit(indMatrix,y_train)
    
    
    y_p = regr.predict(indMatrix)
    regr.score(indMatrix,y_train)"""
    regr = ElasticNetCV(cv=2, random_state=0, max_iter=5000)
    regr.fit(indMatrix, y)
    return (regr.score(indMatrix, y))
Code Example #10
def main():
    print('load data...')
    train, test = data_util.load_dataset()

    y_train_all = train['y']
    del train['ID']
    del train['y']
    id_test = test['ID']
    del test['ID']
    print('train:', train.shape, ', test:', test.shape)

    random_state = 420
    cv_model = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, .995, 1], eps=0.001, n_alphas=100, fit_intercept=True,
                            normalize=True, precompute='auto', max_iter=2000, tol=0.0001, cv=5,
                            copy_X=True, verbose=0, n_jobs=-1, positive=False, random_state=random_state,
                            selection='cyclic')
    cv_model.fit(train.values, y_train_all)

    print('Optimal alpha: %.8f' % cv_model.alpha_)
    print('Optimal l1_ratio: %.3f' % cv_model.l1_ratio_)
    print('Number of iterations %d' % cv_model.n_iter_)

    print('train model with best parameters from CV...')
    model = ElasticNet(l1_ratio=cv_model.l1_ratio_, alpha=cv_model.alpha_, max_iter=cv_model.n_iter_,
                       fit_intercept=True, normalize=True)
    model.fit(train.values, y_train_all)

    print('predict submit...')
    y_pred = model.predict(test.values)
    df_sub = pd.DataFrame({'ID': id_test, 'y': y_pred})
    df_sub.to_csv('elasticnet_model_result.csv', index=False)   # 0.55828
Code Example #11
def creat_model1(x,y):
    
    # rng = np.random.RandomState(31337)
    # kf = KFold(n_splits=10, shuffle=True, random_state=rng)
    # ground = []
    # pred = []
    # xgb_model = xgb.XGBRegressor(n_estimators = 1000, learning_rate=0.05).fit(x,y)
    # score = make_scorer(my_custom_loss_func, greater_is_better=False)
    # scores = -cross_val_score(xgb_model, x, y,cv=10,scoring=score)
    # print(scores)
    
    # for train_index,test_index in kf.split(x):
        # xgb_model = xgb.XGBRegressor(n_estimators = 1000, subsample = 0.8, learning_rate=0.1 ).fit(x[train_index],y[train_index])
        # predictions = xgb_model.predict(x[test_index])
        # actuals = y[test_index]
        # print(my_custom_loss_func(actuals, predictions))
    # xgb_model = xgb.XGBRegressor()
    # clf = 
    
    # clf = LogisticRegressionCV()#cv=10, penalty = 'l2',solver = 'liblinear')
    # print(y)
    # clf.fit(x, y)  
    # score = make_scorer(my_custom_loss_func, greater_is_better=False)
    # scores = -cross_val_score(clf, x, y,cv=10,scoring=score)
    # print(scores)
    enet = ElasticNetCV(l1_ratio = 0.7, cv=10)
    enet.fit(x, y)  
    score = make_scorer(my_custom_loss_func, greater_is_better=False)
    scores = -cross_val_score(enet, x, y,cv=10,scoring=score)
    print(scores)
Code Example #12
File: Try3.py, Project: juandoso/Competitions
def regression_NumMosquitos(Xtr, ytr, Xte):
    from sklearn.linear_model import ElasticNetCV
    #model_nm = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1], max_iter=10000, cv=4)
    model_nm = ElasticNetCV()
    model_nm.fit(Xtr, ytr)
    results_nm = model_nm.predict(Xte)
    return results_nm
Code Example #13
File: Main.py, Project: RomanGutin/Roman_Numerals
def Model(Encoding, Scores, Run_name, step_size, loop_dict, var_dict,
          round_data, ElasticNet_dict, l1_ratios, All_data):
    Pearson_correlations = []
    Data = Encoding.copy()  # copy, so the original encoding is not modified
    Data_sets = CV_split(Data, 5)  # split into five CV folds
    for cv_round in range(len(Data_sets)):
        score_dict = Scores.copy()  # randomized scores at the start of each round
        Test_set = Data_sets[cv_round]
        Train_set = exclude(Data_sets,
                            cv_round)  # keep every fold except the test fold
        Train_set = pd.concat(Train_set)  # concatenate the training folds into one dataframe
        X = Train_set.iloc[:, :Train_set.shape[1] - 1]  # features
        X['Intercept'] = 1  # add an intercept column
        y = pd.DataFrame(Train_set['pMeas'])  # targets
        AM_EndOfLoopError = []
        AM_EndOfLoopError.append(Get_Error(
            X, y, score_dict))  # The Error Before AM Tuning #
        """AM Tuning Looping Starts Here and Adds a value to End of Loop Error"""
        Loop_num = 1  #
        AM_EndOfLoopError.append(
            Amplitude_Tuning(X, y, step_size, score_dict, Loop_num, Run_name,
                             cv_round, loop_dict, var_dict))
        round_data[cv_round] = loop_dict
        while ((AM_EndOfLoopError[-1] - AM_EndOfLoopError[-2]) /
               (AM_EndOfLoopError[-2])) < -0.001:
            Loop_num += 1
            AM_EndOfLoopError.append(
                Amplitude_Tuning(X, y, step_size, score_dict, Loop_num,
                                 Run_name, cv_round, loop_dict, var_dict))
            round_data[cv_round] = loop_dict
        loop_dict['AM Time Series Data'] = AM_EndOfLoopError
        loop_dict['Final Scores'] = score_dict
        """  AM Tuning is now Finished for the CV_split, Elastic Net is Next """
        EN = ElasticNetCV(l1_ratio=l1_ratios,
                          cv=5,
                          copy_X=True,
                          normalize=True,
                          random_state=23)
        X_train = X.copy()
        X_train.replace(score_dict, inplace=True)
        y_train = y.copy()
        X_test = Test_set.iloc[:, :Test_set.shape[1] - 1]
        X_test.replace(score_dict, inplace=True)
        X_test['Intercept'] = 1
        y_test = pd.DataFrame(Test_set['pMeas'])
        EN.fit(X_train, y_train)
        y_pred = pd.DataFrame(EN.predict(X_test))
        Pearson_correlations.append(np.corrcoef(y_test.T, y_pred.T)[0][1])
        """Save Everything """
        ElasticNet_dict["y_pred"] = y_pred
        ElasticNet_dict['y_test'] = y_test
        ElasticNet_dict['Alpha'] = EN.alpha_
        ElasticNet_dict['l1_ratio'] = EN.l1_ratio_
        ElasticNet_dict['Parameters'] = EN.get_params()
        ElasticNet_dict["AlphaSpace"] = EN.alphas_
        loop_dict['ElasticNet'] = ElasticNet_dict
        round_data[cv_round] = loop_dict
    All_data[Run_name] = round_data
    np.save("All Data.npy", All_data)
    return np.mean(Pearson_correlations)
Code Example #14
def train_EN_model(train_x, train_y, _predict_x):
    '''train_x, predict_x = \
        standarize_feature(_train_x, _predict_x)'''
    #l1_ratios = [1e-4, 1e-3, 1e-2, 1e-1]
    #l1_ratios = [1e-5, 1e-4, 1e-3]
    l1_ratios = [0.9, 0.92, 0.95, 0.97, 0.99]
    #l1_ratios = [0.5]
    min_mse = float('inf')  # the original used 1, which would miss candidates whose CV MSE exceeds 1
    best_l1_ratio = 0.95
    best_alpha = 0.5
    for r in l1_ratios:
        t1 = time.time()
        reg_en = ElasticNetCV(l1_ratio=r,
                              cv=5,
                              n_jobs=4,
                              verbose=1,
                              precompute=True)
        reg_en.fit(train_x, train_y)
        n_nonzeros = (reg_en.coef_ != 0).sum()
        _mse = np.mean(reg_en.mse_path_,
                       axis=1)[np.where(reg_en.alphas_ == reg_en.alpha_)[0][0]]
        if _mse < min_mse:
            min_mse = _mse
            best_l1_ratio = r
            best_alpha = reg_en.alpha_
            t2 = time.time()
    return best_l1_ratio, best_alpha
Code Example #15
def elastic_net_cv(problem, **kwargs):
    r"""High level description.

    Parameters
    ----------
    kwargs['elastic_net_reg_coefs'] must be a list of nonnegative float.  These
    are the multiplier for the penalty term in cross-validation of EN

    kwargs['elastic_net_ratio'] must be between 0 and 1

    kwargs['coef_tolerance'] must be a nonnegative float

    Returns
    -------
    output : tuple
        (optimum, maximum)

    """
    data_list = [datum['data']['values'] for datum in problem.data]
    data = numpy.array(data_list)
    elastic_net = ElasticNetCV(alphas=kwargs['elastic_net_reg_coefs'],
                               l1_ratio=kwargs['elastic_net_ratio'])
    elastic_net.fit(data.T, problem.goal['data']['values'])
    elastic_net_coefficients = elastic_net.coef_
    optimum = [
        problem.data[index]
        for index, element in enumerate(elastic_net_coefficients)
        if abs(element) > kwargs['coef_tolerance']
    ]
    maximum = elastic_net.score(data.T, problem.goal['data']['values'])
    output = (optimum, maximum)
    return output
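The kwargs-driven selection above boils down to fitting ElasticNetCV over a user-supplied alpha grid and keeping the inputs whose coefficient magnitude exceeds a tolerance. A minimal self-contained sketch of that idea (synthetic data; all names here are hypothetical):

import numpy as np
from sklearn.linear_model import ElasticNetCV

rng = np.random.default_rng(0)
data = rng.normal(size=(10, 200))        # 10 candidate signals, 200 samples each
goal = 2.0 * data[3] - 1.5 * data[7] + rng.normal(scale=0.1, size=200)

en = ElasticNetCV(alphas=[0.01, 0.1, 1.0], l1_ratio=0.5)
en.fit(data.T, goal)
selected = [i for i, c in enumerate(en.coef_) if abs(c) > 0.05]
print(selected)                          # expect indices 3 and 7 to survive
print(en.score(data.T, goal))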
Code Example #16
class _SkLearnElasticNetSolver(BaseSolver):
    @ex.capture
    def __init__(self, data_features: Matrix, output_samples: ColumnVector, n_alphas: int,
                 cross_validation_folds: int, elastic_net_factor: Scalar, _rnd):
        """
        The standard Scikit-Learn solver for Elastic-Net regression.

        Args:
            data_features(Matrix): The input data matrix ``nxd``.
            output_samples(ColumnVector): The output for the given inputs, ``nx1``.
            n_alphas(int): The number of total regularization terms which will be tested by this solver.
            cross_validation_folds(int): The number of cross-validation folds used in this solver.

        """
        super(_SkLearnElasticNetSolver, self).__init__(data_features, output_samples, n_alphas, cross_validation_folds)
        self._model = ElasticNetCV(cv=cross_validation_folds, n_alphas=n_alphas, random_state=_rnd,
                                   l1_ratio=elastic_net_factor, normalize=False)

    def fit(self) -> ColumnVector:
        """
        The method which fits the requested model to the given data.
        """
        self._model.fit(self._data_features, self._output_samples)
        self._fitted_coefficients = self._model.coef_
        return self._fitted_coefficients
Code Example #17
    def elasticNetRegNT(self, X, Y, nCV, l1_weights=None):
        """Run elastic net with the given params

        :param X: design matrix
        :param Y: true labels
        :param nCV: number of CVs
        :param l1_weights: weights of the lasso term
        :return:
        """

        # very difficult to choose alpha, better use CV
        # enet = ElasticNet(alpha=self.alpha, l1_ratio=0.8, fit_intercept=False)
        # enet = ElasticNetCV(fit_intercept=False, cv=nCV)
        if (self.useCV):
            # note: l1_weights is not a parameter of stock scikit-learn's
            # ElasticNetCV; this code appears to assume a patched fork
            enet = ElasticNetCV(cv=nCV, max_iter=self.maxItr, l1_weights=l1_weights,
                                fit_intercept=self.fit_intercept,
                                alphas=self.alphas, l1_ratio=self.l1_ratio)
            enet.fit(X, Y)
            self.cv_alpha = enet.alpha_
        else:
            enet = ElasticNet(alpha=self.alpha, l1_ratio=self.l1_ratio,
                              max_iter=self.maxItr, l1_weights=l1_weights)
            enet.fit(X, Y)

        if self.verbose:
            print("Num of iter: %d"%enet.n_iter_)
        # print("Best alpha: {}, l1_ratio: {}"
        #       .format(enet.alpha_, enet.l1_ratio_))
        # print(enet.get_params())
        ## plot regulation path for testing
        # testReg.lassoElasticnetPaths(X, Y)

        return enet.coef_, enet.intercept_
Code Example #18
def GLM(X_train, X_test, y_train):

    GLM_Model = ElasticNetCV(random_state=0, tol=0.01, cv=5, max_iter=20000)
    GLM_Model.fit(X_train, y_train)
    y_prediction = GLM_Model.predict(X_test)

    return y_prediction
Code Example #19
def elastic_net(Xtrain, Ytrain, Xdev, Ydev, verbose=False):
    """
    Trains an Elastic Net linear model on the provided data. Scores the model
    and returns both the model and the score. It also prints the optimal
    hyperparameters.

    Inputs:
        Xtrain
        Ytrain
        Xdev
        Ydev

    Returns:
        float: the R^2 on the dev data for the best model specifications.
        ElasticNetCV: the trained model.
    """
    print("\n========================\nTraining Elastic Net\n")
    enet = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1],
                        max_iter=10000,  # pass an int; newer scikit-learn rejects the float 1e4
                        tol=1e-2)
    enet.fit(Xtrain, Ytrain)
    best_score = enet.score(Xdev, Ydev)
    results = {
        "R2": best_score,
        "alpha": enet.alpha_,
        "l1_ratio": enet.l1_ratio_
    }
    if verbose:
        results['coefficients'] = enet.coef_.tolist()
    import json
    print(json.dumps(results, indent=4))  # print() has no indent kwarg; json gives the intended pretty output
    return best_score, enet
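A hedged usage sketch for the function above (synthetic data; the function itself prints the banner and returns the dev-set R^2 plus the fitted ElasticNetCV):

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=300, n_features=15, noise=2.0, random_state=1)
Xtrain, Xdev, Ytrain, Ydev = train_test_split(X, y, test_size=0.3, random_state=1)
best_score, model = elastic_net(Xtrain, Ytrain, Xdev, Ydev, verbose=False)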
Code Example #20
def runsklelasticnetcv(alpha, x_data, y_data, descent_type):
    """
    This function runs Sci-Kit Learn's ElasticNetCV, which uses coordinate
    descent with cross-validation to select the optimal lambda
    (regularization penalty). It takes as input alpha, the predictor and
    response data, and the descent type ('cyclic' or 'random').

    :param alpha: float
        Value controlling the ElasticNet's l1_ratio, where alpha = 0 is a full
        L2 penalty and alpha = 1 is a full L1 penalty. Values in between give
        a combination of the L1 and L2 penalties.
    :param x_data: numpy array
        Data containing the predictors
    :param y_data: numpy array
        Data containing the response
    :param descent_type: str
        Selection of the coordinate descent algorithm type, either 'random'
        or 'cyclic'
    :return betaskl: list of float values
    :return lambskl: float

    """

    encv = ElasticNetCV(l1_ratio=alpha,
                        fit_intercept=False,
                        tol=0.000001,
                        selection=descent_type,
                        max_iter=10000)
    encv.fit(x_data, y_data)
    lambskl = encv.alpha_
    betaskl = encv.coef_

    return betaskl, lambskl
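Per the docstring, the alpha argument is passed through as l1_ratio while the returned lambskl is the CV-selected penalty strength. A small assumed usage comparing the two descent orders on synthetic data:

from sklearn.datasets import make_regression

x_data, y_data = make_regression(n_samples=100, n_features=10, noise=0.3, random_state=0)
for descent in ('cyclic', 'random'):
    betaskl, lambskl = runsklelasticnetcv(0.5, x_data, y_data, descent)
    print(descent, lambskl)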
Code Example #21
def learn_for(reviews, i):
    reg = ElasticNetCV(fit_intercept=True,
                       alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    nusers, nmovies = reviews.shape
    u = reviews[i]
    us = np.arange(reviews.shape[0])
    us = np.delete(us, i)
    ps, = np.where(u.ravel() > 0)
    x = reviews[us][:, ps].T
    kf = KFold(len(ps), n_folds=4)
    predictions = np.zeros(len(ps))
    for train, test in kf:
        xc = x[train].copy()
        x1 = np.array([xi[xi > 0].mean() for xi in xc])
        x1 = np.nan_to_num(x1)

        for j in range(xc.shape[0]):  # j, not i: avoid shadowing the user index
            xc[j] -= (xc[j] > 0) * x1[j]

        reg.fit(xc, u[ps][train] - x1)  # index into the rated movies, not the raw row

        xc = x[test].copy()
        x1 = np.array([xi[xi > 0].mean() for xi in xc])
        x1 = np.nan_to_num(x1)

        for j in range(xc.shape[0]):
            xc[j] -= (xc[j] > 0) * x1[j]

        p = reg.predict(xc).ravel()
        predictions[test] = p
    fill_preds = np.zeros(nmovies)
    fill_preds[ps] = predictions
    return fill_preds
Code Example #22
def predict(X_train, X_test, y_train, y_test, features, pic_name, dir):
    """
    The function predicts the tags of X_test by the elastic net model
    :param X_train:
    :param X_test:
    :param y_train:
    :param y_test:
    :param features:
    :param pic_name:
    :param dir:
    :return:
    """
    model = ElasticNetCV(cv=4)
    model.fit(X_train, y_train)

    predict = model.predict(X_test)
    print("mean absolute error: ", mean_absolute_error(y_test, predict))
    print("r2 error: ", sklearn.metrics.r2_score(y_test, predict))
    print("alpha: ", model.alpha_)
    print("alphas: ", model.alphas_)
    print("iter: ", model.n_iter_)

    coefficients = [(d, c) for d, c in zip(features, model.coef_)]
    coefficients_str = ""
    for a, b in coefficients:
        coefficients_str += a + ": " + str("%.4f" % b) + "\n"
    coefficients_str = coefficients_str.rstrip("\n")  # strip only the trailing newline

    print("coef: ", coefficients_str)

    Plot_output.plot_coefficients(coefficients_str, pic_name=pic_name, dir=dir)
    Plot_output.plot_graph(X_test, y_test, predict, pic_name, dir)
Code Example #23
def learn_for(reviews, i):
    reg = ElasticNetCV(fit_intercept=True,
                       alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    u = reviews[i]
    us = range(reviews.shape[0])
    us = np.delete(us, i)
    ps, = np.where(u.toarray().ravel() > 0)
    x = reviews[us][:, ps].T
    y = u.data
    kf = KFold(n_splits=4)
    # predictions stays 0 for movies that were never rated, to match the
    # output shape of the other models
    predictions = np.zeros(len(u.toarray().ravel()))
    for train, test in kf.split(y):
        xc = x[train].copy().toarray()
        x1 = np.array([xi[xi > 0].mean() for xi in xc])
        x1 = np.nan_to_num(x1)

        for j in range(xc.shape[0]):  # j, not i: avoid shadowing the user index
            xc[j] -= (xc[j] > 0) * x1[j]

        reg.fit(xc, y[train] - x1)

        xc = x[test].copy().toarray()
        x1 = np.array([xi[xi > 0].mean() for xi in xc])
        x1 = np.nan_to_num(x1)

        for j in range(xc.shape[0]):
            xc[j] -= (xc[j] > 0) * x1[j]

        p = np.array(reg.predict(xc)).ravel()
        predictions[test] = p
    return predictions
Code Example #24
def ElasticNet(dataX, dataY, pre_indice):
    '''
    Elastic-net regression: obtain a weight coefficient for each feature
    :param dataX:
    :param dataY:
    :param pre_indice:
    :return:
    '''
    Encv = ElasticNetCV(alphas=[0.0001, 0.0005, 0.001, 0.01, 0.1, 1, 10],
                        l1_ratio=[.01, .1, .5, .9, .99],
                        max_iter=5000,
                        random_state=0)
    Encv.fit(dataX, dataY)
    print(Encv)
    weight_coef = Encv.coef_
    print("弹性网络回归的权重系数", list(weight_coef))
    remain_indice = []
    for index, coef in enumerate(weight_coef):
        if (coef != 0):
            remain_indice.append(index)
    print("弹性网络保留的特征序号", remain_indice)
    # Oringal_indice=[]
    # for i in remain_indice:
    #     Oringal_indice.append(pre_indice[i])
    # weight_coef=[abs(weight) for weight in weight_coef]
    # print("弹性网络回归权重系数为",weight_coef)#权重系数越大,
    # # 表明该特征对响应变量的影响越大,所以该特征的重要度越高
    # indice=np.argsort(weight_coef)
    # print(indice)
    return remain_indice
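The function above (whose name, note, shadows sklearn's ElasticNet class) returns the indices of features with nonzero elastic-net coefficients. A hedged sketch of how it might be called for feature selection:

import numpy as np
from sklearn.datasets import make_regression

dataX, dataY = make_regression(n_samples=120, n_features=25, n_informative=5,
                               noise=0.5, random_state=0)
kept = ElasticNet(dataX, dataY, pre_indice=list(range(25)))
dataX_reduced = dataX[:, kept]   # keep only the surviving columns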
Code Example #25
File: usermodel.py, Project: the07/ML
def learn_for(reviews, i):
    reg = ElasticNetCV(fit_intercept=True, alphas=[
                       0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    u = reviews[i]
    us = list(range(reviews.shape[0]))  # Python 3: range objects don't support del
    del us[i]
    ps, = np.where(u.toarray().ravel() > 0)
    x = reviews[us][:, ps].T
    y = u.data
    kf = KFold(len(y), n_folds=4)
    predictions = np.zeros(len(ps))
    for train, test in kf:
        xc = x[train].copy().toarray()
        x1 = np.array([xi[xi > 0].mean() for xi in xc])
        x1 = np.nan_to_num(x1)

        for j in range(xc.shape[0]):  # j, not i: avoid shadowing the user index
            xc[j] -= (xc[j] > 0) * x1[j]

        reg.fit(xc, y[train] - x1)

        xc = x[test].copy().toarray()
        x1 = np.array([xi[xi > 0].mean() for xi in xc])
        x1 = np.nan_to_num(x1)

        for j in range(xc.shape[0]):
            xc[j] -= (xc[j] > 0) * x1[j]

        p = reg.predict(xc).ravel()  # predict the whole fold at once instead of mapping per row
        predictions[test] = p
    return predictions
Code Example #26
    def predict(self, X):
        binary = X > 0
        if self.normalize == True:
            X = self.norm.fit_transform(X)

        num_users, num_movies = X.shape
        clf = ElasticNetCV(alphas=[0.1])
        predicted = X.copy()

        for user in range(num_users):
            #bool array for movies rated by user
            movie_user = binary[user]
            #which users to consider as attributes for regression, in this case all except current user
            neighbors = np.ones((num_users), dtype=bool)
            neighbors[user] = False
            X_train_user = X[neighbors]
            X_train_user = X_train_user[:, movie_user].T
            y_train_user = X[user, movie_user]
            clf.fit(X_train_user, y_train_user)
            X_test_user = X[neighbors]
            X_test_user = X_test_user[:, ~movie_user].T
            predicted[user, ~movie_user] = clf.predict(X_test_user)

        if self.normalize == True:
            predicted = self.norm.inverse_transform(predicted)

        return predicted
Code Example #27
class my_EN_Classifier(BaseEstimator):
    def __init__(self):
        self.clf = ElasticNetCV(l1_ratio=np.linspace(0.1, 1.0, 15),
                                n_alphas=100,
                                max_iter=10000,
                                n_jobs=-1,
                                cv=10)
        self.sd = StandardScaler()
        self.win = Winsorizer(quantile=0.01)  # 100*quantile = percentail

    def fit(self, X, y):
        X_norm1 = self.sd.fit_transform(X)
        self.win.fit(X_norm1)  # fit returns the transformer, not transformed data
        X_norm = self.win.transform(X_norm1)
        self.clf.fit(X_norm, y)
        alpha = self.clf.alpha_
        l1 = self.clf.l1_ratio_
        self.refit_estimator = ElasticNet(alpha=alpha,
                                          l1_ratio=l1,
                                          max_iter=10000)
        self.refit_estimator.fit(X_norm, y)
        return self

    def predict(self, X):
        X_norm1 = self.sd.transform(X)
        X_norm = self.win.transform(X_norm1)
        return self.refit_estimator.predict(X_norm)

    def get_info(self):
        info = {}
        info['best_alpha'] = self.clf.alpha_
        info['best_l1'] = self.clf.l1_ratio_
        info['refit_coef'] = self.refit_estimator.coef_
        return info
Code Example #28
def mutation_impact(data, attempt):
    impact = {m: None for m in MUTATIONS}
    # Begin CODE
    Y = []
    X = []

    for patient in data:
        hamd = patient.get('baseline_hamd')
        Y.append(hamd)
        patient_mutations = []
        for mutation in patient.iter('Mutation'):
            chromosome = mutation.get('chromosome')
            pos = mutation.get('pos')
            ref = mutation.get('ref')
            alt = mutation.get('alt')
            s = f'chrom_{chromosome}.pos_{pos}.ref_{ref}.alt_{alt}'
            name = clean_name(s, MUTATIONS)
            patient_mutations.append(name)
        patient_X = []
        for mutation in MUTATIONS:
            if mutation in patient_mutations:
                patient_X.append(1)
            else:
                patient_X.append(0)
        X.append(patient_X)

    reg = ElasticNetCV()
    reg.fit(X, Y)
    for i, m in enumerate(MUTATIONS):
        impact[m] = float(reg.coef_[i])

    # End CODE
    return impact
Code Example #29
def use_ElasticNet():
    en = ElasticNet(alpha=0.001, l1_ratio=0.8, normalize=True)
    en_scores = cross_val_score(en, X, y, cv=16, scoring='r2')
    encv = ElasticNetCV(alphas=(0.1, 0.01, 0.005, 0.0025, 0.001),
                        l1_ratio=(0.1, 0.25, 0.5, 0.75, 0.8),
                        normalize=True)
    encv.fit(X, y)
    print("ElasticNet : ", en_scores.mean())
Code Example #30
def elasticnet_reg(x, y):
    elasticnetcv = ElasticNetCV(cv=20)
    elasticnetcv.fit(x, y)
    elasticnetcv_score = elasticnetcv.score(x, y)
    elasticnetcv_alpha = elasticnetcv.alpha_
    print('ElasticNet R square', elasticnetcv_score)
    print('ElasticNet Alpha', elasticnetcv_alpha)
    return elasticnetcv.coef_
Code Example #31
def enetCV():
    print ("Doing elastic net")
    cross_val = cross_validation.ShuffleSplit(len(base_X), n_iter=5, test_size=0.2, random_state=0)  # pre-0.18 API; modern sklearn uses model_selection.ShuffleSplit(n_splits=...)
    clf4 = ElasticNetCV(cv=cross_val)
    clf4.fit(base_X, base_Y)
    print ("Score = %f" % clf4.score(base_X, base_Y))
    clf4_pred = clf4.predict(X_test)
    write_to_file("elasticCV.csv", clf4_pred)
Code Example #32
def fit_elastic_net_cv(X, y, nfolds=5):
    from sklearn.linear_model import ElasticNetCV
    # The parameter l1_ratio corresponds to alpha in the glmnet R package 
    # while alpha corresponds to the lambda parameter in glmnet
    enet = ElasticNetCV(l1_ratio=np.linspace(0.01, 1.0, 20),
                        alphas=np.exp(np.linspace(-6, 5, 20)),
                        cv=nfolds)
    enet.fit(X,y)
    return enet
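Following the glmnet mapping in the comment, the fitted attributes read back as follows (a small assumed usage on synthetic data):

from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=40, noise=1.0, random_state=0)
enet = fit_elastic_net_cv(X, y, nfolds=5)
print(enet.l1_ratio_)   # glmnet's "alpha" (L1/L2 mixing weight)
print(enet.alpha_)      # glmnet's "lambda" (penalty strength)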
Code Example #33
def _elasticnetcv(*,
                  train,
                  test,
                  x_predict=None,
                  metrics,
                  l1_ratio=0.5,
                  eps=0.001,
                  n_alphas=100,
                  alphas=None,
                  fit_intercept=True,
                  normalize=False,
                  precompute='auto',
                  max_iter=1000,
                  tol=0.0001,
                  cv=None,
                  copy_X=True,
                  verbose=0,
                  n_jobs=None,
                  positive=False,
                  random_state=None,
                  selection='cyclic'):
    """For more info visit : 
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNetCV.html#sklearn.linear_model.ElasticNetCV
    """

    model = ElasticNetCV(l1_ratio=l1_ratio,
                         eps=eps,
                         n_alphas=n_alphas,
                         alphas=alphas,
                         fit_intercept=fit_intercept,
                         normalize=normalize,
                         precompute=precompute,
                         max_iter=max_iter,
                         tol=tol,
                         cv=cv,
                         copy_X=copy_X,
                         verbose=verbose,
                         n_jobs=n_jobs,
                         positive=positive,
                         random_state=random_state,
                         selection=selection)
    model.fit(train[0], train[1])
    model_name = 'ElasticNetCV'
    y_hat = model.predict(test[0])

    if metrics == 'mse':
        accuracy = _mse(test[1], y_hat)
    elif metrics == 'rmse':
        accuracy = _rmse(test[1], y_hat)
    elif metrics == 'mae':
        accuracy = _mae(test[1], y_hat)

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
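A hedged usage sketch for the wrapper above: train and test are (X, y) tuples, metrics picks the error function, and the helpers _mse/_rmse/_mae are assumed to be defined in the same module:

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=250, n_features=12, noise=1.0, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
name, err, preds = _elasticnetcv(train=(X_tr, y_tr), test=(X_te, y_te),
                                 x_predict=X_te, metrics='rmse', cv=5)
print(name, err)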
Code Example #34
File: fea-sel.py, Project: ox040c/bnp
def elasticNet(argv):
    data = pd.read_csv(argv, index_col=0)
    y = data['target']
    X = data.drop('target', axis=1)
    featureNames = X.columns.values
    enet = ElasticNetCV(n_jobs=-1, normalize=True)
    enet.fit(X, y)
    dropIdx = featureNames[abs(enet.coef_) < 1e-5]  # compare magnitudes; raw coef_ < 1e-5 would also drop large negative weights
    print("Elastic Net drop: %d" % len(dropIdx))
    print(dropIdx)
    data.drop(dropIdx, axis=1, inplace=True)
    data.to_csv(argv+'.enet.csv')
    return enet
Code Example #35
File: lccb.py, Project: jmmcd/PODI
def LCCB_coevo(fitness_fn, pop):
    y = fitness_fn.train_y
    # Make a new array composed of pop[i].semantics for all i
    # (pop[i].semantics has already been calculated)
    X = None
    for ind in pop:
        if (ind.phenotype and ind.fitness != sys.maxsize
            and all(np.isfinite(ind.semantics))):
            col = ind.semantics
        else:
            print("Omitting a column")
            col = np.zeros(len(y))
        if X is None:
            X = col
        else:
            X = np.c_[X, col]

    eps = 5e-3

    # FIXME FFX processes the data so that has zero mean and unit
    # variance before applying the LR... should we do that?

    # Use ElasticNet with cross-validation, which will automatically
    # get a good value for regularisation
    model = ElasticNetCV()
    model.fit(X, y)
    coefs = model.coef_
    output = model.predict(X)
    rmse = fitness_fn.rmse(y, output)
    print("rmse", rmse)

    # Assign the magnitude of coefficients as individual fitness
    # values. Have to construct a new individual because tuples are
    # immutable. FIXME this is not a great method -- it's likely that
    # the population will converge on one or a few basis functions,
    # and then the performance of the ENet will decrease because there
    # won't be enough independent basis functions to work with.
    pop = [variga.Individual(genome=pop[i].genome,
                             used_codons=pop[i].used_codons,
                             fitness=-abs(coefs[i]),
                             phenotype=pop[i].phenotype,
                             readable_phenotype=pop[i].readable_phenotype,
                             semantics=pop[i].semantics)
           for i in range(len(pop))]

    pop.sort(key=variga.ind_compare)
Code Example #36
def predict(train):
    binary = (train > 0)
    reg = ElasticNetCV(fit_intercept=True, alphas=[
                       0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    norm = NormalizePositive()
    train = norm.fit_transform(train)

    filled = train.copy()
    # iterate over all users
    for u in range(train.shape[0]):
        # remove the current user from the training data
        curtrain = np.delete(train, u, axis=0)
        bu = binary[u]
        if np.sum(bu) > 5:
            reg.fit(curtrain[:,bu].T, train[u, bu])

            # fill in the values that were missing
            filled[u, ~bu] = reg.predict(curtrain[:,~bu].T)
    return norm.inverse_transform(filled)
Code Example #37
File: encv.py, Project: clover9gu/pydata-learn
def regress(x, y, title):
    clf = ElasticNetCV(max_iter=200, cv=10, l1_ratio = [.1, .5, .7, .9, .95, .99, 1])

    clf.fit(x, y)
    print "Score", clf.score(x, y)

    pred = clf.predict(x)
    plt.title("Scatter plot of prediction and " + title)
    plt.xlabel("Prediction")
    plt.ylabel("Target")
    plt.scatter(y, pred)

    # Show perfect fit line
    if "Boston" in title:
        plt.plot(y, y, label="Perfect Fit")
        plt.legend()

    plt.grid(True)
    plt.show()
Code Example #38
def predict(train):
    binary = (train > 0)
    reg = ElasticNetCV(fit_intercept=True, alphas=[
                       0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    norm = NormalizePositive()
    train = norm.fit_transform(train)

    filled = train.copy()
    # iterate over all users
    for u in range(train.shape[0]):
        # remove the current user for training
        curtrain = np.delete(train, u, axis=0)
        bu = binary[u]
        if np.sum(bu) > 5:
            reg.fit(curtrain[:,bu].T, train[u, bu])

            # Fill the values that were not there already
            filled[u, ~bu] = reg.predict(curtrain[:,~bu].T)
    return norm.inverse_transform(filled)
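A minimal sketch of driving this predict on a dense 0-filled ratings matrix (rows are users, columns are movies, 0 means unrated); NormalizePositive is assumed to be the companion normalization helper defined elsewhere in the same project:

import numpy as np

rng = np.random.RandomState(0)
train = rng.randint(0, 6, size=(50, 80)).astype(float)
filled = predict(train)
print(filled.shape)   # same shape as train, with unrated cells filled in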
Code Example #39
def enet_granger_causality_cv(X_t, y_t, cv, alphas, top_num=None, top_perc=4,max_iter=100, lambdas=None):

    # alpha here is the l1_ratio
    if lambdas is not None:
        use_lambdas = np.tile(lambdas, len(alphas)).reshape(len(alphas), len(lambdas))
        enet = ElasticNetCV(l1_ratio=alphas, alphas=use_lambdas, cv=cv, max_iter=max_iter)
        fit = enet.fit(X_t, y_t)

        use_lambdas = fit.alphas_
        use_lambdas = np.tile(use_lambdas, len(alphas)).reshape(len(alphas), len(lambdas))
        print "Used lambdas"
        print use_lambdas

    else:
        enet = ElasticNetCV(l1_ratio=alphas,  cv=cv, max_iter=max_iter)
        fit  = enet.fit(X_t, y_t)
        use_lambdas = fit.alphas_


    # lambdas is a matrix

    cv_mses = enet.mse_path_.sum(axis=2).flatten()


    cv_alphas = np.repeat(alphas, use_lambdas.shape[1])
    cv_lambdas = use_lambdas.flatten()

    if top_num is None:
        print("Num cv alphas: ", len(cv_alphas))

        top_num = int(len(cv_alphas) * top_perc / 100.0)
        print("Top num ", top_num)

    # this will keep the smallest
    top_indices, top_mses = get_min_k(cv_mses, top_num)

    top_lambdas = cv_lambdas[top_indices]
    top_alphas = cv_alphas[top_indices]

    top_df = pd.DataFrame(data={"lambda.min": top_lambdas, "alpha": top_alphas, "error.min": top_mses})

    return top_df
Code Example #40
File: sysgenb.py, Project: kemaleren/sysgenb
def train_model(data, target, n_iter, rate):
    """Bootstraps, trains ElasticNetCV model, selects features, and
    trains final linear regression model.

    Returns model and selected features.

    """
    coefs = []
    for i in range(n_iter):
        print "bootstrap iter {}".format(i)
        indices = np.random.choice(len(data), size=len(data), replace=True)
        sample_data = data[indices]
        sample_target = target[indices]
        model = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1],
                             max_iter=10000, n_jobs=4)
        model.fit(sample_data, sample_target)
        coefs.append(model.coef_)
    coefs = np.vstack(coefs)
    rate_selected = make_rates(coefs)
    selected1 = np.nonzero(rate_selected >= rate)[0]
    selected2 = np.argsort(rate_selected)[-50:]
    selected = selected1 if len(selected1) < len(selected2) else selected2
    model = LinearRegression()
    model.fit(data[:, selected], target)

    model_full = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1],
                              max_iter=10000, n_jobs=4)
    model_full.fit(data, target)

    return model_full, model, selected, coefs
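make_rates is not shown in this snippet; a plausible stand-in (an assumption, not the project's code) would compute, for each feature, the fraction of bootstrap fits in which it received a non-negligible coefficient:

import numpy as np

def make_rates(coefs, tol=1e-8):
    # coefs: (n_bootstrap, n_features) stack of fitted coefficient vectors
    return (np.abs(coefs) > tol).mean(axis=0)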
Code Example #41
File: supervised.py, Project: boomsbloom/nlp_fmri
def eNetModel(data, labels, featureNames, texts, documents, nFolds):
    # run SVM with grid search for parameters and leave-one-out cross validation
    kf = KFold(len(texts), n_folds=nFolds)
    acc = 0
    mean_coefs = []
    for train, test in kf:

        # test_docs = {}
        label_train = labels[train]
        #selected_feats = getSelectedFeatures(train, test, texts, featureNames, documents, label_train, nFeats)

        full_train_data, full_test_data, label_train, label_test = data[train], data[test], labels[train], labels[test]

        #data_train = sortBySelected(full_train_data, selected_feats, featureNames)
        #data_test = sortBySelected(full_test_data, selected_feats, featureNames)

        data_train = full_train_data
        data_test = full_test_data

        enet = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1],n_alphas=1000,alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])

        enet.fit(data_train, label_train)

        data_train = np.asarray(data_train,dtype=float)
        label_train = np.asarray(label_train,dtype=float)

        vals = enet.path(data_train, label_train)
        mean_coefs.append(np.mean(vals[1],axis=1))

        if label_test == 1 and enet.predict(data_test) > 0.5:
            acc += 1
        elif label_test == 0 and enet.predict(data_test) < 0.5:
            acc += 1

        if len(mean_coefs) % 10 == 0:
            print(str(len(mean_coefs)), 'out of %s subs finished' % (str(len(data))))

    mean_coefs = np.mean(np.array(mean_coefs), axis=0)

    return Decimal(acc)/Decimal(len(data)), mean_coefs
Code Example #42
    def elastic_net_cv(self, drug_name, l1_ratio=0.5, alphas=None, n_folds=10):

        # Get the data for the requested drug
        xscaled, Y = self._get_one_drug_data(drug_name)

        en = ElasticNetCV(l1_ratio=l1_ratio, alphas=alphas, cv=n_folds)

        encv = en.fit(xscaled, Y)

        self.encv = encv
        print("Best alpha on %s folds : %s" % (n_folds, encv.alpha_))
        #df.sort_values().plot(kind='bar')
        return encv.alpha_
Code Example #43
lr.fit(X,y)
predicted=lr.predict(X)
'''validation'''
kf=KFold(len(X),n_folds=5)
p=np.zeros_like(y)
for train,test in kf:
    lr.fit(X[train],y[train])
    p[test]=lr.predict(X[test])
rmse_cv=np.sqrt(mean_squared_error(p,y))
print "RMSE of 5-fold cv {:.2}".format(rmse_cv)
'''ElasticNet'''
from sklearn.linear_model import ElasticNetCV
met=ElasticNetCV(n_jobs=-1)
p=np.zeros_like(y)
for t,tst in kf:
    met.fit(X[t],y[t])
    p[tst]=met.predict(X[tst])
p2=r2_score(y,p)
print(met.score(X,y))
print(p2, "Elastic")





exit()
plt.scatter(predicted,y)
plt.xlabel("Predicted")
plt.ylabel("Actual ")
plt.plot([y.min(),y.max()],[y.min(),y.max()])
plt.show()
Code Example #44
File: linear_regression.py, Project: id774/sandbox
plt.title('Linear Regression with sklearn of Housing prices')
plt.show()
plt.savefig('image5.png')
plt.close()
print('Linear Regression')
print(lr.rank_)
print([x for x in zip(names, lr.coef_)])

from sklearn.linear_model import RidgeCV
rcv = RidgeCV(
    alphas=np.array([0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0]), normalize=True)
rcv.fit(boston.data, boston.target)
print('RidgeCV')
print(rcv.alpha_)
print([x for x in zip(boston.feature_names, rcv.coef_)])

from sklearn.linear_model import LassoCV
lcv = LassoCV(
    alphas=np.array([0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0]), normalize=True)
lcv.fit(boston.data, boston.target)
print('LassoCV')
print(lcv.alpha_)
print([x for x in zip(boston.feature_names, lcv.coef_)])

from sklearn.linear_model import ElasticNetCV
encv = ElasticNetCV(alphas=np.array([0.0001, 0.0003, 0.01, 0.03, 0.1, 0.3, 1.0]),
                    l1_ratio=np.array([0.5, 0.8, 0.9, 0.95, 0.99, 0.995, 1.0]), normalize=True)
encv.fit(boston.data, boston.target)
print('ElasticNetCV')
print([x for x in zip(boston.feature_names, encv.coef_)])
Code Example #45
md=dnn_reg(X_train,y_train,X_test,y_test)
reg_eval(X_test,y_test,md)

###Lasso CV regression

def reg_eval2(y_test,model):
    y_pred=model.predict(X_test)
    print("evaluation the results for model:",model)
    print("MSE:",mean_squared_error(y_test,y_pred))
    print("R2:",r2_score(y_test,y_pred))
    print("EVS:",explained_variance_score(y_test,y_pred))

lasso = LassoCV(cv=5, random_state=0,max_iter=10000)
lasso.fit(X_train,y_train)
reg_eval2(y_test,lasso)

#ElasticNet Regression
ela = ElasticNetCV(l1_ratio=0.8,normalize=True,max_iter=5000,random_state=77)
ela.fit(X_train,y_train)
print("R square:",ela.score(X_test,y_test))
reg_eval2(y_test,ela)


#SVR Regression
from sklearn.svm import LinearSVR
LSVR=LinearSVR(epsilon=0.1,random_state=0, tol=1e-5,max_iter=10000)
# scaler=RobustScaler()
# pipe=Pipeline(steps=[("scaling",scaler),("rg",LSVR)])
LSVR.fit(X_train,y_train)
reg_eval2(y_test,LSVR)
Code Example #46
import numpy as np
from time import time  # used below to time each fit

# make_sparse_data and pl (pylab) are assumed to be imported elsewhere in
# the original benchmark script
X, y = make_sparse_data(n_samples=500, n_features=2000, n_informative=200)
n_cores = [1, 2, 4]
n_alpha = [5, 10, 50, 100]
times = [0] * 12

counter = 0
for _ in range(3):
    for core in n_cores:
        for alpha in n_alpha:
            clf = ElasticNetCV(n_jobs=core, n_alphas=alpha,
                               l1_ratio=0.5, cv=10)
            print("core = %d, alpha = %d" % (core, alpha))
            t = time()
            clf.fit(X, y)
            times[counter % 12] += (time() - t)
            print(times)
            counter += 1

# Got after doing the above. Just for future reference.
core1_mp = [57.457534631093345, 72.31527137756348, 210.2204163869222, 379.9918119907379]
core2_mp = [55.89718206723531, 51.196732918421425, 138.35079900423685, 239.67310031255087]
core3_mp = [42.53018967310587, 49.97517212231954, 122.26631005605061, 204.76643363634744]

core1_t = [60.99967805544535, 75.41305232048035, 219.61244002978006, 390.601344982783]
core2_t = [46.21716833114624, 54.701584259668984, 144.06910300254822, 242.6696043809255]
core3_t = [43.21849703788757, 49.07820804913839, 122.74103697141011, 205.75086871782938]

_, [axis1, axis2, axis3] = pl.subplots(3, 1, sharex=True)
ind = np.arange(4)
Code Example #47
#### assessing performance of the negative binomial regression model
performance_negativebinomial = []
for x in [0.01,0.1,1,5,10]:
    cost = []
    for a,b in cross_validation_object:
        resultingmodel = sm.NegativeBinomial(Y[a],X[a],loglike_method = 'geometric')
        #res = resultingmodel.fit(disp=False, maxiter = 200)
        res2 = resultingmodel.fit_regularized(alpha = x, maxiter = 200)
        cost.append(mean_squared_error(res2.predict(X[b]), Y[b]))
    performance_negativebinomial.append(np.mean(cost))


##### Log linear model ########## not even close. 
from sklearn.linear_model import ElasticNetCV
linear_fit = ElasticNetCV(cv = cross_validation_object, alphas = [0.01,0.1,1,5,10])
linear_fit.fit(X,np.log(Y+1))
mean_squared_error(np.exp(linear_fit.predict(X)) - 1, Y)


########## creating final model using train data + test data


X_test,Y_test,junk = prepare_for_model('Dogs_Final_Test.csv',1)
X,Y,junk = prepare_for_model('Dogs_Final_Train.csv',1)
scaler = MinMaxScaler([0,1])
X_all = scaler.fit_transform(np.vstack((X_test,X)))
Y_all = np.hstack((Y_test,Y))
Y_all = np.array([30 if i > 30 else i for i in Y_all])
final_model = sm.NegativeBinomial(Y_all,X_all,loglike_method = 'geometric')
res2 = final_model.fit_regularized( alpha = 5, maxiter = 200)
Code Example #48
from sklearn.linear_model import ElasticNetCV
met = ElasticNetCV()

features = sales_merged[['PMI_Portfolio_AVB_Boost', 'PMI_Portfolio_PFP_Boost',
       'PMI_Portfolio_PPRP', 'PMI_Portfolio_SA', 'SubFam_Hostess',
       'SubFam_PFP_Boost', 'SubFam_RAP', 'SubFam_SA', 'Fam_AVB_Boost',
       'Fam_Hostess', 'Fam_PFP_Boost', 'Fam_RAP', 't', 'Affinity',
       'Brand Character', 'Functional Performance']].to_numpy()  # as_matrix() was removed in pandas 1.0
target = sales_merged['Volume_Sales'].to_numpy()

met = ElasticNetCV(n_jobs=-1, l1_ratio=[.01, .05, .25, .5, .75, .95, .99])

kf = KFold(len(target), n_folds=5)
pred = np.zeros_like(target)
for train, test in kf:
    met.fit(features[train], target[train])
    pred[test] = met.predict(features[test])

print('[EN CV] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
print('[EN CV] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))
print('')

Code Example #49
def do_validation(data_path, steps=10):
    allfiles = initialize(data_path)
    gbm = GradientBoostingRegressor(n_estimators=100, learning_rate=0.05, max_depth=6, min_samples_leaf=5, subsample=0.5)
    ada = AdaBoostRegressor(n_estimators=200, learning_rate=1)
    etree = ExtraTreesRegressor(n_estimators=200, n_jobs=-1, min_samples_leaf=5)
    rf = RandomForestRegressor(n_estimators=200, max_features=4, min_samples_leaf=5)
    kn = KNeighborsRegressor(n_neighbors=25)
    logit = LogisticRegression(tol=0.05)
    enet = ElasticNetCV(l1_ratio=0.75, max_iter=1000, tol=0.05)
    svr = SVR(kernel="linear")  # SVR does not accept probability=...; that parameter belongs to SVC
    ridge = Ridge(alpha=18)
    bridge = BayesianRidge(n_iter=500)

    gbm_metrics = 0.0
    ada_metrics = 0.0
    etree_metrics = 0.0
    rf_metrics = 0.0
    kn_metrics = 0.0
    logit_metrics = 0.0
    svr_metrics = 0.0
    ridge_metrics = 0.0
    bridge_metrics = 0.0
    enet_metrics = 0.0
    nnet_metrics = 0.0

    logistic = LogisticRegression()
    rbm = BernoulliRBM(random_state=0, verbose=True)
    classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

    for i in range(steps):
        driver = allfiles[i]
        df, Y = create_merged_dataset(driver)
        df['label'] = Y        
        # Shuffle DF.
        df = df.reindex(np.random.permutation(df.index))

        train = df[:100]
        label = train['label']
        del train['label']

        test = df[100:400]
        Y = test['label']
        del test['label']

        #to_drop = ['driver', 'trip', 'speed1', 'speed2', 'speed3', 'speed4', 'speed5', 'speed6', 'speed7', 'speed8', 'speed9', 
        #        'speed10', 'speed11', 'speed12', 'speed13', 'speed14', 'speed15', 'speed16', 'speed17', 'speed18', 'speed19', 
        #        'speed20', 'speed21', 'speed22', 'speed23', 'speed24', 'speed25', 'speed26', 'speed27', 'speed28', 'speed29', 
        #        'speed30', 'speed31', 'speed32', 'speed33', 'speed34', 'speed35', 'speed36', 'speed37', 'speed38', 'speed39', 
        #        'speed40', 'speed41', 'speed42', 'speed43', 'speed44', 'speed45', 'speed46', 'speed47', 'speed48', 'speed49', 
        #        'speed50', 'speed51', 'speed52', 'speed53', 'speed54', 'speed55', 'speed56', 'speed57', 'speed58', 'speed59', 
        #        'speed60', 'speed61', 'speed62', 'speed63', 'speed64', 'speed65', 'speed66', 'speed67', 'speed68', 'speed69', 
        #        'speed70', 'speed71', 'speed72', 'speed73', 'speed74', 'speed75', 'speed76', 'speed77', 'speed78', 'speed79', 'speed80']
        to_drop = ['driver', 'trip']

        X_train = train.drop(to_drop, 1)
        X_test = test.drop(to_drop, 1)
        
        gbm.fit(X_train, label)
        Y_hat = gbm.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        gbm_metrics += metrics.auc(fpr, tpr) 
        
        ada.fit(X_train, label)
        Y_hat = ada.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        ada_metrics += metrics.auc(fpr, tpr)
    
        etree.fit(X_train, label)
        Y_hat = etree.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        etree_metrics += metrics.auc(fpr, tpr)
        
        rf.fit(X_train, label)
        Y_hat = rf.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        rf_metrics += metrics.auc(fpr, tpr)
        
        kn.fit(X_train, label)
        Y_hat = kn.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        kn_metrics += metrics.auc(fpr, tpr)

        # Linear models.
        to_drop = ['driver', 'trip', 'distance', 'sd_acceleration', 'final_angle', 'mean_acceleration', 'mean_avg_speed', 'sd_inst_speed',
                'sd_avg_speed', 'mean_inst_speed', 'points']

        X_train = train.drop(to_drop, 1)
        X_test = test.drop(to_drop, 1)
        
        logit.fit(X_train, label)
        Y_hat = [i[1] for i in logit.predict_proba(X_test)]
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        logit_metrics += metrics.auc(fpr, tpr)

        svr.fit(X_train, label)
        Y_hat = svr.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        svr_metrics += metrics.auc(fpr, tpr)
        
        ridge.fit(X_train, label)
        Y_hat = ridge.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        ridge_metrics += metrics.auc(fpr, tpr)

        bridge.fit(X_train, label)
        Y_hat = bridge.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        bridge_metrics += metrics.auc(fpr, tpr)

        enet.fit(X_train, label)
        Y_hat = enet.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        enet_metrics += metrics.auc(fpr, tpr)

        classifier.fit(X_train, label)
        Y_hat = classifier.predict(X_test)
        fpr, tpr, thresholds = metrics.roc_curve(Y, Y_hat)
        nnet_metrics += metrics.auc(fpr, tpr)

    print ""
    print "GBM:", gbm_metrics/steps
    print "AdaBoost:", ada_metrics/steps
    print "Extra Trees:", etree_metrics/steps
    print "RF:", rf_metrics/steps
    print "KN:", kn_metrics/steps
    print ""
    print "Logit:", logit_metrics/steps
    print "SVR:", svr_metrics/steps
    print "Ridge:", ridge_metrics/steps
    print "BayesianRidge:", bridge_metrics/steps
    print "Elastic Net:", enet_metrics/steps
    print "Neural Networks:", nnet_metrics/steps
    print ""
Code Example #50
l1_ratio=0.7  # alpha is assumed to have been defined earlier in this script

enet = ElasticNet(alpha = alpha, l1_ratio = l1_ratio)
enet_model = enet.fit(X_train, y_train)
y_pred_enet = enet_model.predict(X_test)
r2_score_enet = r2_score(y_test, y_pred_enet)

print(enet)
print("r^2 on test data : %f" % r2_score_enet)
# r^2 on test data : 0.100723

# plt.plot(enet.coef_, label='Elastic net coefficients')
# plt.plot(coef, '--', label='original coefficients')
# plt.legend(loc='best')
# plt.title("R^2: %f" % (r2_score_enet))
# plt.show()

# set the parameters alpha and l1_ratio by cross-validation
from sklearn.linear_model import ElasticNetCV

enetcv = ElasticNetCV(l1_ratio=[.1,.2,.3,.4,.5,.6,.7,.8,.9])
enetcv_model = enetcv.fit(X_train, y_train)
y_pred_enetcv = enetcv_model.predict(X_test)
r2_score_enetcv = r2_score(y_test, y_pred_enetcv)

print(enetcv)
print("r^2 on test data : %f" % r2_score_enetcv)
# r^2 on test data : 0.22553

assert(r2_score_enetcv > r2_score_enet)
Code Example #51
    def fit(self, raw_array, aux_data_a_d=None, diff=False, feature_s_l=[], holdout_col=0, lag=1, positive_control=False, regression_algorithm_s = 'elastic_net', **kwargs):
        """ Performs an auto-regression of a given lag on the input array. Axis 0 indexes observations (schools) and axis 1 indexes years. For holdout_col>0, the last holdout_col years of data will be withheld from the fitting, which is ideal for training the algorithm. """

        # Apply optional parameters
        if holdout_col > 0:
            raw_array = raw_array[:, :-holdout_col]
        if diff:
            array = np.diff(raw_array, 1, axis=1)
        else:
            array = raw_array

        # Create model and fit parameters
        Y = array[:, lag:].reshape(-1)
        X = np.ndarray((Y.shape[0], 0))
        for i in range(lag):
            X = np.concatenate((X, array[:, i:-lag+i].reshape(-1, 1)), axis=1)
            # Y = X_t = A_1 * X_(t-lag) + A_2 * X_(t-lag+1)) + ... + A_lag * X_(t-1) + A_(lag+1)
        if positive_control:
            X = np.concatenate((X, array[:, lag:].reshape(-1, 1)), axis=1)
        if aux_data_a_d:
            for feature_s in feature_s_l:
                if holdout_col > 0:
                    raw_array = aux_data_a_d[feature_s][:, :-holdout_col]
                else:
                    raw_array = aux_data_a_d[feature_s]
                if diff:
                    array = np.diff(raw_array, 1, axis=1)
                else:
                    array = raw_array
                for i in range(lag):
                    X = np.concatenate((X, array[:, i:-lag+i].reshape(-1, 1)), axis=1)
        # Imputer was removed in scikit-learn 0.22; SimpleImputer (column-wise
        # mean imputation, the old axis=0 behavior) is the replacement.
        # Requires: from sklearn.impute import SimpleImputer
        estimatorX = SimpleImputer()
        X = estimatorX.fit_transform(X)
        estimatorY = SimpleImputer()
        Y = estimatorY.fit_transform(Y.reshape(-1, 1)).reshape(-1)

        if regression_algorithm_s == 'elastic_net':
            l1_ratio_l = [.1, .5, .7, .9, .95, .99, 1]
            alpha_l = np.logspace(-15, 5, num=11).tolist()
            max_iter = int(1e5)  # must be an int; higher values are too slow for now
            # NB: the normalize argument was removed in scikit-learn 1.2;
            # standardize the features beforehand (e.g. with StandardScaler) instead.
            model = ElasticNetCV(l1_ratio=l1_ratio_l, alphas=alpha_l, max_iter=max_iter,
                                 fit_intercept=True)
        elif regression_algorithm_s == 'gaussian_process':
            # GaussianProcess was replaced by GaussianProcessRegressor in 0.18
            # (from sklearn.gaussian_process import GaussianProcessRegressor).
            model = GaussianProcessRegressor()
            # The legacy class raised: "Exception: Multiple input features cannot have the same target value."
        elif regression_algorithm_s == 'gradient_boosting':
            model = GradientBoostingRegressor(max_features='sqrt')
        elif regression_algorithm_s == 'linear_regression':
            model = LinearRegression(fit_intercept=True)
        elif regression_algorithm_s == 'random_forest':
            model = RandomForestRegressor(max_features=1.0)  # 'auto' was removed in 1.3; 1.0 means all features
        model.fit(X, Y)
        if regression_algorithm_s in ['elastic_net', 'linear_regression']:
            with open(os.path.join(config.plot_path, 'coeff_list.txt'), 'a') as f:
                f.write('Lag of {0:d}:\n'.format(lag))
#                f.write('\nElastic net: R^2 = %0.5f, l1_ratio = %0.2f, alpha = %0.1g' %
#                      (model.score(X, Y), model.l1_ratio_, model.alpha_))
                coeff_t = model.coef_
                # The indexing below assumes no extra control column was appended.
                assert not positive_control, "coefficient indices do not line up when positive_control is set"
                for i_lag in range(lag):
                    f.write('\ti_lag = {0:d}: {1:0.2g}\n'.format(lag-i_lag, coeff_t[i_lag]))
                for i_feature, feature_s in enumerate(feature_s_l):
                    for i_lag in range(lag):
                        f.write('\t{0}:\n\t\ti_lag = {1:d}: {2:0.2g}\n'.format(feature_s, lag-i_lag, coeff_t[lag*(i_feature+1) + i_lag]))

        return model
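A hypothetical usage sketch for this method (the owning class is not shown in the excerpt, so the AutoRegressor name below is assumed): rows are schools, columns are years, and the last two years are held out for later validation.

import numpy as np

ar = AutoRegressor()                       # assumed class exposing the fit method above
raw = np.random.rand(200, 10)              # 200 schools x 10 years of scores
model = ar.fit(raw, lag=2, holdout_col=2)  # regress year t on years t-2 and t-1
# model.coef_ then holds one coefficient per lag column, in the order
# [X_(t-2), X_(t-1)], matching the comment in the construction of X.
# Note the elastic-net branch also appends a coefficient log under config.plot_path.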
Code example #52
	else: binary_y_pre.append(0)
binary_y = np.array(binary_y_pre)

# NB: the normalize argument was removed from these estimators in scikit-learn 1.2;
# on current versions, standardize the features beforehand and drop it.
coef_path_linear_cv = LinearRegression(normalize=Normalize, fit_intercept=Fit_Intercept)
coef_path_lasso_cv = LassoCV(normalize=Normalize, max_iter=Max_Iter, copy_X=True, cv=CV, verbose=Verbose, fit_intercept=Fit_Intercept, tol=Tol)  # , alphas=Alphas
coef_path_elastic_cv = ElasticNetCV(normalize=Normalize, max_iter=Max_Iter, tol=Tol)  # , alphas=Alphas
coef_path_logistic_cv = LogisticRegression(tol=Tol)
coef_path_binary_x_logistic_cv = LogisticRegression(tol=Tol)
coef_path_forest_cv = RandomForestClassifier(n_estimators=N_Estimators, max_features=number_of_features)

binary_X = vectorizer_binary.fit_transform(corpus)
coef_path_forest_cv.fit(X,binary_y)
coef_path_lasso_cv.fit(X,y)
coef_path_binary_x_logistic_cv.fit(binary_X,binary_y)
coef_path_logistic_cv.fit(X,binary_y)
coef_path_elastic_cv.fit(X,y)

# sklearn.cross_validation was removed in 0.20; these calls assume
# `from sklearn import model_selection` instead.
forest_cv_score = model_selection.cross_val_score(coef_path_forest_cv, X, binary_y, n_jobs=2, cv=CV, scoring='roc_auc')
lasso_cv_score = model_selection.cross_val_score(coef_path_lasso_cv, X, y, n_jobs=2, cv=CV, scoring=Scoring)
elastic_cv_score = model_selection.cross_val_score(coef_path_elastic_cv, X, y, n_jobs=2, cv=CV, scoring=Scoring)
logistic_cv_score = model_selection.cross_val_score(coef_path_logistic_cv, X, binary_y, n_jobs=2, cv=CV, scoring='roc_auc')
binary_x_logistic_cv_score = model_selection.cross_val_score(coef_path_binary_x_logistic_cv, binary_X, binary_y, n_jobs=2, cv=CV, scoring='roc_auc')

forest_results_parameters = [coef_path_forest_cv.predict(X), coef_path_forest_cv.get_params(),  # note the call: without (), this stores the bound method
                             coef_path_forest_cv.feature_importances_,
                             coef_path_forest_cv.classes_, coef_path_forest_cv.n_classes_]
forest_scores = [forest_cv_score, classification_report(binary_y, forest_results_parameters[0]), 'forest']

lasso_results_parameters = [coef_path_lasso_cv.predict(X), coef_path_lasso_cv.get_params(), coef_path_lasso_cv.alphas_, coef_path_lasso_cv.coef_]

lasso_scores = [lasso_cv_score, r2_score(y,lasso_results_parameters[0]), 'lasso']
Code example #53
File: EEG-analysis.py  Project: mjboos/EEG-ABUS
X_train, X_test, Y_train, Y_test = train_test_split(alldata, newY, test_size=0.3)

# frequencies (this second split overwrites the time-domain split above)
X_train, X_test, Y_train, Y_test = train_test_split(allfreqdata, newY, test_size=0.3)

svr = SVR(cache_size=1500)
svr_params = { 'C' : [1e-2,1,1e2] , 'epsilon' : [1e-3,1e-2,1e-1]  }

# fit without transforms: 0.009
# fit with KLD: 0.017

# TODO: test with newY (hierarchical intercepts) -- takes a long time

enet_cv = ElasticNetCV(l1_ratio=[0.1,0.3,0.5,0.7,0.9],max_iter=2000)
enet_cv.fit(X_tr_new,Y_train)

rcv = RidgeCV(alphas=[1e-2,1e-1,1,10])
#rcv.fit(X_train,Y_train)

svr_gs = GridSearchCV(svr,svr_params,verbose=1,n_jobs=-1)
#svr_gs.fit(X_train,Y_train)


#%%
#visualization of posterior ERPs averaged over Pbs and epochs
#for chan Fz
posteriors = np.unique(np.round(bc_dict["01"],decimals=2))

avr_ERP_p_post_list = [get_average_ERPs_per_posterior(mat_dict[k],bc_dict[k],chan=4) for k in sorted(mat_dict.keys())] 
Code example #54
plt.show()

#RidgeCV
from sklearn.linear_model import RidgeCV
model = RidgeCV(cv=20)
model_ridge = model.fit(ratings_ext_input_sim2[X_features], ratings_ext_input_sim2['rating'])
rating_predicted = model_ridge.predict(ratings_ext_input_sim2[X_features])
error = rating_predicted - ratings_ext_input_sim2['rating']
print(np.mean(error * error))  # in-sample MSE ~ 4.77 (0.633 good?)
score = model_ridge.score(ratings_ext_input_sim2[X_features], ratings_ext_input_sim2['rating'])
print(model_ridge.coef_)

# Elastic Net
from sklearn.linear_model import ElasticNetCV
enet = ElasticNetCV(l1_ratio=0.5, cv=10)  # l1_ratio=1 would give the LASSO
model_enet = enet.fit(ratings_ext_input_sim2[X_features], ratings_ext_input_sim2['rating'])
rating_predicted = model_enet.predict(ratings_ext_input_sim2[X_features])
error = rating_predicted - ratings_ext_input_sim2['rating']
print(np.mean(error * error))  # in-sample MSE ~ 4.168
# earlier runs with fixed hyper-parameters, all noticeably worse:
# alpha = 1.0, l1_ratio = 0: MSE 4.67
# alpha = 0.1, l1_ratio = 0: MSE 4.57
# alpha = 0.5, l1_ratio = 0: MSE 4.64
# alpha = 0.7, l1_ratio = 0: MSE 4.65
from sklearn.linear_model import lasso_path, enet_path
print(model_enet.mse_path_.shape)  # cross-validation error per alpha per fold
plt.figure(1)
ax = plt.gca()
ax.set_prop_cycle(color=2 * ['b', 'r', 'g', 'c', 'k'])  # set_color_cycle was removed in matplotlib 2.0
# l1 = plt.plot(-np.log10(alphas_lasso), coefs_lasso.T)
# NB: model_enet.coef_ is only the final coefficient vector; to trace the
# coefficients along the regularization path, recompute it with enet_path:
alphas_enet, coefs_enet, _ = enet_path(ratings_ext_input_sim2[X_features], ratings_ext_input_sim2['rating'], l1_ratio=0.5)
l1 = plt.plot(-np.log10(alphas_enet), coefs_enet.T, linestyle='--')
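The mse_path_ attribute printed above can also be visualized directly; a minimal sketch, continuing from the fitted model_enet of this example:

mean_mse = model_enet.mse_path_.mean(axis=-1)  # average the CV error over folds
plt.figure(2)
plt.plot(-np.log10(model_enet.alphas_), mean_mse.ravel())
plt.axvline(-np.log10(model_enet.alpha_), linestyle=':', label='alpha chosen by CV')
plt.xlabel('-log10(alpha)')
plt.ylabel('mean CV MSE')
plt.legend(loc='best')
plt.show()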
Code example #55
# add an intercept column of ones
X = np.hstack((np.ones(X.shape[0])[:, None], X))

train_X,test_X,train_Y,test_Y = train_test_split(X,y,test_size=0.1)


#%%
#try elastic net

# sklearn's alpha plays the role of lambda in the usual penalized-regression notation
lambda_grid = [0.01, 0.1, 1, 10, 100]
l1_ratio_grid = [0.1, 0.3, 0.5, 0.7, 0.9]

enet_CV = ElasticNetCV(l1_ratio=l1_ratio_grid, alphas=lambda_grid, cv=3, n_jobs=-1, verbose=True)

enet_CV.fit(train_X,train_Y)

#%%
#show
print(enet_CV.score(test_X, test_Y))
plt.plot(enet_CV.predict(test_X), test_Y, 'o')
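For reference, the objective that ElasticNetCV minimizes (as documented by scikit-learn) makes the roles of the two grids explicit; with rho = l1_ratio, in LaTeX notation:

\min_w \; \frac{1}{2 n_{\mathrm{samples}}} \lVert y - Xw \rVert_2^2 + \alpha \rho \lVert w \rVert_1 + \frac{\alpha (1 - \rho)}{2} \lVert w \rVert_2^2

so the lambda grid above sweeps the overall penalty strength alpha, while l1_ratio interpolates between ridge (rho = 0) and the LASSO (rho = 1).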
#%%
#try svr

svr = SVR(kernel='rbf', C=1, cache_size=2000)

SVR_params = {'C': [1e-1, 1.0, 1e2, 1e3, 1e4]}
# sklearn.grid_search was removed in 0.20; this assumes `from sklearn import model_selection`.
# n_iter must not exceed the 5-point grid above, so it is set explicitly.
svr_rs = model_selection.RandomizedSearchCV(svr, SVR_params, n_iter=5, verbose=True, n_jobs=-1)

svr.fit(train_X[:,whichones[0]],train_Y)
Code example #56
features = features.dropna(axis=1)

# 10,000 candidate alphas from 0.01 to 100 in steps of 0.01
# (note: under Python 2 the original `a / 100` was integer division)
alpha_values = [a / 100 for a in range(1, 10001)]

print("Started at " + str(datetime.now()))

estimator_ridge = RidgeCV(alphas=alpha_values, cv=3)
estimator_ridge.fit(features, goal)
scores = cross_val_score(Ridge(alpha=estimator_ridge.alpha_), features, goal, cv=5)
print("Ridge alpha " + str(estimator_ridge.alpha_))
print(str(np.mean(scores)))
print(scores)

estimator_lasso = LassoCV(alphas=alpha_values, cv=3)
estimator_lasso.fit(features, goal)
scores = cross_val_score(Lasso(alpha=estimator_lasso.alpha_), features, goal, cv=5)
print("Lasso alpha " + str(estimator_lasso.alpha_))
print(str(np.mean(scores)))
print(scores)


estimator_elastic_net = ElasticNetCV(alphas=alpha_values, cv=3, n_jobs=-1)
estimator_elastic_net.fit(features, goal)
# Both ElasticNetCV above and the refit ElasticNet below use the default
# l1_ratio of 0.5, so only alpha is tuned; pass l1_ratio explicitly to tune both.
scores = cross_val_score(ElasticNet(alpha=estimator_elastic_net.alpha_), features, goal, cv=5)
print("ElasticNet alpha " + str(estimator_elastic_net.alpha_))
print(str(np.mean(scores)))
print(scores)

print("Finished at " + str(datetime.now()))
Code example #57
        met.fit(x[train], y[train])
        p[test] = met.predict(x[test])

    r2_cv = r2_score(y, p)
    print('Method: {}'.format(name))
    print('R2 on training: {}'.format(r2_train))
    print('R2 on 5-fold CV: {}'.format(r2_cv))
    print()

# Construct an ElasticNetCV object (use all available CPUs)
met = ElasticNetCV(n_jobs=-1, l1_ratio=[.01, .05, .25, .5, .75, .95, .99])

# KFold's old (n, n_folds) signature was removed in scikit-learn 0.20;
# construct with n_splits and iterate over kf.split(x) instead.
kf = KFold(n_splits=5)
pred = np.zeros_like(y)
for train, test in kf.split(x):
    met.fit(x[train], y[train])
    pred[test] = met.predict(x[test])


# report on `pred` (the original printed the stale variable `p` from the previous loop)
print('[EN CV l1_ratio] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(y, pred))))
print('[EN CV l1_ratio] R2 on testing (5 fold), {:.2}'.format(r2_score(y, pred)))
print('')
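The explicit fold loop above can be collapsed into one call; a minimal sketch, assuming the same met, x, and y:

from sklearn.model_selection import cross_val_predict

# Out-of-fold predictions for every sample, equivalent to the manual KFold loop.
pred = cross_val_predict(met, x, y, cv=5, n_jobs=-1)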


'''
# unit version
from time import time
import numpy as np
from step3_vectorize_text import preprocess_4
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
Code example #58
File: regression.py  Project: ZiqianXie/ECoG_analysis
l = []
with h5py.File("ECoG_big_data.h5", "r+") as f1:
    with h5py.File("selected.h5", "r+") as f2:
        for i in range(1, 4):
            sid = "sub" + str(i)
            X = f1[sid]["train_data"][:]
            Y = f1[sid]["train_clabel"][:]
            Yb = f1[sid]["train_blabel"][:]
            Xt = f1[sid]["test_data"][:]
            Yt = f1[sid]["test_clabel"][:]
            Ytb = f1[sid]["test_blabel"][:]
            for finger in range(5):
                for method in ["l1", "mcp", "scad"]:
                    idxc = f2[sid]["finger" + str(finger + 1)][method][:] - 1
                    idxb = f2[sid]["finger" + str(finger + 1)]["l1_l"][:] - 1
                    en = ElasticNetCV()
                    en.fit(X[:, idxc].astype("float64"), Y[:, finger])
                    yp = en.predict(Xt[:, idxc])
                    corr = np.corrcoef(yp, Yt[:, finger])[0, 1]
                    if corr < 0.3:
                        break
                    else:
                        l.append([sid + "//" + "finger" + str(finger + 1), corr])
                        lr = LogisticRegressionCV()
                        lr.fit(X[:, idxc], Yb[:, finger])
                        # gate the regression output with the binary classifier
                        # (fun and relu are project helpers defined outside this excerpt)
                        tp = yp * fun(lr.predict(Xt[:, idxc]))
                        # 40-sample moving average, thresholded at 0.5, as an on/off mask
                        m = np.where(np.convolve(tp, np.ones((40,)) / 40, mode="same") < 0.5, 0, 1)
                        # 2nd-order Butterworth low-pass at 9/25 of the Nyquist frequency
                        b, a = butter(2, 9.0 / 25, "low")
                        yy = relu(filtfilt(b, a, tp * m))
                        print(corr, np.corrcoef(Yt[:, finger], yy)[0, 1])