def Regularized_Smap(abund, target_otu, theta, l_grid, iteration, cv, test_len):
    print('Process data for otu No. %s' % str(target_otu+1))
    # Make input for the elastic_net. abund is expected to be a 2-D np.matrix
    # so that the row and column slices below keep their matrix dimensions.
    block = np.append(abund[1:, target_otu], abund[0:-1, :], axis=1)
    ##Delete the discontinuous states (indices must be integers, hence //)
    block = np.delete(block, [abund.shape[0] // 3 - 1, abund.shape[0] // 3 * 2 - 1], axis=0)
    ##Scaling the input
    ##Each time series is normalized to have a mean of 0 and standard deviation of 1 before analysis with S-maps
    block = (block - np.average(block, axis=0)) / np.std(block, axis=0)

    ##Select data and fitting
    print('Start fitting.')
    lib = range(block.shape[0])
    coefs = np.empty(shape=(block.shape[0], block.shape[1] - 1))
    fit_results = np.empty(shape=(block.shape[0], 13))

    for ipred in lib:
        print('\r', 'Complete percentage: %.2f%%' % (ipred / len(lib) * 100), end="", flush=True)
        sub_block = np.delete(block, ipred, axis=0)
        q = block[lib[ipred], :]
        ###Calculate weights
        E_dist = np.sqrt(np.sum(np.array(sub_block[:, 1:] - q[:, 1:]) ** 2, axis=1))
        w = make_weights(E_dist, theta)
        ###Weighted predictors and responses
        X_wp = weight_data(sub_block[:, 1:], w)
        Y_wp = np.ravel(weight_data(sub_block[:, 0], w))
        X_target = block[ipred, 1:]
        Y_target = block[ipred, 0]

        ##Split training and test data: hold out test_len rows at random
        pick_test = np.random.choice(range(X_wp.shape[0]), size=test_len, replace=False)
        X_train = np.append(np.delete(X_wp, pick_test, axis=0), X_target, axis=0)
        X_test = X_wp[pick_test, :]
        Y_train = np.append(np.delete(Y_wp, pick_test, axis=0), Y_target)
        Y_test = Y_wp[pick_test]

        ###Fit function
        regr = ElasticNetCV(cv=cv, random_state=0, max_iter=iteration,
                            l1_ratio=[(i + 1) * l_grid for i in range(int(1 / l_grid))])
        regr.fit(X_train, Y_train)
        rmse = np.sqrt(np.mean((regr.predict(X_train) - Y_train) ** 2))
        rmse_o = np.sqrt(np.mean((regr.predict(X_test) - Y_test) ** 2))
        coefs[ipred, :] = regr.coef_
        fit_results[ipred, :] = (regr.intercept_, regr.alpha_, regr.l1_ratio_,
                                 rmse, np.std(Y_train), rmse_o, np.std(Y_test),
                                 regr.score(X_test, Y_test), regr.score(X_train, Y_train),
                                 max(Y_train), min(Y_train), max(Y_test), min(Y_test))
        print('\r', 'Complete percentage: %.2f%%' % ((ipred + 1) / len(lib) * 100), end="", flush=True)

    # Output results
    coefs = pd.DataFrame(data=coefs)
    coefs.to_csv('../Output/test/0/coefs/%s_%s_coefs.csv' % (target_otu, theta))
    fit_results = pd.DataFrame(
        columns=['Intercept', 'Best alpha', 'Best l1_ratio', 'RMSE', 'Std', 'RMSE_o', 'Std_o', 'Test set score',
                 'Test set score_train', 'ymax_train', 'ymin_train', 'ymax_test', 'ymin_test'],
        data=fit_results)
    fit_results.to_csv('../Output/test/0/fit_result/%s_%s_fit_results.csv' % (target_otu, theta))
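
# Note: make_weights and weight_data are project helpers not shown in this
# snippet. A minimal sketch, assuming the standard S-map exponential weighting
# w_i = exp(-theta * d_i / mean(d)); the original definitions may differ:
def make_weights(E_dist, theta):
    # Nearby states (small Euclidean distance) get exponentially larger weights
    return np.exp(-theta * E_dist / np.mean(E_dist))

def weight_data(data, w):
    # Scale each row by its weight so an ordinary fit becomes locally weighted
    return np.asarray(data) * w.reshape(-1, 1)
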
def test_enet_path():
    # We use a large number of samples and of informative features so that
    # the l1_ratio selected is more toward ridge than lasso
    X, y, X_test, y_test = build_dataset(n_samples=200, n_features=100,
                                         n_informative_features=100)
    max_iter = 150

    # Here we have a small number of iterations, and thus the
    # ElasticNet might not converge. This is to speed up tests
    clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
                       l1_ratio=[0.5, 0.7], cv=3,
                       max_iter=max_iter)
    ignore_warnings(clf.fit)(X, y)
    # Well-conditioned settings, we should have selected our
    # smallest penalty
    assert_almost_equal(clf.alpha_, min(clf.alphas_))
    # Non-sparse ground truth: we should have selected an elastic-net
    # that is closer to ridge than to lasso
    assert clf.l1_ratio_ == min(clf.l1_ratio)

    clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
                       l1_ratio=[0.5, 0.7], cv=3,
                       max_iter=max_iter, precompute=True)
    ignore_warnings(clf.fit)(X, y)

    # Well-conditioned settings, we should have selected our
    # smallest penalty
    assert_almost_equal(clf.alpha_, min(clf.alphas_))
    # Non-sparse ground truth: we should have selected an elastic-net
    # that is closer to ridge than to lasso
    assert clf.l1_ratio_ == min(clf.l1_ratio)

    # We are in well-conditioned settings with low noise: we should
    # have a good test-set performance
    assert clf.score(X_test, y_test) > 0.99

    # Multi-output/target case
    X, y, X_test, y_test = build_dataset(n_features=10, n_targets=3)
    clf = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7],
                                cv=3, max_iter=max_iter)
    ignore_warnings(clf.fit)(X, y)
    # We are in well-conditioned settings with low noise: we should
    # have a good test-set performance
    assert clf.score(X_test, y_test) > 0.99
    assert clf.coef_.shape == (3, 10)

    # Mono-output should have same cross-validated alpha_ and l1_ratio_
    # in both cases.
    X, y, _, _ = build_dataset(n_features=10)
    clf1 = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    clf1.fit(X, y)
    clf2 = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    clf2.fit(X, y[:, np.newaxis])
    assert_almost_equal(clf1.l1_ratio_, clf2.l1_ratio_)
    assert_almost_equal(clf1.alpha_, clf2.alpha_)
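
# build_dataset is a helper from scikit-learn's own test suite. Outside the
# test suite the same selection behavior can be reproduced with
# make_regression; a rough standalone sketch (not part of the original test):
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import ElasticNetCV

X, y = make_regression(n_samples=200, n_features=100, n_informative=100,
                       noise=0.1, random_state=0)
clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], l1_ratio=[0.5, 0.7], cv=3)
clf.fit(X, y)
# Dense, low-noise ground truth: expect the smallest alpha and the
# ridge-leaning l1_ratio, mirroring the assertions above
print(clf.alpha_, clf.l1_ratio_)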
Example n. 3
    def train_elasticnet_model(self, mode, ffm):
        # X_train = np.array(self.X_train[mode])
        X_train = np.array(self.X_train2)
        y_train = np.array(self.y_train[ffm])

        # X_val = np.array(self.X_val[mode])
        X_val = np.array(self.X_val2)
        y_val = np.array(self.y_val[ffm])

        l1ratios = np.linspace(0.1, 1, 10)

        mses = []
        alps = []
        verr = []

        for l1 in l1ratios:
            print(l1)
            enet = ElasticNetCV(l1_ratio=l1, cv=10)
            enet.fit(X_train, y_train)
            y_pred = enet.predict(X_val)
            mse = mean_squared_error(y_val, y_pred)
            v = enet.score(X_val, y_val)

            mses.append(mse)
            alps.append(enet.alpha_)
            verr.append(v)

        i_opt = np.argmin(mses)
        l1_opt = l1ratios[i_opt]
        alpha_opt = alps[i_opt]

        print("optimal l1", l1_opt)
        print("optimal alpha", alpha_opt)

        # Refit with the optimal l1_ratio; alpha is re-selected by CV
        enet2 = ElasticNetCV(l1_ratio=l1_opt, cv=10)
        enet2.fit(X_train, y_train)
        y_pred = enet2.predict(X_val)
        y_pred_train = enet2.predict(X_train)

        print("Training MSE", mean_squared_error(y_train, y_pred_train))
        print("Validation MSE", mean_squared_error(y_val, y_pred))

        print("Training Pearson R", pearsonr(y_train, y_pred_train))
        print("Validation Pearson R", pearsonr(y_val, y_pred))

        print("Training R2 score:", enet.score(X_train, y_train))
        print("Validation R2 score:", enet.score(X_val, y_val))

        # print(enet2.alpha_)

        key = tuple(mode + [ffm])
        self.elasticnet[key] = enet2

        return self.elasticnet[key]
Example n. 4
def elastic_net(Xtrain, Ytrain, Xdev, Ydev, verbose=False):
    """
    Trains an Elastic Net linear model on the provided data. Scores the model
    and returns both the model and the score. It also prints the optimal
    hyperparameters.

    Inputs:
        Xtrain
        Ytrain
        Xdev
        Ydev

    Returns:
        float: the R^2 on the dev data for the best model specifications.
        ElasticNetCV: the trained model.
    """
    print("\n========================\nTraining Elastic Net\n")
    enet = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1],
                        max_iter=10000,
                        tol=1e-2)
    enet.fit(Xtrain, Ytrain)
    best_score = enet.score(Xdev, Ydev)
    results = {
        "R2": best_score,
        "alpha": enet.alpha_,
        "l1_ratio": enet.l1_ratio_
    }
    if verbose:
        results['coefficients'] = enet.coef_.tolist()
    from pprint import pprint
    pprint(results, indent=4)
    return best_score, enet
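
# A hypothetical call with synthetic data; the variable names follow the
# docstring and are not from the original source:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=20, noise=5.0, random_state=0)
Xtrain, Xdev, Ytrain, Ydev = train_test_split(X, y, test_size=0.2, random_state=0)
best_score, model = elastic_net(Xtrain, Ytrain, Xdev, Ydev)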
Example n. 5
def score(inEval, X, y):
    indMatrix = pd.DataFrame()
    i = 0
    listEval = list(inEval)
    for ele in listEval:
        evalString = updatedEvalString(ele)
        #Exception handling against log(0)
        try:
            indMatrix[str.format('col{0}', i)] = eval(evalString)
        except ZeroDivisionError:
            continue
        i = i + 1
    # Remove inf with 1
    indMatrix = indMatrix.replace([np.inf, -np.inf], 1)

    # Linear regression with elastic net
    """
    regr = ElasticNet(random_state=0, l1_ratio = 0.1)
    regr.fit(indMatrix,y_train)
    
    
    y_p = regr.predict(indMatrix)
    regr.score(indMatrix,y_train)"""
    regr = ElasticNetCV(cv=2, random_state=0, max_iter=5000)
    regr.fit(indMatrix, y)
    return (regr.score(indMatrix, y))
Example n. 6
def elastic_net_cv(problem, **kwargs):
    r"""High level description.

    Parameters
    ----------
    kwargs['elastic_net_reg_coefs'] must be a list of nonnegative float.  These
    are the multiplier for the penalty term in cross-validation of EN

    kwargs['elastic_net_ratio'] must be between 0 and 1

    kwargs['coef_tolerance'] must be a nonnegative float

    Returns
    -------
    output : tuple
        (optimum, maximum)

    """
    data_list = [datum['data']['values'] for datum in problem.data]
    data = numpy.array(data_list)
    elastic_net = ElasticNetCV(alphas=kwargs['elastic_net_reg_coefs'],
                               l1_ratio=kwargs['elastic_net_ratio'])
    elastic_net.fit(data.T, problem.goal['data']['values'])
    elastic_net_coefficients = elastic_net.coef_
    optimum = [
        problem.data[index]
        for index, element in enumerate(elastic_net_coefficients)
        if abs(element) > kwargs['coef_tolerance']
    ]
    maximum = elastic_net.score(data.T, problem.goal['data']['values'])
    output = (optimum, maximum)
    return output
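
# The problem object is only partially visible here. A hypothetical stand-in
# that satisfies the attributes this function reads (problem.data and
# problem.goal), shown purely for illustration:
import numpy
from types import SimpleNamespace

rng = numpy.random.default_rng(0)
problem = SimpleNamespace(
    data=[{'data': {'values': rng.random(50).tolist()}} for _ in range(5)],
    goal={'data': {'values': rng.random(50).tolist()}},
)
optimum, maximum = elastic_net_cv(problem,
                                  elastic_net_reg_coefs=[0.1, 1.0, 10.0],
                                  elastic_net_ratio=0.5,
                                  coef_tolerance=1e-3)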
Example n. 7
def elastic_net(A, y, positive=True):
    A_scaler = StandardScaler().fit(A[:, 1:])
    y_scaler = StandardScaler().fit(y.reshape(-1, 1))
    A_new = A_scaler.transform(A[:, 1:])
    y_new = y_scaler.transform(y.reshape(-1, 1)).reshape(-1)
    # normalize is omitted here: the inputs are already standardized above,
    # and the parameter was removed from scikit-learn in 1.2
    clf = ElasticNetCV(l1_ratio=[0.1, 0.5, 1.0],
                       cv=5,
                       n_jobs=8,
                       fit_intercept=False,
                       positive=positive).fit(A_new, y_new)
    score = clf.score(A_new, y_new)
    # Approximate assuming the elastic net is very close to the lasso
    df = np.count_nonzero(clf.coef_)
    logging.info(
        "[ElasticNet] # iter: %d, alpha: %e, l1_ratio: %.2f, # of terms: %d, score: %f",
        clf.n_iter_, clf.alpha_, clf.l1_ratio_, df, score)
    logging.debug("[ElasticNet] alphas:")
    logging.debug(str(clf.alphas_))
    logging.debug("[ElasticNet] MSE path:")
    logging.debug(str(clf.mse_path_))
    nonzero = abs(clf.coef_) > 0.0
    coef = np.zeros_like(clf.coef_)
    coef[nonzero] = (y_scaler.scale_ /
                     A_scaler.scale_[nonzero]) * clf.coef_[nonzero]
    intercept = y_scaler.mean_ - np.dot(A_scaler.mean_, coef)
    return np.append(intercept, coef), df
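
# The closing back-transform undoes the standardization: with
# z_hat = A_std @ coef_std, the original-scale prediction
# y_mean + y_scale * z_hat expands to intercept + A[:, 1:] @ coef exactly as
# computed above. A quick sanity sketch with random data (assuming, as in the
# function, that column 0 of A is an intercept column):
import numpy as np

A = np.random.rand(100, 6)
y = np.random.rand(100)
params, df = elastic_net(A, y, positive=False)
intercept, coef = params[0], params[1:]
y_hat = intercept + A[:, 1:] @ coef  # prediction on the original scale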
Example n. 8
def Elastic_net_fitting(block, target_otu, interest_otu, theta, test_len, cv, iteration, l_grid, output_dir):
    ##Select data and fitting
    print('Start fitting.')
    lib = range(block.shape[0])
    coefs = np.empty(shape=(block.shape[0], block.shape[1] - 1))
    fit_results = np.empty(shape=(block.shape[0], 13))

    for ipred in lib:
        print('\r', 'Complete percentage: %.2f%%' % (ipred / len(lib) * 100), end="", flush=True)
        sub_block = np.delete(block, ipred, axis=0)
        q = block[lib[ipred], :]
        ###Calculate weights
        E_dist = np.sqrt(np.sum(np.array(sub_block[:, 1:] - q[:, 1:]) ** 2, axis=1))
        w = make_weights(E_dist, theta)
        ###Weighted predictors and responses
        X_wp = weight_data(sub_block[:, 1:], w)
        Y_wp = np.ravel(weight_data(sub_block[:, 0], w))
        X_target = block[ipred, 1:]
        Y_target = block[ipred, 0]

        ##Split training and test data: hold out test_len rows at random
        pick_test = np.random.choice(range(X_wp.shape[0]), size=test_len, replace=False)
        X_train = np.append(np.delete(X_wp, pick_test, axis=0), X_target, axis=0)
        X_test = X_wp[pick_test, :]
        Y_train = np.append(np.delete(Y_wp, pick_test, axis=0), Y_target)
        Y_test = Y_wp[pick_test]

        ###Fit function
        regr = ElasticNetCV(cv=cv, random_state=0, max_iter=iteration,
                            l1_ratio=[(i + 1) * l_grid for i in range(int(1 / l_grid))])
        regr.fit(X_train, Y_train)
        rmse = np.sqrt(np.mean((regr.predict(X_train) - Y_train) ** 2))
        rmse_o = np.sqrt(np.mean((regr.predict(X_test) - Y_test) ** 2))
        coefs[ipred, :] = regr.coef_
        fit_results[ipred, :] = (regr.intercept_, regr.alpha_, regr.l1_ratio_,
                                 rmse, np.std(Y_train), rmse_o, np.std(Y_test),
                                 regr.score(X_test, Y_test), regr.score(X_train, Y_train),
                                 max(Y_train), min(Y_train), max(Y_test), min(Y_test))
        print('\r', 'Complete percentage: %.2f%%' % ((ipred + 1) / len(lib) * 100), end="", flush=True)

    # Output results
    coefs = pd.DataFrame(data=coefs)
    coefs.to_csv('/'.join([output_dir, 'coefs/%s_%s_%s_coefs.csv' % (interest_otu, target_otu, theta)]))
    fit_results = pd.DataFrame(
        columns=['Intercept', 'Best alpha', 'Best l1_ratio', 'RMSE', 'Std', 'RMSE_o', 'Std_o', 'Test set score',
                 'Test set score_train', 'ymax_train', 'ymin_train', 'ymax_test', 'ymin_test'],
        data=fit_results)
    fit_results.to_csv('/'.join([output_dir,'fit_result/%s_%s_%s_fit_results.csv' % (interest_otu, target_otu, theta)]))
Example n. 9
def enetCV():
    print ("Doing elastic net")
    cross_val = cross_validation.ShuffleSplit(len(base_X), n_iter=5, test_size=0.2, random_state=0)
    clf4 = ElasticNetCV(cv=cross_val)
    clf4.fit(base_X, base_Y)
    print ("Score = %f" % clf4.score(base_X, base_Y))
    clf4_pred = clf4.predict(X_test)
    write_to_file("elasticCV.csv", clf4_pred)
Example n. 10
def elasticnet_reg(x, y):
    elasticnetcv = ElasticNetCV(cv=20)
    elasticnetcv.fit(x, y)
    elasticnetcv_score = elasticnetcv.score(x, y)
    elasticnetcv_alpha = elasticnetcv.alpha_
    print('ElasticNet R square', elasticnetcv_score)
    print('ElasticNet Alpha', elasticnetcv_alpha)
    return elasticnetcv.coef_
Example n. 11
def score(inEval, X, y):
    indMatrix = pd.DataFrame()

    listEval = list(inEval)
    indMatrix = evaluatedMatrix(listEval, X)

    # Linear regression with elastic net
    #regr = ElasticNet(random_state=0, l1_ratio=0, alpha = 1)
    regr = ElasticNetCV(random_state=0)
    regr.fit(indMatrix, y)
    return (regr.score(indMatrix, y))
Example n. 12
def score(inEval, X, y):
    indMatrix = pd.DataFrame()
    
    listEval = list(inEval)
    indMatrix = evaluatedMatrix(listEval, X)
    
    try:
        # Linear regression with elastic net
        regr = ElasticNetCV(random_state=0)
        regr.fit(indMatrix,y)
        return (regr.score(indMatrix,y))
    except ValueError:
        print(indMatrix)
Example n. 13
def _train_enet(y, Z, X, include_ses=False, p_threshold=0.01):
    log = logging.getLogger(pyfocus.LOG)
    try:
        from limix.qc import normalise_covariance
        from sklearn.linear_model import ElasticNetCV
    except ImportError as ie:
        log.error(
            "Training submodule requires limix>=2.0.0 and sklearn to be installed."
        )
        raise
    from scipy.linalg import lstsq

    log.debug("Initializing ElasticNet model")

    n = len(y)
    attrs = dict()

    K_cis = np.dot(Z, Z.T)
    K_cis = normalise_covariance(K_cis)
    fe_var, s2u, s2e, logl, fixed_betas, pval = _fit_cis_herit(y, K_cis, X)
    if pval > p_threshold:
        log.info("h2g pvalue {} greater than threshold {}. Skipping".format(
            pval, p_threshold))
        return None

    h2g = s2u / (s2u + s2e + fe_var)

    attrs["h2g"] = h2g
    attrs["h2g.logl"] = logl
    attrs["h2g.pvalue"] = pval

    # we only want to penalize SNP effects and not covariate effects...
    fixed_betas, sum_resid, ranks, svals = lstsq(X, y)
    yresid = y - np.dot(X, fixed_betas)

    enet = ElasticNetCV(l1_ratio=0.5, fit_intercept=True, cv=5)
    enet.fit(Z, yresid)
    betas = enet.coef_

    attrs["r2"] = enet.score(Z, yresid)
    attrs["resid.var"] = sum((yresid - enet.predict(Z))**2) / (n - 1)

    if include_ses:
        # TODO: bootstrap?
        ses = None
    else:
        ses = None

    return betas, ses, attrs
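
# The covariate adjustment above is ordinary least squares followed by a
# penalized fit on the residuals, so only SNP effects are shrunk. A minimal
# self-contained illustration of that step (synthetic shapes; not pyfocus code):
import numpy as np
from scipy.linalg import lstsq
from sklearn.linear_model import ElasticNetCV

rng = np.random.default_rng(0)
X_cov = np.column_stack([np.ones(200), rng.normal(size=200)])  # covariates
Z_snp = rng.normal(size=(200, 50))                             # genotypes
y_phe = X_cov @ np.array([1.0, 0.5]) + 0.8 * Z_snp[:, 0] \
        + rng.normal(scale=0.5, size=200)

fixed_betas, _, _, _ = lstsq(X_cov, y_phe)   # unpenalized covariate effects
yresid = y_phe - X_cov @ fixed_betas         # remove them from the phenotype
enet = ElasticNetCV(l1_ratio=0.5, cv=5).fit(Z_snp, yresid)  # penalize SNPs only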
Example n. 14
def regress(x, y, title):
    clf = ElasticNetCV(max_iter=200, cv=10, l1_ratio = [.1, .5, .7, .9, .95, .99, 1])

    clf.fit(x, y)
    print "Score", clf.score(x, y)

    pred = clf.predict(x)
    plt.title("Scatter plot of prediction and " + title)
    plt.xlabel("Prediction")
    plt.ylabel("Target")
    plt.scatter(y, pred)

    # Show perfect fit line
    if "Boston" in title:
        plt.plot(y, y, label="Perfect Fit")
        plt.legend()

    plt.grid(True)
    plt.show()
Example n. 15
def fit_elasticnet(data, targets, permute=True):
    """
    Elasticnet regression
    """
    # Standardize once and reuse the matrix for fitting, scoring, and the
    # permutation test below
    X = StandardScaler().fit_transform(data.values)
    cv = ElasticNetCV()
    cv.fit(X, targets)
    params = {"alpha": cv.alpha_, "l1_ratio": cv.l1_ratio_}
    score = cv.score(X, targets)
    if permute:
        p = permutation_test_score(
            cv,
            X,
            targets,
            # cv=10,
            n_jobs=3,
            n_permutations=1000,
        )
        return params, score, p[-1]
    else:
        return params, score, -1
Example n. 16
def elastic_net(df):
    k_val = 40
    df = pd.get_dummies(df)
    df = remove_high_correlation(df)
    df = select_k_best(df, k_val)
    X = df.drop('DaysFromFirstDate', axis=1)
    y = df['DaysFromFirstDate']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)  # use the training statistics; don't refit on test data
    en = ElasticNetCV(cv=20, tol=.01)
    en.fit(X_train, y_train)
    pred = en.predict(X_test)
    print('Elastic Net Regression: ' + str(k_val))
    print(mean_squared_error(y_test, pred))
    print(en.score(X_test, y_test))
    scores = cross_val_score(en, X_train, y_train, cv=20)
    print(scores)
    print(scores.mean())
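
# remove_high_correlation and select_k_best are project helpers not shown in
# this snippet. A hypothetical stand-in for select_k_best built on
# sklearn.feature_selection (an assumption; it keeps the target column attached):
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_regression

def select_k_best(df, k):
    X = df.drop('DaysFromFirstDate', axis=1)
    y = df['DaysFromFirstDate']
    selector = SelectKBest(f_regression, k=min(k, X.shape[1])).fit(X, y)
    # Keep only the selected columns and re-attach the target
    return pd.concat([X.loc[:, selector.get_support()], y], axis=1)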
Example n. 17
    def train_elasticNetCV(self, data):
        train, validacion = data
        x_tr, y_tr = train
        x_val, y_val = validacion
        #print("El set de train tiene {} filas y {} columnas".format(x_tr.shape[0],x_tr.shape[1]))
        #print("El set de validacion tiene {} filas y {} columnas".format(x_val.shape[0],x_val.shape[1]))

        print('Start training ElasticNetCV...')
        start_time = self.timer()

        # note: normalize was removed in scikit-learn 1.2; on newer versions,
        # standardize the features beforehand (e.g. with StandardScaler)
        enet = ElasticNetCV(normalize=True,
                            n_alphas=2000,
                            max_iter=2000,
                            cv=10)
        enet.fit(x_tr, y_tr)
        print("The R2 is: {}".format(enet.score(x_tr, y_tr)))
        print("The alpha choose by CV is:{}".format(enet.alpha_))
        self.timer(start_time)

        print("Making prediction on validation data")
        y_val = np.expm1(y_val)
        y_val_pred = np.expm1(enet.predict(x_val))
        mae = mean_absolute_error(y_val, y_val_pred)
        print("El mean absolute error de es {}".format(mae))

        print('Saving model into a pickle')
        os.makedirs('pickles', exist_ok=True)

        with open('pickles/enetCV.pkl', 'wb') as f:
            pickle.dump(enet, f)

        print('Making prediction and saving into a csv')
        y_test = enet.predict(self.x_test)

        return y_test
Example n. 18
    def elasticNet(self, X, y, cv):
        # cv must be an iterable of (train_idx, test_idx) pairs, e.g.
        # list(KFold(n_splits=5).split(X)); an integer fold count would
        # break the loop below

        emcv = ElasticNetCV(fit_intercept=True)

        err = 0.0

        scores = []
        for train_idx, test_idx in cv:

            emcv.fit(X[train_idx], y[train_idx])

            score = emcv.score(X[test_idx], y[test_idx])
            p = emcv.predict(X[test_idx])

            diff = p - y[test_idx]
            err += np.dot(diff, diff)

            scores.append(score)

        rmse = np.sqrt(err / len(y))
        #print "-- elastic net rmse : ", rmse, np.mean(scores)

        return emcv, scores, rmse
Example n. 19
def elastic_net_reg():
    from sklearn.linear_model import ElasticNetCV
    n_alphas = 300
    l1_ratio = [.1, .3, .5, .7, .9]
    rr = ElasticNetCV(n_alphas=n_alphas,
                      l1_ratio=l1_ratio,
                      cv=10,
                      random_state=0)
    rr.fit(X_train_scaled, y_train)
    y_pred_train = rr.predict(X_train_scaled)
    #y_pred_train_round = np.round(y_pred_train)
    y_pred_test = rr.predict(X_test_scaled)
    #y_pred_test_round = np.round(y_pred_test)
    print(rr.alpha_, rr.l1_ratio_)
    print(rr.score(X_test_scaled, y_test))
    #plot_conf_mat(y_test, _pred_round)
    global metrics_en
    metrics_en = [
        accuracy_score(y_test, np.round(y_pred_test)),
        mean_squared_error(y_test, y_pred_test),
        r2_score(y_test, y_pred_test)
    ]
    return scores_results(y_train, y_test, y_pred_train, y_pred_test)
Example n. 20
def ElasticNet_CV(X, Y, K_Fold, factor_string):
    factor = []
    Y = Y.fillna(0)
    X = X.fillna(0)
    X = np.array(X)
    Y = np.array(Y)
    Y = np.ravel(Y)
    ElasticNet_cv = ElasticNetCV(fit_intercept=True, random_state=0)
    k_fold = KFold(K_Fold)
    mean_alpha = []
    # Print alphas and scores
    print(
        "Alpha parameters maximising the generalization score on different subsets of the data:"
    )
    for k, (train, test) in enumerate(k_fold.split(X, Y)):
        ElasticNet_cv.fit(X[train], Y[train])
        print("[fold {0}] alpha: {1:.5f}, score: {2:.5f}".format(
            k + 1, ElasticNet_cv.alpha_, ElasticNet_cv.score(X[test],
                                                             Y[test])))
        mean_alpha.append(ElasticNet_cv.alpha_)
    # Averaged alpha
    mean_alpha = np.mean(mean_alpha)
    regr = ElasticNet(alpha=mean_alpha, fit_intercept=True, random_state=0)
    result = regr.fit(X, Y)
    print("Mean alpha: %f" % mean_alpha)
    print("\nIntercept: %f" % result.intercept_)
    for i in range(len(factor_string)):
        print("Coeffcients of %s : %f" % (factor_string[i], result.coef_[i]))
        if abs(result.coef_[i] - 0) >= 0.00001:
            factor.append(factor_string[i])
    N = X.shape[0]
    k = X.shape[1]
    score = result.score(X, Y)
    print("Score: ", score)
    print("Remaining factors: %s" % factor)
    return result.intercept_, result.coef_, mean_alpha
Example n. 21
predicted=lr.predict(X)
'''validation'''
kf = KFold(n_splits=5)
p = np.zeros_like(y)
for train, test in kf.split(X):
    lr.fit(X[train], y[train])
    p[test] = lr.predict(X[test])
rmse_cv = np.sqrt(mean_squared_error(p, y))
print("RMSE of 5-fold cv {:.2}".format(rmse_cv))
'''ElasticNet'''
from sklearn.linear_model import ElasticNetCV
met=ElasticNetCV(n_jobs=-1)
p=np.zeros_like(y)
for t, tst in kf.split(X):
    met.fit(X[t], y[t])
    p[tst] = met.predict(X[tst])
p2 = r2_score(y, p)
print(met.score(X, y))
print(p2, "Elastic")





exit()
plt.scatter(predicted,y)
plt.xlabel("Predicted")
plt.ylabel("Actual ")
plt.plot([y.min(), y.max()], [y.min(), y.max()])
plt.show()
Example n. 22
#Replace all 0 with a minimum value close to zero to resolve log(0) issue
x_scaled = replaceZeroes(x_scaled)
test = pd.DataFrame(x_scaled)

# Renaming the dataset columns
# test.columns = ['X1','X2','X3','X4','X5','y']
XColsSize = test.shape[1] - 1
XColsName = ['X{}'.format(x + 1) for x in range(0, XColsSize)]
FFXColsName = np.copy(XColsName)
XColsName.append('y')
XColsName

test.columns = XColsName

X = test.iloc[:, :-1]
y = test.iloc[:, -1]

# create training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

# Elastic net without GA
regr = ElasticNetCV(random_state=0, cv=5)
regr.fit(X_train, y_train)
print("Train R^2:", regr.score(X_train, y_train))
print("Test R^2:", regr.score(X_test, y_test))
Example n. 23
model.l1_ratio_
cdsw.track_metric("l1_ratio", model.l1_ratio_)

model.alpha_
cdsw.track_metric("alpha", model.alpha_)

# ## Model coefficients
model.intercept_
cdsw.track_metric("intercept", model.intercept_)

zip(feature_cols, model.coef_)
for i in range(0, len(feature_cols)):
    cdsw.track_metric(feature_cols[i], model.coef_[i])

# ## r squared scores
r_train = model.score(train_features, train_labels)
r_train
cdsw.track_metric("r_train", r_train)

r_test = model.score(test_features, test_labels)
r_test
cdsw.track_metric("r_test", r_test)

# ## Persist model during experiment
filename = 'bikeshare_model.pkl'
joblib.dump(model, filename)
cdsw.track_file(filename)

#timestamp = datetime.now().strftime('%Y%m%d%H%M%S%f')
#joblib.dump(model, 'bikeshare_model_' + timestamp + '.pkl')
Example n. 24
y = dataset.iloc[:, -1]

# create training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

# Elastic net without GA
regr = ElasticNetCV(random_state=0, cv=5)
regr.fit(X_train, y_train)
print("Elastic Net:")
# R2 score on train data
print("Train:", regr.score(X_train, y_train))

#print(regr.score(X_test, y_test))


# Sort the coefficients
def sortCoef(columns, coef):
    nlist = [(y, x) for x, y in zip(columns, coef)]
    try:
        nlist = sorted(nlist, key=itemgetter(0), reverse=True)
    except ValueError:
        print("Error nlist:", nlist)
    return [val for (key, val) in nlist], [key for (key, val) in nlist]


# print a number to 3 significant digits
Example n. 25
plt.figure()
plt.hist2d(y, lasso_prediction)
plt.ylabel("Predicted Values")
plt.xlabel("Truth Values")
plt.title("Lasso Linear Regression")
plt.savefig("figures/lasso_predicted_truth.png")
print "#######ELASTIC#####"
coef_path_elastic_cv.fit(X,y)
print coef_path_elastic_cv.get_params
print "alphas:" 
print  coef_path_elastic_cv.alphas_
print "coef_:"
print coef_path_elastic_cv.coef_
print "length of elastic terms:%d" % len(coef_path_elastic_cv.coef_)
elastic_predict = coef_path_elastic_cv.predict(X)
elastic_score = coef_path_elastic_cv.score(X,y)
print "elastic_score:%.3g" % elastic_score
elastic_cv_score = cross_validation.cross_val_score(coef_path_elastic_cv, X, y, n_jobs=2, cv=5)
print elastic_cv_score
#print "elastic precision:%.3g" %  precision_score(y, elastic_predict, average='macro') 
plt.figure()
plt.hist2d(y, elastic_predict)
plt.ylabel("Predicted Values")
plt.xlabel("Truth Values")
plt.title("Elastic Linear Regression")
plt.savefig("figures/elastic_predicted_truth.png")
print "#######Logistic#####"
coef_path_logistic_cv.fit(X,binary_y)
print coef_path_logistic_cv.get_params
print "coef_:"
print coef_path_logistic_cv.coef_
Example n. 26
def LASSO_inverse_solve(data, waves):
    """Function to compute the inverse solution with LASSO with positive coefficients
    Parameters
    ----------
    data : numpy.ndarray
        Input data for the inverse problem (N_channels x T)
    waves : numpy.ndarray
       Basis waves to fit (directions_number x number_of_speeds x channels_number x timepoints_number)
    Returns
    -------
    best_score : R-squared in optimum
    best_coefs : coefficients in optimum
    best_shift : starting time point in optimum
    best_speed_ind : index of the best speed
    """

    import numpy as np
    from sklearn.linear_model import ElasticNetCV

    Ndir = waves.shape[0] # number of propagation directions
    R = data.shape[1]-waves.shape[3] + 1 # number of sliding window shifts
    S = waves.shape[1] # number of propagation speeds
    Tw = waves.shape[3]

    regression = ElasticNetCV(l1_ratio=1, positive=True, cv=5, max_iter=100000) # l1_ratio=1 makes this a positive-lasso fit

    coefs = np.zeros([R, S, Ndir]) # regression coefficients
    # intercept = np.zeros([R,S]) # regression intercept
    score = np.zeros([R, S]) # R-squared scores
    nzdir = np.zeros([R, S]) # number of nonzero directions
    y_pred = np.zeros([R, S, data.shape[0]*Tw]) # predicted spikes (channels x window length per shift/speed)

    for r in range(0, R):
        data_vec = data[:,r:(Tw+r)].flatten()
        for s in range(0, S):
            wavesspeed = waves[:, s, :, :]
            wavesspeed_vec = np.zeros([Ndir, data_vec.shape[0]])
            for d in range(0, Ndir):
                wavesspeed_vec[d] = wavesspeed[d,:,:].flatten()
            regression.fit(wavesspeed_vec.T, data_vec)
            coefs[r, s, :] = regression.coef_
            # intercept[r, s] = regression.intercept_
            score[r, s] = regression.score(wavesspeed_vec.T, data_vec)
            y_pred[r, s, :] = regression.predict(wavesspeed_vec.T)
            nzdir[r, s] = np.sum(coefs[r, s, :] != 0)

    shifts_s = score.argmax(axis = 0) # best shifts for each speed
    score_s = score.max(axis = 0) # corresponding scores

    # best solution according to the number of nonzero directions
    # nzdir_s = np.zeros(S) # corresponding number of nonzero directions (without intercept)
    # for s in range(0, S):
    #     nzdir_s[s] = nzdir[shifts_s[s], s]
    #
    # score_sort_ind = (-score_s).argsort() # indices of sorted scores for all speeds
    # dir_sort_ind = (nzdir_s[score_sort_ind[0:3]]).argsort() # indices of sorted number of nonzero directions for top-3 scores
    #
    # best_speed_ind = score_sort_ind[dir_sort_ind[0]] # index of best speed

    score_sort_ind = (-score_s).argsort()
    best_speed_ind = score_sort_ind[0]
    best_coefs = coefs[shifts_s[best_speed_ind], best_speed_ind]
    best_score = score_s[best_speed_ind] # R-squared value in optimum
    best_shift = shifts_s[best_speed_ind]

    # plt.figure()
    # plt.plot(data_vec)
    # plt.plot(y_pred[bestshifts[bestind], bestind, :])
    # plt.title(['R-squared = ', str(finalscore)])

    return [best_score, best_coefs, best_shift, best_speed_ind]
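
# A hypothetical call showing the shapes expected by the docstring
# (synthetic arrays only; real data and basis waves come from the wave model):
import numpy as np

N_channels, T = 16, 60
Ndir, Nspeeds, Tw = 6, 2, 40
data = np.random.randn(N_channels, T)
waves = np.random.randn(Ndir, Nspeeds, N_channels, Tw)
best_score, best_coefs, best_shift, best_speed_ind = LASSO_inverse_solve(data, waves)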
def Regularized_Smap(abund, target_otu, theta, l_grid, iteration, cv, test_len,
                     uncontinuous, output_dir):
    print('Process data for otu No. %s' % str(target_otu + 1))
    # Make input for the elastic_net. abund is expected to be a 2-D np.matrix
    # so that the row and column slices below keep their matrix dimensions.
    block = np.append(abund[1:, target_otu], abund[0:-1, :], axis=1)
    ##Delete the discontinuous states
    ##Commonly, we infer the Jacobian matrices from a continuous time series. However, if you
    ##don't have enough time points but do have replicate time series from independent reactors,
    ##you can combine the replicate OTU tables as the input and delete the discontinuous states in the block.
    if uncontinuous:
        block = np.delete(block,
                          [abund.shape[0] // 3 - 1, abund.shape[0] // 3 * 2 - 1],
                          axis=0)
        ##Triplicate time series are used as the example, so we remove the two discontinuous
        ##states, i.e., [abund.shape[0] // 3 - 1, abund.shape[0] // 3 * 2 - 1], from the block.
        ##You can also specify the list of discontinuous states in the block using the following
        ##line. Remember to uncomment the following line and delete line 55.
        # block = np.delete(block, [discontinuous states], axis=0)
        ##The [discontinuous states] list can be like [34, 55] or [1, 2, 10] as you need it to be.
    ##Scaling the input
    ##Each time series is normalized to have a mean of 0 and standard deviation of 1 before analysis with S-maps
    block = (block - np.average(block, axis=0)) / np.std(block, axis=0)

    ##Select data and fitting
    print('Start fitting.')
    lib = range(block.shape[0])
    coefs = np.empty(shape=(block.shape[0], block.shape[1] - 1))
    fit_results = np.empty(shape=(block.shape[0], 13))

    for ipred in lib:
        print('\r',
              'Complete percentage: %.2f%%' % (ipred / len(lib) * 100),
              end="",
              flush=True)
        sub_block = np.delete(block, ipred, axis=0)
        q = block[lib[ipred], :]
        ###Calculate weights
        E_dist = np.sqrt(
            np.sum(np.array(sub_block[:, 1:] - q[:, 1:])**2, axis=1))
        w = make_weights(E_dist, theta)
        ###Weighted predictors and responses
        X_wp = weight_data(sub_block[:, 1:], w)
        Y_wp = np.ravel(weight_data(sub_block[:, 0], w))
        X_target = block[ipred, 1:]
        Y_target = block[ipred, 0]

        ##Split training and test data
        pick_test = np.random.choice(range(X_wp.shape[0]),
                                     size=test_len,
                                     replace=False)
        X_train = np.append(np.delete(X_wp, pick_test, axis=0),
                            X_target,
                            axis=0)
        X_test = X_wp[pick_test, :]
        Y_train = np.append(np.delete(Y_wp, pick_test, axis=0), Y_target)
        Y_test = Y_wp[pick_test]

        ###Fit function
        regr = ElasticNetCV(cv=cv,
                            random_state=0,
                            max_iter=iteration,
                            l1_ratio=[(i + 1) * l_grid
                                      for i in range(int(1 / l_grid))])
        regr.fit(X_train, Y_train)
        rmse = np.sqrt(np.mean((regr.predict(X_train) - Y_train)**2))
        rmse_o = np.sqrt(np.mean((regr.predict(X_test) - Y_test)**2))
        coefs[ipred, :] = regr.coef_
        fit_results[ipred, :] = (regr.intercept_, regr.alpha_, regr.l1_ratio_,
                                 rmse, np.std(Y_train), rmse_o, np.std(Y_test),
                                 regr.score(X_test, Y_test),
                                 regr.score(X_train, Y_train),
                                 max(Y_train), min(Y_train),
                                 max(Y_test), min(Y_test))
        print('\r',
              'Complete percentage: %.2f%%' % ((ipred + 1) / len(lib) * 100),
              end="",
              flush=True)

    # Output results
    coefs = pd.DataFrame(data=coefs)
    coefs.to_csv('/'.join(
        [output_dir, 'coefs/%s_%s_coefs.csv' % (target_otu, theta)]))
    fit_results = pd.DataFrame(columns=[
        'Intercept', 'Best alpha', 'Best l1_ratio', 'RMSE', 'Std', 'RMSE_o',
        'Std_o', 'Test set score', 'Test set score_train', 'ymax_train',
        'ymin_train', 'ymax_test', 'ymin_test'
    ],
                               data=fit_results)
    fit_results.to_csv('/'.join(
        [output_dir,
         'fit_result/%s_%s_fit_results.csv' % (target_otu, theta)]))
Example n. 28
    def elastnet_reg(X_train,
                     X_test,
                     y_train,
                     y_test,
                     poly_name=False,
                     features=None,
                     cv=10):
        '''
        Function for building an elastic net regression model. 
        Stores:
        - Model name
        - Number of coefficients
        - Best alpha parameter
        - Best l1 parameter
        - R-squared train score
        - Cross-validation r-squared score
        - R-squared test score
        - Root mean squared error (RMSE)
        '''
        # model name
        if not poly_name:
            name = 'Elastic Net Regression Model ({})'.format(
                str(features) + ' features')
        else:
            name = 'Polynomial Elastic Net Regression Model ({})'.format(
                str(features) + ' features')

        # Cross-validate model
        elast_net = ElasticNetCV(n_alphas=100,
                                 l1_ratio=np.linspace(0.01, 1, 20),
                                 max_iter=1000000,
                                 random_state=1,
                                 cv=3)
        cv_scores = cross_val_score(elast_net, X_train, y_train, cv=cv)
        cv_r2_mean = np.mean(cv_scores)
        cv_r2_std = np.std(cv_scores)

        # fit model
        elast_net.fit(X_train, y_train)

        # number of coefficients
        coefs = len(elast_net.coef_)

        # best alpha and l1 ratio
        best_alpha = elast_net.alpha_
        best_l1 = elast_net.l1_ratio_

        # make predictions
        y_pred = elast_net.predict(X_test)

        # evaluate performance on train and test data
        r2_train = elast_net.score(X_train, y_train)
        r2_test = elast_net.score(X_test, y_test)
        rmse = np.sqrt(metrics.mean_squared_error(y_test, y_pred))

        # save results in dictionary
        results_dict = {
            'Model': name,
            'Alpha': best_alpha,
            'L1': best_l1,
            'No. of coefs': coefs,
            'R^2 train': r2_train,
            'Cross-Validation R^2 scores': cv_scores,
            'Cross-Validation R^2 mean': cv_r2_mean,
            'Cross-Validation R^2 std': cv_r2_std,
            'R^2 test': r2_test,
            'RMSE': rmse
        }
        return elast_net, results_dict
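
# A hypothetical call: elastnet_reg takes no self, so it is used here as a
# plain function, and the snippet's own numpy/sklearn imports are assumed:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=300, n_features=10, noise=10.0, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
model, results = elastnet_reg(X_train, X_test, y_train, y_test, features=10)
print(results['Alpha'], results['L1'], results['R^2 test'])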
stack_val_mse = mean_squared_error(ytestv, stack_val_pred)
stack_test_mse = mean_squared_error(ytest, stack_test_pred)

print("RMSE using StackRegressor:\t{}\t{}\t{}\n".format(
    np.sqrt(stack_train_mse), np.sqrt(stack_val_mse), np.sqrt(stack_test_mse)))

df_rf = pd.DataFrame({'Actual': ytest, 'Predicted': stack_test_pred})
fig1 = pp.figure(figsize=(8, 6))
df_rf.head(n=300).plot()
pp.legend()
pp.title("StackRegressor Actual v/s Predicted Annual Rainfall")
pp.xticks(())
pp.yticks(())
pp.show()

print(rfreg.score(Xtest, ytest), elastic.score(Xtest, ytest),
      stack.score(Xtest, ytest))

# CLASSIFICATION & CLUSTERING METHODS

# Data splitting
X = np.array(raindf[['JAN-FEB', 'MAR-MAY', 'JUN-SEP', 'OCT-DEC']])
y = np.array(raindf['REGION'])

le = preprocessing.LabelEncoder()
le.fit(y)
y = le.fit_transform(y)

Xreduced = PCA(n_components=2).fit_transform(X)
results = pd.DataFrame(Xreduced, columns=['pca1', 'pca2'])
Example n. 30

#%%
#try elastic net

#alpha equals lambda here
lambda_grid = [0.01, 0.1, 1, 10, 100]
l1_ratio_grid = [0.1, 0.3, 0.5, 0.7, 0.9]

# pass the alpha grid as well; otherwise lambda_grid is defined but never used
enet_CV = ElasticNetCV(l1_ratio=l1_ratio_grid, alphas=lambda_grid, cv=3, n_jobs=-1, verbose=True)

enet_CV.fit(train_X,train_Y)

#%%
#show
enet_CV.score(test_X,test_Y)
plt.plot(enet_CV.predict(test_X),test_Y,'o')
#%%
#try svr

svr = SVR(kernel = 'rbf',C=1,cache_size=2000)

SVR_params = { 'C' : [1e-1,1.0,1e2,1e3,1e4] }
svr_rs = RandomizedSearchCV(svr, SVR_params, verbose=True, n_jobs=-1)  # sklearn.model_selection

svr.fit(train_X[:,whichones[0]],train_Y)

#%%
#try bagging/boosting etc
#rfr = RandomForestRegressor(n_estimators = 30,n_jobs = 2)
Example n. 31
plt.hist(y, 20)

Xs = np.nan_to_num(Xso, 0)
fx = StandardScaler()
X2 = Xs  #fx.fit_transform(Xs)
y = np.array(y).astype(float)

reg = ElasticNetCV(
    l1_ratio=[.1, .7, .725, .75, .775, .8, .9, .95, .99, 1],
    cv=10,
    positive=True,
    max_iter=10000)
#reg = lgb.LGBMRegressor()
reg.fit(X2, y)
print(Xs.shape, reg.score(X2, y))

plt.style.use('seaborn-white')
plt.scatter(reg.predict(X2), y, s=10, alpha=0.5)
plt.ylim(-60, 60)
plt.xlim(-60, 60)
plt.xlabel('predicted margin')
plt.ylabel('actual margin')

#sorted([(i,n) for i,n in zip(reg.feature_importances_,exp_lbl) if 'LB' in n],reverse=True)

reg.l1_ratio_

exp_lbl = sum([[str(p) + '_' + str(s) for s in valid_col] for p in valid_pos],
              [])
Example n. 32

#%%
#try elastic net

#alpha equals lambda here
lambda_grid = [0.01, 0.1 , 1, 10,100]
l1_ratio_grid = [0.1,0.3,0.5,0.7,0.9]

enet_CV = ElasticNetCV(l1_ratio=l1_ratio_grid,alphas=lambda_grid,cv=3,n_jobs=-1,verbose=True)

enet_CV.fit(train_X,train_Y)

#%%
#show
enet_CV.score(test_X,test_Y)
plt.plot(enet_CV.predict(test_X),test_Y,'o')
#%%
#try svr

svr = SVR(kernel = 'rbf',C=1,cache_size=2000)

SVR_params = { 'C' : [1e-1,1.0,1e2,1e3,1e4] }
svr_rs = RandomizedSearchCV(svr, SVR_params, verbose=True, n_jobs=-1)  # sklearn.model_selection

svr.fit(train_X[:,whichones[0]],train_Y)

#%%
#try bagging/boosting etc
#rfr = RandomForestRegressor(n_estimators = 30,n_jobs = 2)
Example n. 33
md=dnn_reg(X_train,y_train,X_test,y_test)
reg_eval(X_test,y_test,md)

###Lasso CV regression

def reg_eval2(y_test,model):
    y_pred=model.predict(X_test)
    print("evaluation the results for model:",model)
    print("MSE:",mean_squared_error(y_test,y_pred))
    print("R2:",r2_score(y_test,y_pred))
    print("EVS:",explained_variance_score(y_test,y_pred))

lasso = LassoCV(cv=5, random_state=0,max_iter=10000)
lasso.fit(X_train,y_train)
reg_eval2(y_test,lasso)

#ElasticNet Regression
# note: normalize was removed in scikit-learn 1.2; on newer versions,
# standardize the features beforehand (e.g. with StandardScaler)
ela = ElasticNetCV(l1_ratio=0.8,normalize=True,max_iter=5000,random_state=77)
ela.fit(X_train,y_train)
print("R square:",ela.score(X_test,y_test))
reg_eval2(y_test,ela)


#SVR Regression
from sklearn.svm import LinearSVR
LSVR=LinearSVR(epsilon=0.1,random_state=0, tol=1e-5,max_iter=10000)
# scaler=RobustScaler()
# pipe=Pipeline(steps=[("scaling",scaler),("rg",LSVR)])
LSVR.fit(X_train,y_train)
reg_eval2(y_test, LSVR)

# 5.1.4 Elastic Net Regression
pred_elast = elast.fit(X_train, y_train).predict(
    X_test
)  #train the algorithm on training data and predict using the testing data
y_predelast = elast.predict(X_test)
print('Betas: ', list(zip(elast.coef_, X)))
print('Beta0: %.2f' % elast.intercept_)  #Beta0

# 5.1.4.1 Elastic Net Regression with crossvalidation to calculate optimal alpha

elastcv = ElasticNetCV(cv=5, random_state=42)
pred_elastcv = elastcv.fit(X_train, y_train).predict(
    X_test
)  #train the algorithm on training data and predict using the testing data
y_predelastcv = elastcv.predict(X_test)
print('Optimal Alpha Value: ', elastcv.alpha_)
print('Betas: ', list(zip(elastcv.coef_, X)))
print('Beta0: %.2f' % elastcv.intercept_)  #Beta0
print('R²: %.2f' % elastcv.score(X, Y))  #R²
print('MSE: %.2f' % mean_squared_error(y_test, y_predelastcv))

# 5.1.5 Robust regression
'''Robust regression aims to fit a regression model in the presence 
of corrupt data: either outliers, or error in the model


HuberRegressor should be faster than RANSAC and Theil Sen unless the number of 
samples are very large, i.e n_samples >> n_features. This is because RANSAC and 
Theil Sen fit on smaller subsets of the data. However, both Theil Sen and RANSAC 
are unlikely to be as robust as HuberRegressor for the default parameters.

RANSAC is faster than Theil Sen and scales much better with the number of samples.

RANSAC will deal better with large outliers in the y direction (most common situation).
Example n. 35
#%%
interesting_ones = ['G13','G14','G15','G19','G21']



r2_mfcc = []
r2_stft = []
for chan in interesting_ones:
    y = Y[:,electrode_names.index(chan)]
    train_X,test_X,train_Y,test_Y = train_test_split(np.hstack([mfcc_X,X]),y,test_size=0.3)
    mfcctrain_X = train_X[:,:325]
    train_X = train_X[:,325:]
    l1_ratio_grid = [0.1,0.3,0.5,0.7,0.9]
    enet_CV = ElasticNetCV(l1_ratio=l1_ratio_grid,n_jobs=-1,verbose=True)
    enet_CV.fit(train_X,train_Y)
    r2_stft.append(enet_CV.score(test_X[:,325:],test_Y))
    enet_CV.fit(mfcctrain_X,train_Y)
    r2_mfcc.append(enet_CV.score(test_X[:,:325],test_Y))


#%%

#%for standardizing in lagged stimuli space
scaler = preprocessing.StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

#add intercept
#X = np.hstack((np.ones(X.shape[0])[:,None],X))

yscaler = preprocessing.StandardScaler()