コード例 #1
# Search the LassoLars regularisation strength over a fine grid of alphas.
lars_test = np.arange(0.0001, 0.002, 0.0001)
lars_alpha, lars_err = general_model(LassoLars, lars_test)

# Bare expression: shows the selected alpha and its error when run in a notebook.
lars_alpha, lars_err

max_iter = 50000

# Refit each linear model on the training data with the alpha stored for it.
# alpha_list is read in the order: lasso, elasticNet, ridge, lars.
lasso_model = Lasso(alpha=alpha_list[0], max_iter=max_iter).fit(trainX, trainY)
elasticNet_model = ElasticNet(alpha=alpha_list[1],
                              max_iter=max_iter).fit(trainX, trainY)
ridge_model = Ridge(alpha=alpha_list[2], max_iter=max_iter).fit(trainX, trainY)
lars_model = LassoLars(alpha=alpha_list[3],
                       max_iter=max_iter).fit(trainX, trainY)

# Predict on the test frame and undo the log scale with expm1
# (presumably the target was log1p-transformed before training — TODO confirm).
lasso_pred = np.expm1(lasso_model.predict(raw_test_df))
ridge_pred = np.expm1(ridge_model.predict(raw_test_df))
elasticNet_pred = np.expm1(elasticNet_model.predict(raw_test_df))
lars_pred = np.expm1(lars_model.predict(raw_test_df))
# Stack five prediction vectors (four linear models plus xgb_pred) row-wise.
pred_list = np.array(
    [lasso_pred, ridge_pred, elasticNet_pred, lars_pred, xgb_pred])

# Blend the five stacked predictions with an inverse-error weighted average:
# each model gets weight 1/err, normalised so that the weights sum to 1.
# err_list therefore needs five entries, in the same order as pred_list.
# NOTE(review): alpha_list is ordered lasso, elasticNet, ridge, lars while
# pred_list is ordered lasso, ridge, elasticNet, lars, xgb — confirm that
# err_list follows the pred_list order, otherwise the ridge and elasticNet
# weights are swapped.
err_list = np.array(err_list)

w_list = 1 / err_list
total_w = np.sum(w_list)
predictions = np.matmul(w_list / total_w, pred_list)
コード例 #2
    for expl in format_as_all(res, clf):
        assert 'Error' in expl
        assert 'BaseEstimator' in expl
    with pytest.raises(TypeError):
        explain_weights(clf, unknown_argument=True)

@pytest.mark.parametrize(['reg'], [
    [PassiveAggressiveRegressor(C=0.1, random_state=42)],
def test_explain_linear_regression(boston_train, reg):
    assert_explained_weights_linear_regressor(boston_train, reg)
コード例 #3
# Initialise Lasso with alpha=0.07 and score it with 10-fold cross-validation
# (negated mean squared error).
lasso = Lasso(alpha=0.07)
scores7 = cross_val_score(lasso, sX, sy, cv=10,scoring='neg_mean_squared_error')
print scores7

# Import Ridge from sklearn.linear_model.
from sklearn.linear_model import Ridge
# Initialise Ridge with alpha=1 and score it the same way.
ridge = Ridge(alpha=1)
scores8 = cross_val_score(ridge, sX, sy, cv=10,scoring='neg_mean_squared_error')
print scores8

from sklearn.linear_model import LassoLars
# NOTE(review): `lars` is not assigned anywhere in the visible code — confirm
# that it is created before this line.
scores9 = cross_val_score(lars, sX, sy, cv=10,scoring='neg_mean_squared_error')
print scores9

from sklearn.linear_model import ElasticNetCV
# NOTE(review): `elasticnet` is not assigned anywhere in the visible code —
# confirm that it is created before this line.
scores10 = cross_val_score(elasticnet, sX, sy, cv=10,scoring='neg_mean_squared_error')
print scores10

from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
# Import the feature selection module from sklearn.
from sklearn import feature_selection

# Univariate selector scored with f_regression, configured with percentile=100.
fs = feature_selection.SelectPercentile(feature_selection.f_regression, percentile = 100)  # 30 looks like an earlier setting — TODO confirm
コード例 #4
    ll = sum(act * sp.log(pred) +
             sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll

# add two columns for hour and weekday
def dayhour(timestr):
    """Split a ``YYMMDDHH`` timestamp into weekday and hour features.

    Args:
        timestr: Timestamp in ``%y%m%d%H`` form (for example ``14102100``).
            It is passed through ``str()`` first, so integers work as well.

    Returns:
        ``[weekday, hour]`` as floats, where weekday follows
        ``datetime.weekday()`` (Monday is 0).

    Raises:
        ValueError: If the value does not match the ``%y%m%d%H`` format.
    """
    # Parse the argument itself. The previous body read an unrelated name
    # ``x``, which only worked when a global of that name happened to exist.
    d = datetime.strptime(str(timestr), "%y%m%d%H")
    return [float(d.weekday()), float(d.hour)]

# Hash every stringified column value into a 2**20-wide feature space.
fh = FeatureHasher(n_features=2**20, input_type="string")

# Train classifier
clf = LassoLars()
# Stream the training CSV in chunks of 100000 rows.
train = pd.read_csv("train/subtrain.csv", chunksize=100000, iterator=True)
all_classes = np.array([0, 1])  # not used in the visible part of this script
for chunk in train:
    y_train = chunk["click"]
    chunk = chunk[cols]
    # Append weekday ("wd") and hour ("hr") columns derived from the raw
    # "hour" value, then drop the raw column.
    # NOTE(review): DataFrame.join aligns on the index and the new frame is
    # built with a fresh 0-based index; if later chunks keep their original
    # row numbers, "wd"/"hr" will not line up for them — confirm.
    chunk = chunk.join(
        pd.DataFrame([dayhour(x) for x in chunk.hour], columns=["wd", "hr"]))
    chunk.drop(["hour"], axis=1, inplace=True)
    Xcat = fh.transform(np.asarray(chunk.astype(str)))
    # NOTE(review): fit() is called once per chunk; presumably each call
    # refits from scratch, so only the last chunk would shape the final
    # model — confirm whether incremental training was intended.
    clf.fit(Xcat, y_train)

# Create a submission file
usecols = cols + ["id"]
X_test = pd.read_csv("test/mtest.csv", usecols=usecols)
X_test = X_test.join(
コード例 #5
ファイル: utils_models.py プロジェクト: yew1eb/auto_ml
def get_model_from_name(model_name, training_params=None):

    # For Keras
    epochs = 250
    if 'is_test_suite' in sys.argv:
            'Heard that this is the test suite. Limiting epochs to 10, which will increase training speed dramatically at the expense of model accuracy'
        epochs = 10

    all_model_params = {
        'LogisticRegression': {
            'n_jobs': -2
        'RandomForestClassifier': {
            'n_jobs': -2
        'ExtraTreesClassifier': {
            'n_jobs': -1
        'AdaBoostClassifier': {
            'n_estimators': 10
        'SGDClassifier': {
            'n_jobs': -1
        'Perceptron': {
            'n_jobs': -1
        'LinearRegression': {
            'n_jobs': -2
        'RandomForestRegressor': {
            'n_jobs': -2
        'ExtraTreesRegressor': {
            'n_jobs': -1
        'MiniBatchKMeans': {
            'n_clusters': 8
        'GradientBoostingRegressor': {
            'presort': False
        'SGDRegressor': {
            'shuffle': False
        'PassiveAggressiveRegressor': {
            'shuffle': False
        'AdaBoostRegressor': {
            'n_estimators': 10
        'XGBRegressor': {
            'nthread': -1,
            'n_estimators': 200
        'XGBClassifier': {
            'nthread': -1,
            'n_estimators': 200
        'LGBMRegressor': {},
        'LGBMClassifier': {},
        'DeepLearningRegressor': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        'DeepLearningClassifier': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2

    model_params = all_model_params.get(model_name, None)
    if model_params is None:
        model_params = {}

    if training_params is not None:
        print('Now using the model training_params that you passed in:')
        # Overwrite our stock params with what the user passes in (i.e., if the user wants 10,000 trees, we will let them do it)
            'After overwriting our defaults with your values, here are the final params that will be used to initialize the model:'

    model_map = {
        # Classifiers
        'LogisticRegression': LogisticRegression(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RidgeClassifier': RidgeClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'SGDClassifier': SGDClassifier(),
        'Perceptron': Perceptron(),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),

        # Regressors
        'LinearRegression': LinearRegression(),
        'RandomForestRegressor': RandomForestRegressor(),
        'Ridge': Ridge(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'RANSACRegressor': RANSACRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),
        'SGDRegressor': SGDRegressor(),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),

        # Clustering
        'MiniBatchKMeans': MiniBatchKMeans()

    if xgb_installed:
        model_map['XGBClassifier'] = xgb.XGBClassifier()
        model_map['XGBRegressor'] = xgb.XGBRegressor()

    if lgb_installed:
        model_map['LGBMRegressor'] = lgb.LGBMRegressor()
        model_map['LGBMClassifier'] = lgb.LGBMClassifier()

    if keras_installed:

        model_map['DeepLearningClassifier'] = KerasClassifier(
        model_map['DeepLearningRegressor'] = KerasRegressor(

    model_without_params = model_map[model_name]
    model_with_params = model_without_params.set_params(**model_params)

    return model_with_params
コード例 #6
def model_compare(data, target):
    # mean
    evaluate(MeanEstimator(), data.values, target.values, "Mean Estimator")

    # linear
    param_grid = {"normalize": [True, False], "fit_intercept": [True, False]}
    evaluate(LinearRegression(), data, target, "Linear", param_grid=param_grid)

    # poly
    poly = Pipeline([('poly',
                      PolynomialFeatures(degree=2, interaction_only=True)),
                     ('linear', Lasso(alpha=3))])
    evaluate(poly, data, target, "Poly")

    # decision tree
    param_grid = {"max_features": ["auto", "sqrt", "log2", None]}
             "Decision Tree",

    # elastic
    param_grid = dict(alpha=10.0**np.arange(-5, 4),
                      l1_ratio=0.1 * np.arange(0, 11),
                      normalize=[True, False],
                      fit_intercept=[True, False])
    evaluate(ElasticNet(), data, target, "Elastic", param_grid=param_grid)

    # ridge
    param_grid = dict(
        alpha=10.0**np.arange(-5, 4),
        normalize=[True, False],
        fit_intercept=[True, False],
        solver=["auto", "svd", "cholesky", "lsqr", 'sparse_cg', 'sag'])
    evaluate(Ridge(), data, target, "Ridge", param_grid=param_grid)

    # SVR
    param_grid = dict(C=10.0 * np.arange(50, 70, 10),
                      kernel=['linear', 'poly', 'rbf', 'sigmoid'])
    evaluate(svm.SVR(), data, target, "SVR", param_grid=param_grid, n_jobs=4)

    # XGBoost
    param_grid = {'max_depth': [2, 4, 6], 'n_estimators': [50, 100, 200]}

    # SGD Regressor()
    param_grid = {
        'loss': [
            'squared_loss', 'huber', 'epsilon_insensitive',
        'penalty': ['none', 'l2', 'l1', 'elasticnet']

    # GradientBoostingRegressor
    gbr = GradientBoostingRegressor(loss='quantile', criterion="mae")
    evaluate(gbr, data, target, "GradientBoostingRegressor")

    # # AdaBoostClassifier
    # ada = AdaBoostClassifier(n_estimators=100)
    # evaluate(ada, data, target, "AdaBoostClassifier")

    # BaggingRegressor
    evaluate(BaggingRegressor(), data, target, "BaggingRegressor")

    # KNeighborsRegressor
    kn = KNeighborsRegressor(n_neighbors=4, weights="distance")
    evaluate(kn, data, target, "KNeighborsRegressor")

    # BayesianRidge
    br = BayesianRidge()
    evaluate(br, data, target, "BayesianRidge")

    lasso_lars_ic(data, target)

    alpha = lassoCV(data, target)
    print "alpha " + str(alpha)
    lasso = Lasso(alpha=alpha, normalize=False)
    evaluate(lasso, data, target, "Lasso")

    alpha = lasso_lars_cv(data, target)
    print "alpha " + str(alpha)
    lasso_lars = LassoLars(alpha=alpha)
    evaluate(lasso_lars, data, target, "Lasso Lars")
def lassoLars(X, y, value, alpha=0.3, max_iter=600000):
    """Fit a LassoLars regressor on ``(X, y)`` and predict for ``value``.

    Args:
        X: Training feature matrix, passed to ``LassoLars.fit``.
        y: Training targets, passed to ``LassoLars.fit``.
        value: Samples to predict, passed to ``LassoLars.predict``.
        alpha: Regularisation strength given to ``LassoLars``. Defaults to
            0.3, the value that used to be hard-coded.
        max_iter: Iteration cap given to ``LassoLars``. Defaults to 600000,
            the value that used to be hard-coded.

    Returns:
        Whatever ``LassoLars.predict(value)`` returns for the fitted model.
    """
    regressor = LassoLars(alpha=alpha, max_iter=max_iter)
    regressor.fit(X, y)
    return regressor.predict(value)
def lassoCD(X, y, ll, ul, step, state):
    """Scan LassoLars alphas, scoring each by a linear SVR on the kept features.

    For every alpha in ``np.linspace(ll, ul, step)`` and every fold of a
    shuffled 10-fold split, a LassoLars model picks features, a linear SVR is
    trained on those features, and r2 / MSE are computed on the held-out
    fold. Despite the function name, the visible code fits ``LassoLars``.

    NOTE(review): in the visible code nothing is ever appended to ``r2``,
    ``mse``, ``pred``, ``true`` or ``feature``, and ``weight`` is never
    assigned. The statements that fill them appear to be missing from this
    copy — confirm against the original before relying on this function.

    Args:
        X: Feature array, indexed by row and by column with integer arrays.
        y: Target array, indexed by row with integer arrays.
        ll: First alpha of the scan.
        ul: Last alpha of the scan.
        step: Number of alphas generated (the ``num`` argument of linspace).
        state: ``random_state`` for the KFold shuffle.

    Returns:
        Tuple ``(df1, df2, df3)``: predictions against true values for the
        chosen alpha, the r2 / mse values for the chosen alpha, and the
        selected features.
    """
    kf = KFold(n_splits=10, shuffle=True, random_state=state)
    # Accumulators, meant to hold one entry per alpha.
    feature = []
    pred = []
    true = []
    r2 = []
    mse = []
    ilist = np.linspace(ll, ul, step)
    # Progress bar sized for step * 10 fits; it is not advanced in the visible code.
    pbar = tnrange(step * 10, desc='loop')
    for i in ilist:
        # Per-alpha accumulators, one entry per fold.
        r2_single = []
        mse_single = []
        pred_single = []
        true_single = []
        feature_single = []
        for train_index, test_index in kf.split(X):
            y_train, y_test = y[train_index], y[test_index]
            X_train_tmp, X_test_tmp = X[train_index], X[test_index]

            clf = LassoLars(alpha=i)
            clf.fit(X_train_tmp, np.ravel(y_train))
            # Keep columns with a strictly positive coefficient.
            # NOTE(review): features with negative coefficients are dropped
            # too — confirm that `!= 0` was not intended.
            feature_index = np.where(clf.coef_ > 0)[0]
            X_train = X_train_tmp[:, feature_index]
            X_test = X_test_tmp[:, feature_index]

            # Score the selected features with a linear-kernel SVR.
            svr = svm.SVR(kernel='linear')
            svr.fit(X_train, np.ravel(y_train))
            y_test_pred = svr.predict(X_test)

            r2_single.append(r2_score(y_test, y_test_pred))
            mse_single.append(mean_squared_error(y_test, y_test_pred))
        r2_single = np.array(r2_single)
        # Index of the fold with the highest r2 for this alpha.
        f = np.where(r2_single == max(r2_single))[0][0]

#         print(np.array(feature_single)[f])
    r2 = np.array(r2)
    # Weighted mean r2 per alpha; `weight` is not defined in the visible code.
    r2_mean = np.average(r2, axis=1, weights=weight)
    # Index of the alpha with the highest mean r2; select its results.
    a = np.where(r2_mean == max(r2_mean))[0][0]
    pred = np.array(pred)[a]
    true = np.array(true)[a]
    mse = np.array(mse)[a]
    feature = np.array(feature)[a]
    alpha = ilist[a]
    r2 = r2[a]

    # Stack the 10 folds' (prediction, truth) pairs into one two-column array.
    tmp = np.zeros([0, 2])
    tmp_ = np.zeros([0, 10])  # not used in the visible code
    for i in range(10):
        p = np.expand_dims(pred[i], axis=1)
        t = np.expand_dims(true[i], axis=1)
        tmp1 = np.concatenate([p, t], axis=1)
        tmp = np.concatenate([tmp, tmp1], axis=0)
    df1 = pd.DataFrame(tmp, columns=['Predict', 'True'])
    df2 = pd.DataFrame({'r2': r2, 'mse': mse})
    df3 = pd.DataFrame({'features': feature})

    # Side effects: plot mean r2 against alpha and print a short summary.
    plt.plot(ilist, r2_mean)
    print('max r2_score=', r2_mean[a], ', corresponding alpha=', alpha, a)
    print('number of selected features:', feature.shape[0])
    return df1, df2, df3
    for i in range(result_row):
        sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
    rank_result['Lars_pca'] = sumsum / float(result_row)
    rs_score['Lars_pca'] = r2_score(y_test, y)
    LarsModel = Lars()
    LarsModel.fit(X_train_std, y_train)
    y = LarsModel.predict(X_test_std)
    [result_row] = y.shape
    sumsum = 0
    #print y
    for i in range(result_row):
        sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
    rank_result['Lars_std'] = sumsum / float(result_row)
    rs_score['Lars_std'] = r2_score(y_test, y)

    LassoLarsModel = LassoLars()
    LassoLarsModel.fit(X_train_pca, y_train)
    y = LassoLarsModel.predict(X_test_pca)
    [result_row] = y.shape
    sumsum = 0
    #print y
    for i in range(result_row):
        sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
    rank_result['LassoLars_pca'] = sumsum / float(result_row)
    rs_score['LassoLars_pca'] = r2_score(y_test, y)
    LassoLarsModel = LassoLars()
    LassoLarsModel.fit(X_train_std, y_train)
    y = LassoLarsModel.predict(X_test_std)
    [result_row] = y.shape
    sumsum = 0
    #print y
コード例 #10
for l1_ratio in l1_ratios:
        ("ELN_L1_" + str(l1_ratio), ElasticNet(l1_ratio=l1_ratio,

getCVResult(models_eln_l1, X_learning, Y_learning)

#LassoLars tuning
alphas = [
    0.000005, 0.00001, 0.00003, 0.000035, 0.000036, 0.000037, 0.000038,
    0.00004, 0.00005, 0.00007, 0.0001
models_lala = []

for alpha in alphas:
    models_lala.append(("LaLa_" + str(alpha), LassoLars(alpha=alpha)))

getCVResult(models_lala, X_learning2, Y_learning)

#XGB model tuning
n_estimators = [400, 450, 470, 540, 550, 560]
models_xgb = []

for n_estimator in n_estimators:
    models_xgb.append(("XGB_" + str(n_estimator),

getCVResult(models_xgb, X_learning, Y_learning)
コード例 #11
def all_models_info():
    '''takes in data
    sets baseline
    sets SSE, MSE, and RMSE
    returns infor for all 4'''
    # get data
    df = acquire.acquire_zillow()
    df = prepare.clean_zillow(df)
    df = prepare.focused_zillow(df)
    # pull from add to trian
    train = evaluate.add_to_train()
    X_train, y_train, X_validate, y_validate, X_test, y_test = evaluate.xtrain_xval_xtest(
    #OLS Model
    lm = LinearRegression(normalize=True)
    lm.fit(X_train, y_train.appraised_value)
    y_train['appraised_value_pred_lm'] = lm.predict(X_train)
    rmse_train_lm = mean_squared_error(
        y_train.appraised_value, y_train.appraised_value_pred_lm)**(1 / 2)
    y_validate['appraised_value_pred_lm'] = lm.predict(X_validate)
    rmse_validate_lm = mean_squared_error(
        y_validate.appraised_value_pred_lm)**(1 / 2)
    #LARS Model
    lars = LassoLars(alpha=1.0)
    lars.fit(X_train, y_train.appraised_value)
    y_train['appraised_value_pred_lars'] = lars.predict(X_train)
    rmse_train_lars = mean_squared_error(
        y_train.appraised_value, y_train.appraised_value_pred_lars)**1 / 2
    y_validate['appraised_value_pred_lars'] = lars.predict(X_validate)
    rmse_validate_lars = mean_squared_error(
        y_validate.appraised_value_pred_lars)**1 / 2
    glm = TweedieRegressor(power=1, alpha=0)
    glm.fit(X_train, y_train.appraised_value)
    y_train['appraised_value_pred_glm'] = glm.predict(X_train)
    rmse_train_glm = mean_squared_error(
        y_train.appraised_value, y_train.appraised_value_pred_glm)**1 / 2
    y_validate['appraised_value_pred_glm'] = glm.predict(X_validate)
    rmse_validate_glm = mean_squared_error(
        y_validate.appraised_value, y_validate.appraised_value_pred_glm)**1 / 2
    # PF
    pf = PolynomialFeatures(degree=2)
    X_train_degree2 = pf.fit_transform(X_train)
    X_validate_degree2 = pf.transform(X_validate)
    X_test_degree2 = pf.transform(X_test)
    # LM2
    lm2 = LinearRegression(normalize=True)
    lm2.fit(X_train_degree2, y_train.appraised_value)
    y_train['appraised_value_pred_lm2'] = lm2.predict(X_train_degree2)
    rmse_train_lm2 = mean_squared_error(
        y_train.appraised_value, y_train.appraised_value_pred_lm2)**1 / 2
    y_validate['appraised_value_pred_lm2'] = lm2.predict(X_validate_degree2)
    rmse_validate_lm2 = mean_squared_error(
        y_validate.appraised_value, y_validate.appraised_value_pred_lm2)**1 / 2
    print("RMSE for OLS using LinearRegression\nTraining/In-Sample: ",
          rmse_train_lm, "\nValidation/Out-of-Sample: ", rmse_validate_lm)
    print("RMSE for Lasso + Lars\nTraining/In-Sample: ", rmse_train_lars,
          "\nValidation/Out-of-Sample: ", rmse_validate_lars)
        "RMSE for GLM using Tweedie, power=1 & alpha=0\nTraining/In-Sample: ",
        rmse_train_glm, "\nValidation/Out-of-Sample: ", rmse_validate_glm)
    print("RMSE for Polynomial Model, degrees=2\nTraining/In-Sample: ",
          rmse_train_lm2, "\nValidation/Out-of-Sample: ", rmse_validate_lm2)
コード例 #12
    def compute_pruned_kernel(  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
        """compute which channels to be pruned by lasso"""

        tf.logging.info('computing pruned kernel')

        nb_samples = X.shape[0]
        c_in = X.shape[-1]
        c_out = W2.shape[-1]
        samples = np.random.randint(0, nb_samples, min(400, nb_samples // 20))
        reshape_X = np.rollaxis(
            np.transpose(X, (0, 3, 1, 2)).reshape(
                (nb_samples, c_in, -1))[samples], 1, 0)
        reshape_W2 = np.transpose(
            np.transpose(W2, (3, 2, 0, 1)).reshape((c_out, c_in, -1)),
            [1, 2, 0])
        product = np.matmul(reshape_X, reshape_W2).reshape((c_in, -1)).T
        reshape_Y = Y[samples].reshape(-1)

        # feature
        tmp = np.nonzero(np.sum(np.abs(product), 0))[0].size
        if FLAGS.debug:
            tf.logging.info('feature num: {}, non zero: {}'.format(
                product.shape[1], tmp))

        solver = LassoLars(alpha=alpha, fit_intercept=False, max_iter=3000)

        def solve(alpha):
            """ Solve the Lasso"""
            solver.alpha = alpha
            solver.fit(product, reshape_Y)
            idxs = solver.coef_ != 0.
            tmp = sum(idxs)
            return idxs, tmp, solver.coef_

        tf.logging.info('pruned channel selecting')
        start = timer()

        if c_new == c_in:
            idxs = np.array([True] * c_new)
            left = 0
            right = alpha
            lbound = c_new - tolerance * c_in / 2
            rbound = c_new + tolerance * c_in / 2

            while True:
                _, tmp, coef = solve(right)
                if tmp < c_new:
                    right *= 2
                    if FLAGS.debug:
                        tf.logging.debug("relax right to {}".format(right))
                            "we expect got less than {} channels, but got {} channels"
                            .format(c_new, tmp))

            while True:
                if lbound < 0:
                    lbound = 1
                idxs, tmp, coef = solve(alpha)
                # print loss
                loss = 1 / (2 * float(product.shape[0])) * \
                  np.sqrt(np.sum((reshape_Y - np.matmul(product, coef)) ** 2, axis=0)) + \
                    alpha * np.sum(np.fabs(coef))

                if FLAGS.debug:
                        'loss: {}, alpha: {}, feature nums: {}, left: {}, right: {}, \
              left_bound: {}, right_bound: {}'.format(loss, alpha, tmp, left,
                                                      right, lbound, rbound))

                if FLAGS.debug:
                        'tmp {}, lbound {}, rbound {}, alpha {}, left {}, right {}'
                        .format(tmp, lbound, rbound, alpha, left, right))
                if FLAGS.cp_quadruple:
                    if tmp % 4 == 0 and abs(tmp - lbound) <= 2:

                if lbound <= tmp and tmp <= rbound:
                    if FLAGS.cp_quadruple:
                        if tmp % 4 == 0:
                        elif tmp % 4 <= 2:
                            rbound = tmp - 1
                            lbound = lbound - 2
                            lbound = tmp + 1
                            rbound = rbound + 2
                elif abs(left - right) <= right * 0.1:
                    if lbound > 1:
                        lbound = lbound - 1
                    if rbound < c_in:
                        rbound = rbound + 1
                    left = left / 1.2
                    right = right * 1.2
                elif tmp > rbound:
                    left = left + (alpha - left) / 2
                    right = right - (right - alpha) / 2

                if alpha < 1e-10:

                alpha = (left + right) / 2
            c_new = tmp

        tf.logging.info('Channel selection time cost: {}s'.format(timer() -

        start = timer()
        tf.logging.info('Feature map reconstructing')
        newW2, _ = self.featuremap_reconstruction(X[:, :, :, idxs].reshape(
            (nb_samples, -1)),

            'Feature map reconstruction time cost: {}s'.format(timer() -

        return idxs, newW2
コード例 #13
def cvx_online_dict_learning(X, y_true, n_hat, k_cluster, T, lmda, eps, 
        flag=True, version = 'Rr'):
    X: R^(n * m)
    y_true: str^n
    W_0: R^(n_hat * k)
    x_i : R^m
    alpha: R^k
    cvx_online problem 
        min||x_i - X.T * W * alpha|| + lambda * ||alpha||

    in the online setting, there is no X in (n * m), 
    instead, we need to store a candidate set and solve the subproblem:
        min ||x_i - X_hat * W_hat * alpha|| + lambda * ||alpha||

    X_hat : R^(m * n_hat)
    W_hat : R^(n_hat * k)

    version: Rr, restricted, heuristic approach
             Ru, uniform, random assignment
    n_dim, m_dim = X.shape

    A_t = np.zeros((k_cluster, k_cluster))
    B_t = np.zeros((m_dim, k_cluster))
    x_sum = 0
    alpha_sum = 0

    # step 1: sample n_hat * k_cluster points as initial X_hat.
    X_0 = np.zeros((m_dim, n_hat))
    for idx in range(n_hat):
        sample_idx = np.random.randint(0, n_dim)
        x_sample = X[sample_idx, :]
        X_0[:, idx] = x_sample

    # step 1: initialization, get X_hat (including clusters info)
    # and W_hat from X_0, using same init as in CNMF.
    # here representative_size_count is the n_1_hat, n_2_hat, ..., n_k_hat.
    t1 = time.time()
    X_hat, W_hat, representative_size_count = initialize_X_W_hat(X_0, k_cluster)
    X_0, W_0 = X_hat.copy(), W_hat.copy()
    t2 = time.time()
    # print('init cost {:.4f}'.format(t2 - t1))
    # step 2: after initialization of X_hat, update alpha, W_hat and X_hat alternatively.
    t_start = time.time()
    print(lmda, _NF, eps)
    for t in range(T):
        # t_start_online = time.time()
        if t % 50 == 0 and flag:
            D_t = np.matmul(X_hat, W_hat)
            tmp_assignment = get_clustering_assignment_1(X, D_t, k_cluster)
            tmp_acc, tmp_AMI = evaluation_clustering(tmp_assignment, y_true)
            print('1)iteration {}, distance acc = {:.4f}, AMI = {:.4f}'.format(t, tmp_acc, tmp_AMI))

            tmp_assignment = get_clustering_assignment_2(X, D_t, k_cluster, lmda)
            tmp_acc, tmp_AMI = evaluation_clustering(tmp_assignment, y_true)
            print('2)iteration {}, kmeans of weights acc = {:.4f}, AMI = {:.4f}'.format(t, tmp_acc, tmp_AMI))
            t_end = time.time()
            print('time elapse = {:.4f}s'.format(t_end - t_start))
            t_start = t_end

            print('-' * 7)

        sample_idx = np.random.randint(0, n_dim)
        x_sample = X[sample_idx, :]

        # update alpha
        t1 = time.time()
        lars_lasso = LassoLars(alpha = lmda, max_iter = 500)
        D_t = np.matmul(X_hat, W_hat)
        lars_lasso.fit(D_t, x_sample)
        alpha_t = lars_lasso.coef_
        t2 = time.time()
        # print('lasso cost {:.4f}s'.format(t2 - t1))
        # using different clustering assignment
        t1 = time.time()
        if version == 'Rr':
            cluster_of_x_i = np.argmax(alpha_t)
        # elif version == 'Ru':
            cluster_of_x_i = int(np.random.uniform(0, k_cluster))
        t2 = time.time()
        # print('argmax alpha cost {:.4f}s'.format(t2 - t1))

        t1 = time.time()
        A_t += np.matmul(alpha_t.reshape(k_cluster, 1), alpha_t.reshape(1, k_cluster))
        B_t += np.matmul(x_sample.reshape(m_dim, 1), alpha_t.reshape(1, k_cluster))
        x_sum += (np.linalg.norm(x_sample) ** 2)
        alpha_sum += lmda * np.linalg.norm(alpha_t, 1)
        t2 = time.time()
        # print('update At, Bt cost {:.4f}s'.format(t2 - t1))

        # update X_hat
        t1 = time.time()
        W_hat, X_hat = update_W_X_hat(W_hat, X_hat, representative_size_count, x_sample, cluster_of_x_i, 
                A_t, B_t, x_sum, alpha_sum, t, eps)
        t2 = time.time()
        # print('update X_hat, W_hat cost {:.4f}s'.format(t2 - t1))

    print('Dcitionary update done! Time elapse {:.04f}s'.format(time.time() - t_start))

    return W_hat, X_hat, representative_size_count, X_0, W_0
コード例 #14
ファイル: script_18.py プロジェクト: isayantani/Kaggle
                                  LogisticRegression, LassoCV)
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler

estimators = [
    ('LinReg', LinearRegression(fit_intercept=False)),
    ('Theil_Sen', TheilSenRegressor(fit_intercept=False)),
    ('Ridge', Ridge(fit_intercept=False)),
    ('HuberRegressor', HuberRegressor(fit_intercept=False)),
    ('BayesRidge', BayesianRidge(fit_intercept=False)),
    ('LassoLars', LassoLars(fit_intercept=False, alpha=25)),
    ('Lasso', Lasso(fit_intercept=False, alpha=25)),
    ('ElasticNet', ElasticNet(alpha=13, fit_intercept=False)),
    ('ARDRegression', ARDRegression(fit_intercept=False))
#The "environment" is our interface for code competitions

# The "environment" is our interface for code competitions
env = kagglegym.make()

# We get our initial observation by calling "reset"
observation = env.reset()

# Note that the first observation we get has a "train" dataframe
trains = observation.train
print("Train has {} rows".format(len(trains)))
コード例 #15
ファイル: lasso.py プロジェクト: wnov/channel_pruning_lasso
def lasso_pruning(X, Y, W, c_new, alpha=1e-4, tolerance=0.02, debug=False):
    # Conv
    # Example: B is sample number
    #        : c_in is channel input
    #        : c_out is channel output
    #        : 3x3 is kernel size
    # X shape: [B, c_in, 3, 3]
    # Y shape: [B, c_out]
    # W shape: [c_out, c_in, 3, 3]
    # Linear
    # X shape: [B, c_in]
    # Y shape: [B, c_out]
    # W shape: [c_out, c_in]
    if debug:
        print("input shape: {}".format(X.shape))
        print("output shape: {}".format(Y.shape))
        print("weight shape: {}".format(W.shape))
        print("curr chn: {} target chn: {}".format(W.shape[1], c_new))
    num_samples = X.shape[0]  # num of training samples
    c_in = W.shape[1]  # num of input channels
    c_out = W.shape[0]  # num of output channels

    # conv
    if len(W.shape) == 4:
        # sample and reshape X to [c_in, B, 9]
        reshape_X = X.reshape((num_samples, c_in, -1)).transpose((1, 0, 2))
        # reshape W to [c_in, 9, c_out]
        reshape_W = W.reshape((c_out, c_in, -1)).transpose((1, 2, 0))
        # linear
        # sample and reshape X to [c_in, B] and expand to [c_in, B, 1]
        reshape_X = X.transpose((1, 0))[..., np.newaxis]
        # reshape to [c_in, 1, c_out]
        reshape_W = W.reshape((c_out, c_in, 1)).transpose((1, 2, 0))

    # reshape Y to [B x c_out]
    reshape_Y = Y.reshape(-1)

    # product has size [B x c_out, c_in]
    product = np.matmul(reshape_X, reshape_W).reshape((c_in, -1)).T

    # use LassoLars because it's more robust than Lasso
    solver = LassoLars(alpha=alpha, fit_intercept=False, max_iter=3000)

    # solver = Lasso(alpha=alpha, fit_intercept=False,
    #                max_iter=3000, warm_start=True, selection='random')

    def solve(alpha):
        """ Solve the Lasso"""
        solver.alpha = alpha
        solver.fit(product, reshape_Y)
        nonzero_inds = np.where(solver.coef_ != 0.)[0]
        nonzero_num = sum(solver.coef_ != 0.)
        return nonzero_inds, nonzero_num, solver.coef_

    tic = time.perf_counter()

    left = 0  # minimum alpha is 0, which means don't use lasso regularizer at all
    right = alpha

    # the left bound of num of selected channels
    lbound = c_new
    # the right bound of num of selected channels
    rbound = c_new + tolerance * c_new

    # increase alpha until the lasso can find a selection with size < c_new
    while True:
        _, keep_num, coef = solve(right)
        if debug:
            print("relax right to %.6f" % right)
            print("expected %d channels, but got %d channels" %
                  (c_new, keep_num))
        if keep_num < c_new:
            right *= 2

    # shrink the alpha for less aggressive lasso regularization
    # if the selected num of channels is less than the lbound
    while True:
        # binary search
        alpha = (left + right) / 2
        keep_inds, keep_num, coef = solve(alpha)
        # print loss
        # product has size [B x c_out, c_in]
        loss = 1 / (2 * float(product.shape[0])) * \
               np.sqrt(np.sum((reshape_Y - np.matmul(product, coef)) ** 2, axis=0)) + \
               alpha * np.sum(np.fabs(coef))

        if debug:
                'loss: %.6f, alpha: %.6f, feature nums: %d, '
                'left: %.6f, right: %.6f, left_bound: %.6f, right_bound: %.6f'
                % (loss, alpha, keep_num, left, right, lbound, rbound))

        if keep_num > rbound:
            left = alpha
        elif keep_num < lbound:
            right = alpha

        if alpha < 1e-10:

    toc = time.perf_counter()
    if debug:
        print('Lasso Regression time: %.2f s' % (toc - tic))
        print('Chn keep idx: {}'.format(keep_inds))
        print(c_new, keep_num)
    print("orig chn num = {} keep chn num = {}".format(c_in, keep_num))
    return keep_inds, keep_num
コード例 #16
ファイル: cv_estimators.py プロジェクト: dsbowen/fast-automl
 def make_estimator(self, **params):
     return make_pipeline(*self.preprocessors,
コード例 #17
ファイル: logica.py プロジェクト: raedjamw/stock-prediction
def task2(data):
    # Fit a set of regressors on price-derived features and return, as
    # parallel values, the formatted index dates, the 'Adj Close' column and
    # one forecast column per model.
    #
    # data: DataFrame from which the 'Adj Close', 'Volume', 'High', 'Low',
    #       'Close' and 'Open' columns are read. Presumably indexed by date,
    #       since timedelta values are added to its index labels and
    #       strftime is called on them below -- TODO confirm with callers.
    # Returns a 12-element tuple: the formatted dates followed by eleven
    # lists (the 'Adj Close' values, then one list per Forecast_* column).

    df = data

    # Feature frame: two raw columns plus two percentage features.
    dfreg = df.loc[:, ['Adj Close', 'Volume']]
    # High/low spread as a percentage of the close
    dfreg['HL_PCT'] = (df['High'] - df['Low']) / df['Close'] * 100.0
    # Open-to-close move as a percentage of the open
    dfreg['PCT_change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0

    # Replace missing values with the sentinel -99999 (no rows are dropped)
    dfreg.fillna(value=-99999, inplace=True)
    # We want to separate 1 percent of the data to forecast
    forecast_out = int(math.ceil(0.01 * len(dfreg)))
    # The label is 'Adj Close' taken forecast_out rows later, so the last
    # forecast_out rows have no label (NaN after the shift)
    forecast_col = 'Adj Close'
    dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)
    # NOTE(review): the axis is passed positionally here; newer pandas
    # releases appear to require axis=1 as a keyword -- verify against the
    # installed version.
    X = np.array(dfreg.drop(['label'], 1))
    # Scale the X so that everyone can have the same distribution for linear regression
    X = preprocessing.scale(X)
    # Hold back the last forecast_out rows (X_lately, used only for the
    # forecasts); the earlier rows are used for fitting and evaluation
    X_lately = X[-forecast_out:]
    X = X[:-forecast_out]
    # Separate label and identify it as y; trim the unlabeled tail so that
    # y lines up with X
    y = np.array(dfreg['label'])
    y = y[:-forecast_out]

    # Split data
    # NOTE(review): the train_test_split call below is cut off in this
    # listing -- its remaining arguments and closing parenthesis are missing.
    X_train, X_test, y_train, y_test = train_test_split(X,


    # Linear regression
    clfreg = LinearRegression(n_jobs=-1)
    clfreg.fit(X_train, y_train)
    # Degree-2 polynomial features followed by Ridge
    clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
    clfpoly2.fit(X_train, y_train)

    # Degree-3 polynomial features followed by Ridge
    clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
    clfpoly3.fit(X_train, y_train)

    # KNN Regression
    clfknn = KNeighborsRegressor(n_neighbors=2)
    clfknn.fit(X_train, y_train)

    # Lasso Regression
    clflas = Lasso()
    clflas.fit(X_train, y_train)

    # Multitask Lasso Regression
    # clfmtl = MultiTaskLasso(alpha=1.)
    # clfmtl.fit(X_train, y_train).coef_

    # Bayesian Ridge Regression
    clfbyr = BayesianRidge()
    clfbyr.fit(X_train, y_train)

    # Lasso LARS Regression
    clflar = LassoLars(alpha=.1)
    clflar.fit(X_train, y_train)

    # Orthogonal Matching Pursuit Regression
    clfomp = OrthogonalMatchingPursuit(n_nonzero_coefs=2)
    clfomp.fit(X_train, y_train)

    # Automatic Relevance Determination Regression
    clfard = ARDRegression(compute_score=True)
    clfard.fit(X_train, y_train)

    # Logistic Regression
    # clflgr = linear_model.LogisticRegression(penalty='l1', solver='saga', tol=1e-6, max_iter=int(1e6), warm_start=True)
    # coefs_ = []
    # for c in cs:
    #   clflgr.set_params(C=c)
    #   clflgr.fit(X_train, y_train)
    #   coefs_.append(clflgr.coef_.ravel().copy())

    # SGD Regression (fixed seed so repeated runs give the same fit)
    clfsgd = SGDRegressor(random_state=0, max_iter=1000, tol=1e-3)
    clfsgd.fit(X_train, y_train)


    # "Confidence" is each model's score() on the held-out split
    confidencereg = clfreg.score(X_test, y_test)
    confidencepoly2 = clfpoly2.score(X_test, y_test)
    confidencepoly3 = clfpoly3.score(X_test, y_test)
    confidenceknn = clfknn.score(X_test, y_test)
    confidencelas = clflas.score(X_test, y_test)
    # confidencemtl = clfmtl.score(X_test, y_test)
    confidencebyr = clfbyr.score(X_test, y_test)
    confidencelar = clflar.score(X_test, y_test)
    confidenceomp = clfomp.score(X_test, y_test)
    confidenceard = clfard.score(X_test, y_test)
    confidencesgd = clfsgd.score(X_test, y_test)

    # Report the scores, scaled by 100
    print('The linear regression confidence is:', confidencereg * 100)
    print('The quadratic regression 2 confidence is:', confidencepoly2 * 100)
    print('The quadratic regression 3 confidence is:', confidencepoly3 * 100)
    print('The knn regression confidence is:', confidenceknn * 100)
    print('The lasso regression confidence is:', confidencelas * 100)
    # print('The lasso regression confidence is:',confidencemtl*100)
    print('The Bayesian Ridge regression confidence is:', confidencebyr * 100)
    print('The Lasso LARS regression confidence is:', confidencelar * 100)
    print('The OMP regression confidence is:', confidenceomp * 100)
    print('The ARD regression confidence is:', confidenceard * 100)
    print('The SGD regression confidence is:', confidencesgd * 100)

    # Predict the held-back most recent rows with every fitted model
    forecast_reg = clfreg.predict(X_lately)
    forecast_pol2 = clfpoly2.predict(X_lately)
    forecast_pol3 = clfpoly3.predict(X_lately)
    forecast_knn = clfknn.predict(X_lately)
    forecast_las = clflas.predict(X_lately)
    forecast_byr = clfbyr.predict(X_lately)
    forecast_lar = clflar.predict(X_lately)
    forecast_omp = clfomp.predict(X_lately)
    forecast_ard = clfard.predict(X_lately)
    forecast_sgd = clfsgd.predict(X_lately)

    # Write the forecasts into dfreg, one new column per model.
    # The first pass also appends one all-NaN row per forecast value, dated
    # on consecutive calendar days after the last existing index label.
    dfreg['Forecast_reg'] = np.nan

    last_date = dfreg.iloc[-1].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_reg:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg.loc[next_date] = [np.nan for _ in range(len(dfreg.columns))]
        # NOTE(review): chained assignment (column selection, then .loc);
        # whether this writes through to dfreg depends on the pandas
        # version and copy-on-write settings -- verify.
        dfreg['Forecast_reg'].loc[next_date] = i

    # The remaining columns reuse the rows appended above: each pass starts
    # again from the row at position -26 and walks forward one day at a time.
    # NOTE(review): -26 is hard-coded; presumably it stands for
    # -(forecast_out + 1), i.e. it assumes forecast_out == 25 -- confirm.
    dfreg['Forecast_pol2'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_pol2:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_pol2'].loc[next_date] = i

    dfreg['Forecast_pol3'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_pol3:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_pol3'].loc[next_date] = i

    dfreg['Forecast_knn'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_knn:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_knn'].loc[next_date] = i

    dfreg['Forecast_las'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_las:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_las'].loc[next_date] = i

    dfreg['Forecast_byr'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_byr:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_byr'].loc[next_date] = i

    dfreg['Forecast_lar'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_lar:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_lar'].loc[next_date] = i

    dfreg['Forecast_omp'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_omp:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_omp'].loc[next_date] = i

    dfreg['Forecast_ard'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_ard:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_ard'].loc[next_date] = i

    dfreg['Forecast_sgd'] = np.nan

    last_date = dfreg.iloc[-26].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)

    for i in forecast_sgd:
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg['Forecast_sgd'].loc[next_date] = i

    # Index labels formatted as 'YYYY-MM-DD', followed by 'Adj Close' and
    # every forecast column as plain lists
    return dfreg.index.format(formatter=lambda x: x.strftime(
        '%Y-%m-%d')), dfreg['Adj Close'].to_list(
        ), dfreg['Forecast_reg'].to_list(), dfreg['Forecast_pol2'].to_list(
        ), dfreg['Forecast_pol3'].to_list(), dfreg['Forecast_knn'].to_list(
        ), dfreg['Forecast_las'].to_list(), dfreg['Forecast_byr'].to_list(
        ), dfreg['Forecast_lar'].to_list(), dfreg['Forecast_omp'].to_list(
        ), dfreg['Forecast_ard'].to_list(), dfreg['Forecast_sgd'].to_list()
コード例 #18
ファイル: cv_estimators.py プロジェクト: dsbowen/fast-automl
 def make_estimator(self, **params):
     return make_pipeline(*self.preprocessors, PolynomialFeatures(),
                          StandardScaler(), PCA(),
コード例 #19
# LassoLars Regression
# Least Angle Regression (LARS) can be used as an alternative method for
# computing the Least Absolute Shrinkage and Selection Operator (LASSO) fit.
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LassoLars

# load the diabetes dataset
dataset = datasets.load_diabetes()

# fit a LASSO model, solved with LARS, to the data
model = LassoLars(alpha=0.1)
model.fit(dataset.data, dataset.target)

# make predictions on the same data the model was fitted on
expected = dataset.target
predicted = model.predict(dataset.data)

# summarize the fit of the model: mean squared error, then the model's score
mse = np.mean((predicted - expected)**2)
print(mse)
print(model.score(dataset.data, dataset.target))
コード例 #20
    (numerical_scaler, numerical_cols)

preprocessor_oe = make_column_transformer(
    (categorical_encoder_oe, categorical_cols_oe),
#############################################Test All Models#####################################

models = {
    "lr": LinearRegression(),
    "lasso": Lasso(),
    "ridge": Ridge(),
    "elasticnet": ElasticNet(),
    "lassolars": LassoLars(),
    "bayridge": BayesianRidge(),
    "svr": SVR(),
    "knn": KNeighborsRegressor(),
    #"gaussianpr" : GaussianProcessRegressor(), mauvais rmse et long à executer
    "decisiontree": DecisionTreeRegressor(),
    "rf": RandomForestRegressor(),
    "extratree": ExtraTreesRegressor(),
    "adaboost": AdaBoostRegressor(),
    "gradientboost": GradientBoostingRegressor(),
    "xgb": xgb.XGBRegressor()

models_todense = ["lassolars", "bayridge", "gaussianpr"]

pipelines_oe_std = []
コード例 #21
# *************************************************************************************
# ...................................... Modelos .................................... #
# *************************************************************************************

## ************ a. Integracion temprana ************ ##
### ... Modelos lineales ... ###
regressors = { ## modelos
    'OLS': LinearRegression(),
    'ridge': Ridge(), 
    'lasso': Lasso(),
    #'multi-lasso': MultiTaskLasso(), 
    'elasticnet': ElasticNet(), 
    #'multi-elasticnet': MultiTaskElasticNet(),
    'lars': Lars(), 
    'lassolars': LassoLars(), 
    'orthogonalmatchingpursuit': OrthogonalMatchingPursuit(), 
    'bayesianridge': BayesianRidge(), 
    'passiveaggressivregressor': PassiveAggressiveRegressor(), 
    'ransacregressor': RANSACRegressor(), 
    'theilsenregressor': TheilSenRegressor(), 
    'huberregressor': HuberRegressor()

otra_param_grid = { ## grilla
    'OLS': {},
    "ridge": {'alpha': [0.01, 0.1, 1, 5, 100]},
    "lasso": {'alpha': [0.01, 0.1, 1, 5, 100]},
    #"multi-lasso": {'alpha': [0.01, 0.1, 1]}, 
    "elasticnet": {'alpha': [0.001, 0.05, 0.1, 1, 100], 'l1_ratio': [0.001, 0.05, 0.01, 0.1, 1, 100]}, 
    #"multi-elasticnet": {'alpha': [0.01, 0.1, 1], 'l1_ratio': [0.01, 0.1, 1]}, 
コード例 #22
ファイル: utils_models.py プロジェクト: zldeng/auto_ml
def get_model_from_name(model_name, training_params=None, is_hp_search=False):
    global keras_imported

    # For Keras
    epochs = 1000
    # if os.environ.get('is_test_suite', 0) == 'True' and model_name[:12] == 'DeepLearning':
    #     print('Heard that this is the test suite. Limiting number of epochs, which will increase training speed dramatically at the expense of model accuracy')
    #     epochs = 100

    all_model_params = {
        'LogisticRegression': {},
        'RandomForestClassifier': {
            'n_jobs': -2,
            'n_estimators': 30
        'ExtraTreesClassifier': {
            'n_jobs': -1
        'AdaBoostClassifier': {},
        'SGDClassifier': {
            'n_jobs': -1
        'Perceptron': {
            'n_jobs': -1
        'LinearSVC': {
            'dual': False
        'LinearRegression': {
            'n_jobs': -2
        'RandomForestRegressor': {
            'n_jobs': -2,
            'n_estimators': 30
        'LinearSVR': {
            'dual': False,
            'loss': 'squared_epsilon_insensitive'
        'ExtraTreesRegressor': {
            'n_jobs': -1
        'MiniBatchKMeans': {
            'n_clusters': 8
        'GradientBoostingRegressor': {
            'presort': False,
            'learning_rate': 0.1,
            'warm_start': True
        'GradientBoostingClassifier': {
            'presort': False,
            'learning_rate': 0.1,
            'warm_start': True
        'SGDRegressor': {
            'shuffle': False
        'PassiveAggressiveRegressor': {
            'shuffle': False
        'AdaBoostRegressor': {},
        'LGBMRegressor': {
            'n_estimators': 2000,
            'learning_rate': 0.15,
            'num_leaves': 8,
            'lambda_l2': 0.001,
            'histogram_pool_size': 16384
        'LGBMClassifier': {
            'n_estimators': 2000,
            'learning_rate': 0.15,
            'num_leaves': 8,
            'lambda_l2': 0.001,
            'histogram_pool_size': 16384
        'DeepLearningRegressor': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        'DeepLearningClassifier': {
            'epochs': epochs,
            'batch_size': 50,
            'verbose': 2
        'CatBoostRegressor': {},
        'CatBoostClassifier': {}

    # if os.environ.get('is_test_suite', 0) == 'True':
    #     all_model_params

    model_params = all_model_params.get(model_name, None)
    if model_params is None:
        model_params = {}

    if is_hp_search == True:
        if model_name[:12] == 'DeepLearning':
            model_params['epochs'] = 50
        if model_name[:4] == 'LGBM':
            model_params['n_estimators'] = 500

    if training_params is not None:
        print('Now using the model training_params that you passed in:')
        # Overwrite our stock params with what the user passes in (i.e., if the user wants 10,000 trees, we will let them do it)
            'After overwriting our defaults with your values, here are the final params that will be used to initialize the model:'

    model_map = {
        # Classifiers
        'LogisticRegression': LogisticRegression(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RidgeClassifier': RidgeClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'LinearSVC': LinearSVC(),

        # Regressors
        'LinearRegression': LinearRegression(),
        'RandomForestRegressor': RandomForestRegressor(),
        'Ridge': Ridge(),
        'LinearSVR': LinearSVR(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'RANSACRegressor': RANSACRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),

        # Clustering
        'MiniBatchKMeans': MiniBatchKMeans(),

        model_map['SGDClassifier'] = SGDClassifier(max_iter=1000, tol=0.001)
        model_map['Perceptron'] = Perceptron(max_iter=1000, tol=0.001)
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier(
            max_iter=1000, tol=0.001)
        model_map['SGDRegressor'] = SGDRegressor(max_iter=1000, tol=0.001)
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor(
            max_iter=1000, tol=0.001)
    except TypeError:
        model_map['SGDClassifier'] = SGDClassifier()
        model_map['Perceptron'] = Perceptron()
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier(
        model_map['SGDRegressor'] = SGDRegressor()
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor()

    if xgb_installed:
        model_map['XGBClassifier'] = XGBClassifier()
        model_map['XGBRegressor'] = XGBRegressor()

    if lgb_installed:
        model_map['LGBMRegressor'] = LGBMRegressor()
        model_map['LGBMClassifier'] = LGBMClassifier()

    if catboost_installed:
        model_map['CatBoostRegressor'] = CatBoostRegressor(
        model_map['CatBoostClassifier'] = CatBoostClassifier(

    if model_name[:12] == 'DeepLearning':
        if keras_imported == False:
            # Suppress some level of logs if TF is installed (but allow it to not be installed, and use Theano instead)
                os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '3'
                os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
                from tensorflow import logging

            global maxnorm
            global Dense, Dropout
            global LeakyReLU, PReLU, ThresholdedReLU, ELU
            global Sequential
            global keras_load_model
            global regularizers, optimizers
            global Activation
            global KerasRegressor, KerasClassifier

            from keras.constraints import maxnorm
            from keras.layers import Activation, Dense, Dropout
            from keras.layers.advanced_activations import LeakyReLU, PReLU, ThresholdedReLU, ELU
            from keras.models import Sequential
            from keras.models import load_model as keras_load_model
            from keras import regularizers, optimizers
            from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
            keras_imported = True

        model_map['DeepLearningClassifier'] = KerasClassifier(
        model_map['DeepLearningRegressor'] = KerasRegressor(

        model_without_params = model_map[model_name]
    except KeyError as e:
            'It appears you are trying to use a library that is not available when we try to import it, or using a value for model_names that we do not recognize'
        raise (e)

    if os.environ.get('is_test_suite', False) == 'True':
        if 'n_jobs' in model_params:
            model_params['n_jobs'] = 1
    model_with_params = model_without_params.set_params(**model_params)

    return model_with_params
コード例 #23
@pytest.mark.parametrize('copy_X', [True, False])
def test_lasso_lars_fit_copyX_behaviour(copy_X):
    """
    Test that user input to .fit for copy_X overrides default __init__ value
    """
    lasso_lars = LassoLarsIC(precompute=False)
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, (100, 5))
    # Keep a pristine copy so any in-place change made by fit() is detectable
    X_copy = X.copy()
    y = X[:, 2]
    lasso_lars.fit(X, y, copy_X=copy_X)
    # X must be left untouched exactly when a copy was requested
    assert copy_X == np.array_equal(X, X_copy)

@pytest.mark.parametrize('est', (LassoLars(alpha=1e-3), Lars()))
def test_lars_with_jitter(est):
    # Test that a small amount of jitter helps stability,
    # using example provided in issue #2746
    # The test runs once per estimator in the parametrize list above.

    # Two samples, five features; both targets are identical
    X = np.array([[0.0, 0.0, 0.0, -1.0, 0.0],
                  [0.0, -1.0, 0.0, 0.0, 0.0]])
    y = [-2.5, -2.5]
    expected_coef = [0, 2.5, 0, 2.5, 0]

    # fit_intercept is meant to be set to False, since the target is constant
    # and we want to check the value of coef; coef would be all zeros
    # otherwise.
    # NOTE(review): no statement in this listing actually sets fit_intercept,
    # and the function stops after est.fit -- the jittered fit and the
    # assertions on expected_coef appear to be missing here.
    # The clone leaves `est` itself unmodified; jitter=10e-8 equals 1e-7.
    est_jitter = clone(est).set_params(jitter=10e-8, random_state=0)

    est.fit(X, y)
コード例 #24
# Standardize every feature column, then instantiate the candidate regressors.
logging.info('Scaling features...')

# All columns of feature_df are scaled; the scaled values overwrite the
# originals in place.
column_name_list = list(feature_df.columns)

feature_scaler = StandardScaler()
feature_df[column_name_list] = feature_scaler.fit_transform(feature_df[column_name_list])

# Initialize models

# Linear models, all with the iteration cap raised to 5000
clf_ridg = Ridge(max_iter=5000)
clf_laso = Lasso(max_iter=5000)
clf_lala = LassoLars(max_iter=5000)
clf_enet = ElasticNet(max_iter=5000)

# XGBoost regressors with default settings
clf_xgbr = xgb.XGBRegressor()
clf_xgrf = xgb.XGBRFRegressor()

# Other regressors.
# NOTE(review): criterion='mae' is the older spelling; newer scikit-learn
# releases appear to expect 'absolute_error' -- verify against the installed
# version.
clf_rf = RandomForestRegressor(criterion='mae', max_features='sqrt')
clf_tree = ExtraTreesRegressor(criterion='mae', max_features='sqrt')
clf_ada = AdaBoostRegressor()
clf_grad = GradientBoostingRegressor()
clf_svr = SVR()

# Model parameters

# mae 2.160
コード例 #25
def GetAllModelsForComparison(X_train, Y_train):
    models = {
        'ARDRegression': ARDRegression(),
        'BayesianRidge': BayesianRidge(),
        'ElasticNet': ElasticNet(),
        'ElasticNetCV': ElasticNetCV(),
        'Hinge': Hinge(),
        #'Huber': Huber(),
        'HuberRegressor': HuberRegressor(),
        'Lars': Lars(),
        'LarsCV': LarsCV(),
        'Lasso': Lasso(),
        'LassoCV': LassoCV(),
        'LassoLars': LassoLars(),
        'LassoLarsCV': LassoLarsCV(),
        'LinearRegression': LinearRegression(),
        'Log': Log(),
        'LogisticRegression': LogisticRegression(),
        'LogisticRegressionCV': LogisticRegressionCV(),
        'ModifiedHuber': ModifiedHuber(),
        'MultiTaskElasticNet': MultiTaskElasticNet(),
        'MultiTaskElasticNetCV': MultiTaskElasticNetCV(),
        'MultiTaskLasso': MultiTaskLasso(),
        'MultiTaskLassoCV': MultiTaskLassoCV(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'OrthogonalMatchingPursuitCV': OrthogonalMatchingPursuitCV(),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),
        'Perceptron': Perceptron(),
        'RANSACRegressor': RANSACRegressor(),
        #'RandomizedLasso': RandomizedLasso(),
        #'RandomizedLogisticRegression': RandomizedLogisticRegression(),
        'Ridge': Ridge(),
        'RidgeCV': RidgeCV(),
        'RidgeClassifier': RidgeClassifier(),
        'SGDClassifier': SGDClassifier(),
        'SGDRegressor': SGDRegressor(),
        'SquaredLoss': SquaredLoss(),
        'TheilSenRegressor': TheilSenRegressor(),
        'BaseEstimator': BaseEstimator(),
        'ClassifierMixin': ClassifierMixin(),
        'LinearClassifierMixin': LinearClassifierMixin(),
        'LinearDiscriminantAnalysis': LinearDiscriminantAnalysis(),
        'QuadraticDiscriminantAnalysis': QuadraticDiscriminantAnalysis(),
        'StandardScaler': StandardScaler(),
        'TransformerMixin': TransformerMixin(),
        'BaseEstimator': BaseEstimator(),
        'KernelRidge': KernelRidge(),
        'RegressorMixin': RegressorMixin(),
        'LinearSVC': LinearSVC(),
        'LinearSVR': LinearSVR(),
        'NuSVC': NuSVC(),
        'NuSVR': NuSVR(),
        'OneClassSVM': OneClassSVM(),
        'SVC': SVC(),
        'SVR': SVR(),
        'SGDClassifier': SGDClassifier(),
        'SGDRegressor': SGDRegressor(),
        #'BallTree': BallTree(),
        #'DistanceMetric': DistanceMetric(),
        #'KDTree': KDTree(),
        'KNeighborsClassifier': KNeighborsClassifier(),
        'KNeighborsRegressor': KNeighborsRegressor(),
        'KernelDensity': KernelDensity(),
        #'LSHForest': LSHForest(),
        'LocalOutlierFactor': LocalOutlierFactor(),
        'NearestCentroid': NearestCentroid(),
        'NearestNeighbors': NearestNeighbors(),
        'RadiusNeighborsClassifier': RadiusNeighborsClassifier(),
        'RadiusNeighborsRegressor': RadiusNeighborsRegressor(),
        #'GaussianProcess': GaussianProcess(),
        'GaussianProcessRegressor': GaussianProcessRegressor(),
        'GaussianProcessClassifier': GaussianProcessClassifier(),
        'CCA': CCA(),
        'PLSCanonical': PLSCanonical(),
        'PLSRegression': PLSRegression(),
        'PLSSVD': PLSSVD(),
        #'ABCMeta': ABCMeta(),
        #'BaseDiscreteNB': BaseDiscreteNB(),
        'BaseEstimator': BaseEstimator(),
        #'BaseNB': BaseNB(),
        'BernoulliNB': BernoulliNB(),
        'ClassifierMixin': ClassifierMixin(),
        'GaussianNB': GaussianNB(),
        'LabelBinarizer': LabelBinarizer(),
        'MultinomialNB': MultinomialNB(),
        'DecisionTreeClassifier': DecisionTreeClassifier(),
        'DecisionTreeRegressor': DecisionTreeRegressor(),
        'ExtraTreeClassifier': ExtraTreeClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'BaggingClassifier': BaggingClassifier(),
        'BaggingRegressor': BaggingRegressor(),
        #'BaseEnsemble': BaseEnsemble(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'IsolationForest': IsolationForest(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RandomForestRegressor': RandomForestRegressor(),
        'RandomTreesEmbedding': RandomTreesEmbedding(),
        #'VotingClassifier': VotingClassifier(),
        'BaseEstimator': BaseEstimator(),
        'ClassifierMixin': ClassifierMixin(),
        'LabelBinarizer': LabelBinarizer(),
        'MetaEstimatorMixin': MetaEstimatorMixin(),
        #'OneVsOneClassifier': OneVsOneClassifier(),
        #'OneVsRestClassifier': OneVsRestClassifier(),
        #'OutputCodeClassifier': OutputCodeClassifier(),
        'Parallel': Parallel(),
        #'ABCMeta': ABCMeta(),
        'BaseEstimator': BaseEstimator(),
        #'ClassifierChain': ClassifierChain(),
        'ClassifierMixin': ClassifierMixin(),
        'MetaEstimatorMixin': MetaEstimatorMixin(),
        #'MultiOutputClassifier': MultiOutputClassifier(),
        #'MultiOutputEstimator': MultiOutputEstimator(),
        #'MultiOutputRegressor': MultiOutputRegressor(),
        'Parallel': Parallel(),
        'RegressorMixin': RegressorMixin(),
        'LabelPropagation': LabelPropagation(),
        'LabelSpreading': LabelSpreading(),
        'BaseEstimator': BaseEstimator(),
        'IsotonicRegression': IsotonicRegression(),
        'RegressorMixin': RegressorMixin(),
        'TransformerMixin': TransformerMixin(),
        'BernoulliRBM': BernoulliRBM(),
        'MLPClassifier': MLPClassifier(),
        'MLPRegressor': MLPRegressor()
    return models
コード例 #26
ファイル: models_template.py プロジェクト: jslomkowski/aml
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor

aml_basic_regressors = [
    ('model1', LinearRegression()),
    ('model2', Lasso()),
    ('model3', Ridge()),
    ('model4', ElasticNet()),
    ('model5', Lars()),
    ('model6', LassoLars()),
    ('model7', OrthogonalMatchingPursuit()),
    ('model8', BayesianRidge()),
    ('model9', ARDRegression()),
    ('model10', PassiveAggressiveRegressor()),
    ('model11', RANSACRegressor()),
    ('model12', TheilSenRegressor()),
    ('model13', HuberRegressor()),
    ('model14', KernelRidge()),
    ('model15', SVR()),
    ('model16', KNeighborsRegressor()),
    ('model17', DecisionTreeRegressor()),
    ('model18', RandomForestRegressor()),
    ('model19', ExtraTreesRegressor()),
    ('model20', AdaBoostRegressor()),
    ('model21', GradientBoostingRegressor()),
コード例 #27

feature_scaler = StandardScaler()
feature_df[feat_column_name_list] = feature_scaler.fit_transform(
test_x[test_column_name_list] = feature_scaler.transform(

# Initialize models

clf_line = LinearRegression()
clf_ridg = Ridge(alpha=300, tol=1e-05, solver='sparse_cg', max_iter=5000)
clf_laso = Lasso(alpha=0.1, tol=1e-05, max_iter=5000)
clf_lala = LassoLars(alpha=0.001, max_iter=5000)
clf_enet = ElasticNet(alpha=0.1, tol=0.001, l1_ratio=0.2, max_iter=5000)

clf_xgbr = xgb.XGBRegressor()  # not yet
clf_xgrf = xgb.XGBRFRegressor()  # not yet

clf_rf = RandomForestRegressor(criterion='mae',
clf_tree = ExtraTreesRegressor(criterion='mae',
clf_ada = AdaBoostRegressor(n_estimators=3, loss='linear')
clf_grad = GradientBoostingRegressor()  # not yet
コード例 #28
def sparse_encode(X,
    """Generic sparse coding

    Each column of the result is the solution to a sparse coding problem.

    X : array of shape (n_samples, n_pixels)
        Data matrix.

    dictionary : array of shape (n_dictionary, n_pixels)
        The dictionary matrix against which to solve the sparse coding of
        the data. Some of the algorithms assume normalized rows.

    algorithm : {'mp', 'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}
        mp :  Matching Pursuit
        lars: uses the least angle regression method (linear_model.lars_path)
        lasso_lars: uses Lars to compute the Lasso solution
        lasso_cd: uses the coordinate descent method to compute the
        Lasso solution (linear_model.Lasso). lasso_lars will be faster if
        the estimated dictionary are sparse.
        omp: uses orthogonal matching pursuit to estimate the sparse solution
        threshold: squashes to zero all coefficients less than regularization
        from the projection dictionary * data'

    max_iter : int, 1000 by default
        Maximum number of iterations to perform if `algorithm='lasso_cd'`.

    verbose : int
        Controls the verbosity; the higher, the more messages. Defaults to 0.

    code : array of shape (n_samples, n_dictionary)
        The sparse codes

    if X.ndim == 1:
        X = X[:, np.newaxis]
    #n_samples, n_pixels = X.shape

    if algorithm == 'lasso_lars':
        alpha = float(regularization) / n_pixels  # account for scaling

        from sklearn.linear_model import LassoLars

        # Not passing in verbose=max(0, verbose-1) because Lars.fit already
        # corrects the verbosity level.
        cov = np.dot(dictionary, X.T)
        lasso_lars = LassoLars(alpha=fit_tol,
        lasso_lars.fit(dictionary.T, X.T, Xy=cov)
        sparse_code = lasso_lars.coef_.T

    elif algorithm == 'lasso_cd':
        alpha = float(regularization) / n_pixels  # account for scaling

        # TODO: Make verbosity argument for Lasso?
        # sklearn.linear_model.coordinate_descent.enet_path has a verbosity
        # argument that we could pass in from Lasso.
        from sklearn.linear_model import Lasso
        clf = Lasso(alpha=fit_tol,

        if init is not None:
            clf.coef_ = init

        clf.fit(dictionary.T, X.T, check_input=check_input)
        sparse_code = clf.coef_.T

    elif algorithm == 'lars':

        # Not passing in verbose=max(0, verbose-1) because Lars.fit already
        # corrects the verbosity level.
        from sklearn.linear_model import Lars
        cov = np.dot(dictionary, X.T)
        lars = Lars(fit_intercept=False,
        lars.fit(dictionary.T, X.T, Xy=cov)
        sparse_code = lars.coef_.T

    elif algorithm == 'threshold':
        cov = np.dot(dictionary, X.T)
        sparse_code = ((np.sign(cov) *
                        np.maximum(np.abs(cov) - regularization, 0))).T

    elif algorithm == 'omp':
        # TODO: Should verbose argument be passed to this?
        from sklearn.linear_model import orthogonal_mp_gram
        from sklearn.utils.extmath import row_norms

        cov = np.dot(dictionary, X.T)
        gram = np.dot(dictionary, dictionary.T)
        sparse_code = orthogonal_mp_gram(Gram=gram,

    elif algorithm == 'mp':
        sparse_code = mp(X,
        raise ValueError(
            'Sparse coding method must be "mp", "lasso_lars" '
            '"lasso_cd",  "lasso", "threshold" or "omp", got %s.' % algorithm)
    return sparse_code
コード例 #29
################################### MODELS ###############################################################

### SGDRegressor
from sklearn.linear_model import SGDRegressor
regressor_sgd = SGDRegressor()

### BayesianRidge
from sklearn.linear_model import BayesianRidge
regressor_br = BayesianRidge()

### LassoLars
from sklearn.linear_model import LassoLars
regressor_ll = LassoLars()

### XGBRegressor
# XGBRegressor is provided by the xgboost package; sklearn.linear_model does
# not export it, so importing it from there fails with ImportError.
from xgboost import XGBRegressor
regressor_xgb = XGBRegressor()

#  Applying K-fold cross validation

from sklearn.model_selection import cross_val_score

# No `scoring` argument is passed, so each estimator's own score() is used.
# Note that LassoLars is evaluated with 5 folds while the others use 10.
accuracies_sgd = cross_val_score(estimator=regressor_sgd, X=X_train, y=y_train, cv=10, n_jobs=-1)
accuracies_br = cross_val_score(estimator=regressor_br, X=X_train, y=y_train, cv=10, n_jobs=-1)
accuracies_ll = cross_val_score(estimator=regressor_ll, X=X_train, y=y_train, cv=5, n_jobs=-1)
コード例 #30
ファイル: A1.py プロジェクト: Prabhat1808/MachineLearning
    x_ts = np.concatenate((x_test, np.square(x_test), np.power(x_test, 3)),

    # print (MSELasso(y_test,pred.reshape((pred.size,1))))
    vals = [0.0000001, 0.0001, 1, 10]
    errors = np.empty(4)

    for j in range(4):

        lm = vals[j]
        k = 4
        err = np.empty(k)
        l = int(np.ma.size(x_train, axis=0) / k)
        x_cv, x_tr = np.split(x_train.copy(), [l], axis=0)
        y_cv, y_tr = np.split(y_train.copy(), [l], axis=0)
        model = LassoLars(alpha=lm)
        model.fit(x_tr, y_tr.ravel())
        pred = model.predict(x_cv)
        err[0] = MSELasso(y_cv, pred.reshape((pred.size, 1)))

        for i in range(k - 1):
            x_tr[i * l:(i + 1) * l], x_cv = x_cv, x_tr[i * l:(i + 1) *
            y_tr[i * l:(i + 1) * l], y_cv = y_cv, y_tr[i * l:(i + 1) *
            model = LassoLars(alpha=lm)
            model.fit(x_tr, y_tr.ravel())
            pred = model.predict(x_cv)
            err[i + 1] = MSELasso(y_cv, pred.reshape((pred.size, 1)))

        errors[j] = np.mean(err)