Code Example #1
def main(param=""):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)
    data_guest = param["data_guest"]
    data_host = param["data_host"]

    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    df = df_guest.join(df_host, rsuffix='host')
    y = df[label_name]
    X = df.drop(label_name, axis=1)

    clf = GradientBoostingRegressor(random_state=0,
                                    n_estimators=50,
                                    learning_rate=0.1)
    clf.fit(X, y)

    y_predict = clf.predict(X)

    result = {
        "mean_absolute_error": mean_absolute_error(y, y_predict),
    }
    print(result)
    return {}, result
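This snippet is excerpted from a FATE benchmark script and omits its import block. A minimal header it would need might look like the sketch below; the module providing JobConfig is not shown in the excerpt, so that import path is an assumption to adjust to your checkout.

import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
# Assumption: JobConfig comes from FATE's test/benchmark utilities; this
# import path is a placeholder, not confirmed by the excerpt.
from fate_test.utils import JobConfig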
Code Example #2
def test_gradient_boosting_estimator_with_smooth_quantile_loss():
    np.random.seed(0)
    m = 15000
    n = 10
    p = .8
    X = np.random.normal(size=(m,n))
    beta = np.random.normal(size=n)
    mu = np.dot(X, beta)
    y = np.random.lognormal(mu)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.33333333333333)
    loss_function = SmoothQuantileLossFunction(1, p, .0001)
    q_loss = QuantileLossFunction(1, p)
    model = Booster(BaggingRegressor(Earth(max_degree=2, verbose=False,
                                           use_fast=True, max_terms=10)),
                    loss_function, n_estimators=150,
                    stopper=stop_after_n_iterations_without_percent_improvement_over_threshold(3, .01),
                    verbose=True)
    assert_raises(NotFittedError, lambda: model.predict(X_train))
    
    model.fit(X_train, y_train)
    
    prediction = model.predict(X_test)
    model2 = GradientBoostingRegressor(loss='quantile', alpha=p)
    model2.fit(X_train, y_train)
    prediction2 = model2.predict(X_test)
    assert_less(q_loss(y_test, prediction), q_loss(y_test, prediction2))
    assert_greater(r2_score(y_test,prediction), r2_score(y_test,prediction2))
    q = np.mean(y_test <= prediction)
    assert_less(np.abs(q-p), .05)
    assert_greater(model.score_, 0.)
    assert_approx_equal(model.score(X_train, y_train), model.score_)
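For reference, the quantile ("pinball") loss that q_loss evaluates above can be written in a few lines. This sketch is independent of the QuantileLossFunction class used in the test, and it also explains the coverage check near the end: minimizing this loss targets the p-th conditional quantile, so roughly a fraction p of targets should fall at or below the prediction.

import numpy as np

def pinball_loss(y_true, y_pred, p=0.8):
    # Under-prediction (y_true > y_pred) is weighted by p,
    # over-prediction by (1 - p).
    diff = y_true - y_pred
    return np.mean(np.maximum(p * diff, (p - 1) * diff))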
Code Example #3
File: gbdt-regression.py  Project: zpskt/FATE
def main(config="../../config.yaml", param="./gbdt_config_reg.yaml"):

    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    data_guest = param["data_guest"]
    data_host = param["data_host"]
    idx = param["idx"]
    label_name = param["label_name"]

    print('config is {}'.format(config))
    if isinstance(config, str):
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
        print('data base dir is', data_base_dir)
    else:
        data_base_dir = config.data_base_dir

    # prepare data
    df_guest = pd.read_csv(os.path.join(data_base_dir, data_guest),
                           index_col=idx)
    df_host = pd.read_csv(os.path.join(data_base_dir, data_host),
                          index_col=idx)
    df = df_guest.join(df_host, rsuffix='host')
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    clf = GradientBoostingRegressor(random_state=0, n_estimators=50)
    clf.fit(X, y)

    y_predict = clf.predict(X)

    result = {"mean_absolute_error": mean_absolute_error(y, y_predict)}
    print(result)
    return {}, result
Code Example #4
    def run(self):
        loss = self.lossComboBox.currentText()
        if loss == 'Least Squares':
            loss = 'ls'
        if loss == 'Least Absolute Deviation':
            loss = 'lad'
        if loss == 'Huber':
            loss = 'huber'
        if loss == 'Quantile':
            loss = 'quantile'

        params = {
            'loss': loss,
            'learning_rate': self.learningDoubleSpinBox.value(),
            'n_estimators': self.numEstSpinBox.value(),
            'subsample': self.subsampleDoubleSpinBox.value(),
            'criterion': 'friedman_mse',
            'min_samples_split': self.min_n_splitSpinBox.value(),
            'min_samples_leaf': self.min_n_leafSpinBox.value(),
            'min_weight_fraction_leaf': self.min_fractionDoubleSpinBox.value(),
            'max_depth': self.max_depthSpinBox.value(),
            'min_impurity_decrease': self.min_imp_decDoubleSpinBox.value(),
            'random_state': 1,
            'alpha': self.alphaDoubleSpinBox.value()
        }
        return params, self.getChangedValues(params,
                                             GradientBoostingRegressor())
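The chain of if statements above can be collapsed into a lookup table. A sketch of the equivalent mapping (same display strings and scikit-learn loss codes; the 'ls' fallback for an unrecognized entry is an added assumption):

LOSS_CODES = {
    'Least Squares': 'ls',
    'Least Absolute Deviation': 'lad',
    'Huber': 'huber',
    'Quantile': 'quantile',
}
loss = LOSS_CODES.get(self.lossComboBox.currentText(), 'ls')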
Code Example #5
def train_model(data):
    half_len = len(data) // 2  # assumption: the name implies training on the first half only

    # train
    X = []
    y = []
    for [c, cb, delta] in data[:half_len]:
        X.append([c, cb])
        y.append(delta)

    svr_rbf_general = svm.SVR(kernel='rbf')
    svr_linear_general = svm.SVR(kernel='linear')
    svr_rbf = svm.SVR(kernel='rbf', C=1e3, gamma=0.1)
    svr_lin = svm.SVR(kernel='linear', C=1e3)
    svr_poly = svm.SVR(kernel='poly', C=1e3, degree=2)

    model_br = BayesianRidge()
    model_lr = LinearRegression()
    model_etc = ElasticNet()
    model_svr = SVR()
    model_gbr = GradientBoostingRegressor()

    clf = svr_linear_general
    clf.fit(X, y)

    return clf
Code Example #6
File: gbdt-regression.py  Project: wzxJayce/FATE
def main(param=""):
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    data_guest = param["data_guest"]
    data_host = param["data_host"]
    data_test = param["data_test"]
    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    df_test = pd.read_csv(data_test, index_col=idx)

    df = pd.concat([df_guest, df_host], axis=0)
    y = df[label_name]
    X = df.drop(label_name, axis=1)
    X_guest = df_guest.drop(label_name, axis=1)
    y_guest = df_guest[label_name]
    clf = GradientBoostingRegressor(n_estimators=50)
    clf.fit(X, y)
    y_predict = clf.predict(X_guest)

    result = {
        "mean_squared_error": mean_squared_error(y_guest, y_predict),
        "mean_absolute_error": mean_absolute_error(y_guest, y_predict)
    }
    print(result)
    return {}, result
Code Example #7
File: regression.py  Project: redinton/ML_WHEEL
	def tune_gbr(self):
		# The original grid listed SVR parameters (kernel, C, gamma), which
		# GradientBoostingRegressor rejects; these GBR parameters are a
		# plausible substitute grid.
		parameters = {'n_estimators': [50, 100, 200],
		              'learning_rate': [0.05, 0.1],
		              'max_depth': [3, 4, 5]}
		clf = GridSearchCV(GradientBoostingRegressor(), parameters, verbose=2)
		clf.fit(self.X_train, self.y_train)
		print(clf.best_params_)
		print(clf.best_score_)
Code Example #8
def GDBT_ALL(trainFileName, testFileName):
    train_X, train_y, _ = ld.LoadData_DATA_LABEL_ITEM(trainFileName)
    Eval_X, items = ld.LoadData_DATA_ITEM(testFileName)
    clf = GradientBoostingRegressor(loss='lad', n_estimators=40, learning_rate=0.1, max_depth=3).\
            fit(train_X, train_y)
    pred_y = clf.predict(Eval_X)
    res = []
    for i in range(len(Eval_X)):
        res.append([items[i], 'all', '%.4f' % max(pred_y[i], 0)])
    return res
Code Example #9
def getModels():
    models = {}
    models['dt'] = DecisionTreeRegressor(max_depth=50)
    models['rf1'] = RandomForestRegressor()
    models['rf2'] = RandomForestRegressor(n_estimators=128, max_depth=15)
    models['gbr'] = GradientBoostingRegressor(n_estimators=128,
                                              max_depth=5,
                                              learning_rate=1.0)
    # models['abr'] = AdaBoostRegressor(n_estimators=128)
    return models
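A hypothetical way to consume getModels(): cross-validate each candidate and print its mean score, assuming a feature matrix X and target y are already defined (both are placeholders here).

from sklearn.model_selection import cross_val_score

for name, model in getModels().items():
    # R^2 is the default scoring for regressors in cross_val_score.
    print(name, cross_val_score(model, X, y, cv=5).mean())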
Code Example #10
File: Model_train.py  Project: HelloAtomorrow/Ad
def model_build(train_set):
    X = train_set.iloc[:, 6:11]
    Y = train_set['label']
    #print(X.head(5))
    #print(Y.head(5))
    model = GradientBoostingRegressor()
    #model = GradientBoostingClassifier()
    model.fit(X, Y)
    print(model.feature_importances_)
    #print(model)
    return model
Code Example #11
def train_model():
    global train_x, train_y, test_x
    gbr = GradientBoostingRegressor()
    cv_score = cross_val_score(gbr, train_x, train_y).mean()
    print(cv_score)
    nn = MLPRegressor()
    cv_score = cross_val_score(nn, train_x, train_y).mean()
    print(cv_score)
    rft = RandomForestRegressor()
    cv_score = cross_val_score(rft, train_x, train_y).mean()
    print(cv_score)
Code Example #12
def create_models():
    models = {
        'BayesianRidge': BayesianRidge(),
        # 'LinearRegression': LinearRegression(),
        'ElasticNet': ElasticNet(),
        'SVR(rbf)': SVR(kernel='rbf'),
        'SVR(linear)': SVR(kernel='linear'),
        'Lasso': Lasso(),
        'GBR': GradientBoostingRegressor(n_estimators=300, max_depth=3),
    }
    return models
Code Example #13
def test_argument_names():
    boston = load_boston()
    X = DataFrame(boston['data'], columns=boston['feature_names'])
    y = boston['target']
    model = GradientBoostingRegressor(verbose=True).fit(X, y)
    code = sklearn2code(model, ['predict'],
                        numpy_flat,
                        argument_names=X.columns)
    boston_housing_module = exec_module('boston_housing_module', code)
    assert_array_almost_equal(model.predict(X),
                              boston_housing_module.predict(**X))
Code Example #14
File: picassoai.py  Project: dejimarquis/PicassoAI
    def predict_using_local_model(self):
        gbr = GradientBoostingRegressor()
        gbr.fit(self.train_x, self.train_y)
        print('Accuracy of gbr, on the training set: ' +
              str(gbr.score(self.train_x, self.train_y)))  # was score(train_x, train_y): undefined in this scope
        start_time = time.time()
        predictions = gbr.predict(self.test_x)
        predict_time = time.time() - start_time
        print('Prediction time for gbr is ' + str(predict_time) + '\n')
        predictions = predictions.astype('uint8')
        return predictions
Code Example #15
File: picassoai.py  Project: dejimarquis/PicassoAI
    def train_model(self):
        #  Tried other models such as an MLP neural network regressor and
        #  random forests, but GBR performed best
        global train_x, train_y, test_x
        cvscore = []
        leaf_node_grid = [4, 5, 6, 7, 8]  # renamed: the original name shadowed the built-in range()
        for i in leaf_node_grid:
            print(i)
            gbr = GradientBoostingRegressor(max_leaf_nodes=i)
            cv_score = cross_val_score(
                gbr, train_x, train_y,
                scoring='neg_mean_squared_error').mean()
            cvscore.append(cv_score)
        print(cvscore)
Code Example #16
def prediction():
    global train_x, train_y, test_x
    gbr = GradientBoostingRegressor()
    gbr.fit(train_x, train_y)
    print('Accuracy of gbr, on the training set: ' +
          str(gbr.score(train_x, train_y)))
    start_time = time.time()
    predictions = gbr.predict(test_x)
    predict_time = time.time() - start_time
    print('Prediction time for gbr is ' + str(predict_time) + '\n')
    predictions = predictions.astype('uint8')
    print(predictions)
    return predictions
Code Example #17
    def __init__(self, data, label, task, model_name='lgb', eval_metric=None, importance_threshold=0.0):
        '''
        :param data: DataFrame
        :param label: label name
        :param task: task type, one of [regression, classification]
        :param model_name: one of ['gbdt', 'xgb', 'lgb']
        :param importance_threshold: drop features whose importance falls below this threshold
        '''
        self.data = data
        self.label = label
        self.task = task
        self.model_name = model_name
        self._importance_threshold = importance_threshold

        self.model = None
        # Choose the evaluation metric from the task and the label values
        self.eval_metric = None

        if model_name == 'lgb':
            if self.task == 'classification':
                self.model = lgb.LGBMClassifier(**lgb_params)
                # Both branches originally set 'logloss'; LightGBM's metric
                # names distinguish binary from multiclass log-loss.
                if self.data[self.label].unique().shape[0] == 2:
                    self.eval_metric = 'binary_logloss'
                else:
                    self.eval_metric = 'multi_logloss'
            elif self.task == 'regression':
                self.model = lgb.LGBMRegressor(**lgb_params)
                self.eval_metric = 'l2'
            else:
                raise ValueError('Task must be either "classification" or "regression"')
        elif model_name == 'xgb':
            if self.task == 'classification':
                self.model = xgb.XGBClassifier(**xgb_params)
                if self.data[self.label].unique().shape[0] == 2:
                    self.eval_metric = 'logloss'
                else:
                    self.eval_metric = 'mlogloss'
            elif self.task == 'regression':
                self.model = xgb.XGBRegressor(**xgb_params)
                self.eval_metric = 'rmse'
            else:
                raise ValueError('Task must be either "classification" or "regression"')
        else: # gbdt
            if self.task == 'classification':
                self.model = GradientBoostingClassifier(**gbdt_params)
            elif self.task == 'regression':
                self.model = GradientBoostingRegressor(**gbdt_params)
            else:
                raise ValueError('Task must be either "classification" or "regression"')
        if eval_metric:
            self.eval_metric = eval_metric  # an explicit metric overrides the default chosen above
Code Example #18
def GDBT_ALL_train(trainFileName, testFileName):
    train_X, train_y, _ = ld.loadData_all(trainFileName)
    test_X, test_y, items = ld.loadData_all(testFileName)
    clf = GradientBoostingRegressor(loss='lad', n_estimators=40, learning_rate=0.1, max_depth=3).\
            fit(train_X, train_y)
    pred_y = clf.predict(test_X)
    res = []
    for i in range(len(test_X)):
        res.append([
            items[i], 'all',
            '%.2f' % max(pred_y[i], 0),
            '%.2f' % max(test_y[i], 0)
        ])
    return res
Code Example #19
File: predict.py  Project: PandoraLS/python_toys
def main(train, test, filepath):
    if not filepath:
        click.echo("need filepath")
        return

    X, Y = get_data(filepath)

    if not train or not test:
        click.echo("need train or test size")
        return

    TRAIN_SIZE = 96 * int(train)
    TEST_SIZE = 96 * int(test)

    X_train = X[:TRAIN_SIZE]
    Y_train = Y[:TRAIN_SIZE]
    X_test = X[TRAIN_SIZE:]
    Y_test = Y[TRAIN_SIZE:]

    #clf = SVR(kernel='rbf', C=1e3, gamma=0.00001)
    clf = GradientBoostingRegressor(n_estimators=100, max_depth=1)
    #clf = DecisionTreeRegressor(max_depth=25)
    #clf = ExtraTreesRegressor(n_estimators=2000,max_depth=14)
    #clf = xgb.XGBRegressor(n_estimators=2000,max_depth=25)
    #clf = RandomForestRegressor(n_estimators=1000,max_depth=26,n_jobs=7)

    #clf.fit(X_train,Y_train)
    #y_pred = clf.predict(X_test)
    #plt.plot(X_test, y_pred, linestyle='-', color='red')
    predict_list = []
    for i in range(TEST_SIZE):
        X = [[x] for x in range(i, TRAIN_SIZE + i)]
        clf.fit(X, Y[i:TRAIN_SIZE + i])
        y_pred = clf.predict(np.array([TRAIN_SIZE + 1 + i]).reshape(1, -1))
        predict_list.append(y_pred)

    #print("mean_squared_error:%s"%mean_squared_error(Y_test, predict_list))
    #print("sqrt of mean_squared_error:%s"%np.sqrt(mean_squared_error(Y_test, predict_list)))
    origin_data = Y_test
    #print("origin data:%s"%origin_data)
    plt.plot([x for x in range(TRAIN_SIZE + 1, TRAIN_SIZE + TEST_SIZE + 1)],
             predict_list,
             linestyle='-',
             color='red',
             label='prediction model')
    plt.plot(X_test, Y_test, linestyle='-', color='blue', label='actual model')
    plt.legend(loc=1, prop={'size': 12})
    plt.show()
Code Example #20
def multi_output_regression(train, test, grid, outputs):

    # Multi-Layer Perceptron Regressor
    input_train, input_test, output_train, actual = pd.training_testing_data(
        train, test, grid, outputs)
    print('You are training on %d samples' % (len(input_train)))
    print('You are testing on %d samples' % (len(input_test)))
    multi_output_mlp = MultiOutputRegressor(
        MLPRegressor(solver='adam',
                     learning_rate='adaptive',
                     max_iter=500,
                     early_stopping=True))
    multi_output_mlp.fit(input_train, output_train)
    prediction_mlp = multi_output_mlp.predict(input_test)
    print('Multi-Layer Perceptron')
    print(r'$R^{2}$: %.5f' % (r2_score(actual, prediction_mlp)))
    print('MSE: %.5f' % (mean_squared_error(actual, prediction_mlp)))
    print('RMSE: %.5f' % (np.sqrt(mean_squared_error(actual, prediction_mlp))))

    # Gradient Boosting Regressor
    input_train, input_test, output_train, actual = pd.training_testing_data(
        train, test, grid, outputs)
    print('You are training on %d samples' % (len(input_train)))
    print('You are testing on %d samples' % (len(input_test)))
    multi_output_gbr = MultiOutputRegressor(
        GradientBoostingRegressor(loss='huber'))
    multi_output_gbr.fit(input_train, output_train)
    prediction_gbr = multi_output_gbr.predict(input_test)
    print('Gradient Boosting Regressor')
    print(r'$R^{2}$: %.5f' % (r2_score(actual, prediction_gbr)))
    print('MSE: %.5f' % (mean_squared_error(actual, prediction_gbr)))
    print('RMSE: %.5f' % (np.sqrt(mean_squared_error(actual, prediction_gbr))))

    # Random Forest Regressor
    input_train, input_test, output_train, actual = pd.training_testing_data(
        train, test, grid, outputs)
    print('You are training on %d samples' % (len(input_train)))
    print('You are testing on %d samples' % (len(input_test)))
    multi_output_rfr = MultiOutputRegressor(RandomForestRegressor())
    multi_output_rfr.fit(input_train, output_train)
    prediction_rfr = multi_output_rfr.predict(input_test)
    print('Random Forest Regressor')
    print(r'$R^{2}$: %.5f' % (r2_score(actual, prediction_rfr)))
    print('MSE: %.5f' % (mean_squared_error(actual, prediction_rfr)))
    print('RMSE: %.5f' % (np.sqrt(mean_squared_error(actual, prediction_rfr))))

    return actual, prediction_gbr, prediction_mlp, prediction_rfr
Code Example #21
def parameter_choose(train_set):
    """
    模型最佳参数选择,根据对应的训练集选择最佳模型参数
    :param train_set: 训练集
    :return: 无
    """
    X = train_set.iloc[:, 6:11]
    Y = train_set['label']
    param_test = {'n_estimators': range(10, 81, 10)}
    gsearch = GridSearchCV(
        estimator=GradientBoostingRegressor(learning_rate=1),
        param_grid=param_test,
        iid=True,
        cv=5)
    gsearch.fit(X, Y)
    print(gsearch.cv_results_)
    print(gsearch.best_params_, gsearch.best_score_)
Code Example #22
def model_build(train_set, weight=None):
    """
    Build and fit a GBDT model on the training set
    :param train_set: training set
    :param weight: optional list of per-sample label weights
    :return: the fitted model
    """
    X = train_set.iloc[:, 6:11]
    Y = train_set['label']
    #print(X.head(5))
    #print(Y.head(5))
    model = GradientBoostingRegressor()
    #model = GradientBoostingClassifier()
    # The original only fitted when weight was falsy, leaving the model
    # unfitted whenever weights were supplied; pass them through instead.
    if weight is None:
        model.fit(X, Y)
    else:
        model.fit(X, Y, sample_weight=weight)
    print(model.feature_importances_)
    #print(model)
    return model
Code Example #23
def gradient_boosting(train, test, label):
    gb = GradientBoostingRegressor(n_estimators=300,
                                   learning_rate=0.05,
                                   max_depth=3,
                                   max_features='sqrt',
                                   min_samples_leaf=15,
                                   min_samples_split=10,
                                   loss='huber')
    gb.fit(train, label.to_numpy().ravel())  # .as_matrix() was removed in pandas 1.0

    # prediction on training data
    y_prediction = gb.predict(train)
    y_test = label
    print("Gradient Boosting score on training set: ",
          rmse(y_test, y_prediction))

    y_prediction = gb.predict(test)
    y_prediction = np.exp(y_prediction)
    return y_prediction
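The closing np.exp implies the label passed in was log-transformed beforehand. A matched pair of transforms, sketched with placeholder names (y_raw is not from the excerpt), would look like this; log1p/expm1 is the common safer variant when targets can be zero.

import numpy as np

y_log = np.log(y_raw)              # applied to the target before calling gradient_boosting
y_pred = np.exp(gb.predict(test))  # the inverse, as in the function's last lines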
Code Example #24
def compare_algorithms(datasetName, data, target):
    X_train, X_test, y_train, y_test = train_test_split(data,
                                                        target,
                                                        test_size=0.2,
                                                        random_state=1)
    params = {
        'n_estimators': [10, 20, 30, 40],
        'loss': ['ls', 'huber'],
        'min_samples_leaf': [6],
        'max_depth': [3, 4, 5, 6]
    }

    print("\n\nTraining GBRT on %s..." % datasetName)
    clf = GridSearchCV(GradientBoostingRegressor(), params, cv=5, n_jobs=-1)
    clf.fit(X_train, y_train)
    print("Best params original: %s" % clf.best_params_)
    print("Avg train time original: %s seconds" %
          clf.cv_results_["mean_fit_time"][clf.best_index_])
    bestOriginal = clf.best_estimator_

    myclf = GridSearchCV(MyGradientBoostingRegressor(),
                         params,
                         cv=5,
                         n_jobs=-1)
    myclf.fit(X_train, y_train)
    print("Best params mine: %s" % myclf.best_params_)
    print("Avg train time mine: %s seconds" %
          myclf.cv_results_["mean_fit_time"][myclf.best_index_])
    bestMine = myclf.best_estimator_

    originalPredictions = bestOriginal.predict(X_test)
    myPredictions = bestMine.predict(X_test)
    print("The dataset: %s with %s train instances" %
          (datasetName, data.shape[0]))
    print("Original GradientBoostingRegressor R2: %s\tMSE: %s\tMAE: %s" %
          (r2_score(y_test, originalPredictions),
           mean_squared_error(y_test, originalPredictions),
           mean_absolute_error(y_test, originalPredictions)))
    print("My GradientBoostingRegressor R2: %s\tMSE: %s\tMAE: %s" %
          (r2_score(y_test, myPredicttions),
           mean_squared_error(y_test, myPredicttions),
           mean_absolute_error(y_test, myPredicttions)))
Code Example #25
File: ModelProblem.py  Project: HelloAtomorrow/Ad
def model_build(train_set, weight=None):
    """
    Build and fit a GBDT model on the training set
    :param train_set: training set
    :param weight: optional list of per-sample label weights (unused here)
    :return: the fitted model
    """
    X = train_set.iloc[:, 1:]
    print(len(X))
    Y = train_set['label']
    print(len(Y))
    #print(X.head(5))
    #print(Y.head(5))
    model = GradientBoostingRegressor()
    #model = GradientBoostingClassifier()
    #model = logistic_regression_path(X, Y)
    model.fit(X, Y)
    print(model.feature_importances_)
    #print(model)
    return model
Code Example #26

def trainmodels():
    global n_folds, model_br, model_dic, model_etc, model_gbr, model_lr, \
           model_names, model_svr, cv_score_list, pre_y_list
    n_folds = 6  # number of cross-validation folds
    model_br = BayesianRidge()  # Bayesian ridge regression model
    model_lr = LinearRegression()  # ordinary linear regression model
    model_etc = ElasticNet()  # elastic-net regression model
    model_svr = SVR()  # support vector regression model
    model_gbr = GradientBoostingRegressor()  # gradient boosting regression model
    model_names = [
        'BayesianRidge', 'LinearRegression', 'ElasticNet', 'SVR', 'GBR'
    ]  # names of the different models
    model_dic = [model_br, model_lr, model_etc, model_svr,
                 model_gbr]  # the regression model objects
    cv_score_list = []  # cross-validation results
    pre_y_list = []  # predicted y values from each regression model
    for model in model_dic:  # iterate over the regression models
        scores = cross_val_score(model, X, y,
                                 cv=n_folds)  # cross-validate each model
        cv_score_list.append(scores)  # store the fold scores
        pre_y_list.append(model.fit(X, y).predict(X))  # store in-sample predictions
Code Example #27
File: mathtest.py  Project: zaydmath/present
def run_experiment(writer, name, generate_data):
    np.random.seed(SEED)

    data = DataHolder(generate_data(TRAIN_SIZE), generate_data(TEST_SIZE))

    if DUMP_FILES:
        data.dump(name)

    # Define model types to use
    models = [
        svr_grid(),
        RandomForestRegressor(n_estimators=100),
        GradientBoostingRegressor(n_estimators=100,
                                  learning_rate=0.1,
                                  max_depth=10,
                                  random_state=0,
                                  verbose=VERBOSE),
        KerasRegressor(build_fn=neural_network_regression, data=data)
    ]

    for model in models:
        eval_data(writer, name, model, data)
Code Example #28
File: regression.py  Project: redinton/ML_WHEEL
	def regular_model(self):
		model_br = BayesianRidge()
		model_lr = LinearRegression()
		model_etc = ElasticNet()
		model_las = Lasso()
		model_rid = Ridge()
		model_sgd = SGDRegressor()
		model_svr = SVR()
		model_gbr = GradientBoostingRegressor()
		model_rfr = RandomForestRegressor()

		model_names = ['BayesianRidge', 'LinearRegression', 'ElasticNet', 'Lasso', 'Ridge',
		               'SGDRegressor', 'SVR', 'GradientBoostingRegressor', 'RandomForestRegressor']

		model_dic = [model_br, model_lr, model_etc, model_las, model_rid,
		             model_sgd, model_svr, model_gbr, model_rfr]

		result_dict = {}
		for i, clf in enumerate(model_dic):
			# cross_val_score returns an array of fold scores; take the mean
			# so the models can be ranked (the original called cross_validate,
			# whose dict return value cannot be sorted).
			value = cross_val_score(clf, self.X_train, self.y_train).mean()
			result_dict[model_names[i]] = value

		result_dict = sorted(result_dict.items(), key=lambda x: x[1], reverse=True)
		print(result_dict)
Code Example #29

#!/usr/bin/env python

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor  # public path; the private gradient_boosting module was removed in scikit-learn 0.24
from sklearn import datasets
from sklearn.utils import shuffle
import numpy as np

boston = datasets.load_boston()
X, Y = shuffle(boston.data, boston.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, Y_train = X[:offset], Y[:offset]
X_test, Y_test = X[offset:], Y[offset:]

regressor = GradientBoostingRegressor(n_estimators=120, learning_rate=0.2,
                                      max_depth=2, random_state=0, loss='ls')
regressor.fit(X_train, Y_train)
score = regressor.score(X_test, Y_test)
print(score)
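An optional diagnostic on the fitted model above: the standard staged_predict idiom tracks test error across boosting stages, reusing the script's X_test/Y_test and its numpy import.

from sklearn.metrics import mean_squared_error

# One prediction per boosting stage; find where test MSE bottoms out.
test_mse = [mean_squared_error(Y_test, y_pred)
            for y_pred in regressor.staged_predict(X_test)]
print('best stage:', int(np.argmin(test_mse)) + 1, 'min test MSE:', min(test_mse))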
Code Example #30
rsqrd_svm = r2_score(Y_test, y_pred_svm)
mae_svm = mean_absolute_error(Y_test, y_pred_svm)

#RF Algorithm
from sklearn.ensemble import RandomForestRegressor
regressor_rf = RandomForestRegressor(n_estimators=20, random_state=0)
regressor_rf.fit(X_train, Y_train)
y_pred_rf = regressor_rf.predict(X_test)
rms_rf = sqrt(mean_squared_error(Y_test, y_pred_rf))
rsqrd_rf = r2_score(Y_test, y_pred_rf)
mae_rf = mean_absolute_error(Y_test, y_pred_rf)

#GB Algorithm
from sklearn.ensemble import GradientBoostingRegressor  # public path; the private gradient_boosting module was removed in scikit-learn 0.24
regressor_gb = GradientBoostingRegressor(learning_rate=0.5,
                                         n_estimators=400,
                                         loss='ls')
regressor_gb.fit(X_train, Y_train)
y_pred_gb = regressor_gb.predict(X_test)
rms_gb = sqrt(mean_squared_error(Y_test, y_pred_gb))
rsqrd_gb = r2_score(Y_test, y_pred_gb)
mae_gb = mean_absolute_error(Y_test, y_pred_gb)

#Multiple Linear Regression
from sklearn.linear_model import LinearRegression
regressor_lr = LinearRegression()
regressor_lr.fit(X_train, Y_train)
y_pred_lr = regressor_lr.predict(X_test)
rms_lr = sqrt(mean_squared_error(Y_test, y_pred_lr))
rsqrd_lr = r2_score(Y_test, y_pred_lr)
mae_lr = mean_absolute_error(Y_test, y_pred_lr)
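Since the script computes the same three metrics for each model, a short closing sketch can gather them into one table for side-by-side comparison (only the fully shown models are included; the pandas import is an addition).

import pandas as pd

results = pd.DataFrame(
    {'RMSE': [rms_rf, rms_gb, rms_lr],
     'R2': [rsqrd_rf, rsqrd_gb, rsqrd_lr],
     'MAE': [mae_rf, mae_gb, mae_lr]},
    index=['RandomForest', 'GradientBoosting', 'LinearRegression'])
print(results)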