def test_validation():
    """Check that validate() caches fold results for several values of k."""
    model = Regressor(estimator=LinearRegression, parameters={}, dataset=RealDataset)

    for folds in (10, 1):
        model.validate(k=folds)
        # The repeated call retrieves the cached object.
        actual, predicted = model.validate(k=folds)
        assert len(actual) == len(predicted)
    # A single fold yields exactly one result pair.
    assert len(actual) == 1
# Example #2
def test_validation():
    """Validate with k=10 and k=1 folds; the second call must hit the cache."""
    model = Regressor(estimator=LinearRegression,
                      parameters={},
                      dataset=RealDataset)

    def run_and_check(k):
        # First call computes; second call retrieves the cached object.
        model.validate(k=k)
        true_vals, pred_vals = model.validate(k=k)
        assert len(true_vals) == len(pred_vals)
        return true_vals

    run_and_check(k=10)
    assert len(run_and_check(k=1)) == 1
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

from heamy.dataset import Dataset
from heamy.estimator import Regressor
from heamy.pipeline import ModelsPipeline
# --- Stacking example: RandomForest + LinearRegression base models,
# --- blended by a second-stage LinearRegression, scored with 10-fold CV.
data = load_boston()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=2)

# Wrap the split in heamy's Dataset container (train X/y plus held-out test X).
train_ds = Dataset(X_train, y_train, X_test)

# First-stage learners.
rf_model = Regressor(dataset=train_ds, estimator=RandomForestRegressor,
                     parameters={'n_estimators': 50}, name='rf')
# NOTE(review): `normalize` was removed from LinearRegression in recent
# scikit-learn releases — kept here to preserve the original behavior.
lr_model = Regressor(dataset=train_ds, estimator=LinearRegression,
                     parameters={'normalize': True}, name='lr')

# Stack the two models: returns a new dataset built from out-of-fold predictions.
pipe = ModelsPipeline(rf_model, lr_model)
stacked_ds = pipe.stack(k=10, seed=2)

# Second stage: train LinearRegression on the stacked features.
stack_model = Regressor(dataset=stacked_ds, estimator=LinearRegression)
predictions = stack_model.predict()
# Score the stacker with 10-fold cross-validation (mean absolute error).
cv_results = stack_model.validate(k=10, scorer=mean_absolute_error)
# Second stacking run with a different seed, same two-stage structure.
base_ds = Dataset(X_train, y_train, X_test)

# First-stage learners: a random forest and an ordinary least-squares model.
rf = Regressor(dataset=base_ds, estimator=RandomForestRegressor,
               parameters={'n_estimators': 50}, name='rf')
lr = Regressor(dataset=base_ds, estimator=LinearRegression,
               parameters={'normalize': True}, name='lr')

# Build the second-level dataset from out-of-fold predictions.
model_pipe = ModelsPipeline(rf, lr)
second_stage_ds = model_pipe.stack(k=10, seed=111)

# Fit the meta-model on the stacked features, predict, then cross-validate.
meta_model = Regressor(dataset=second_stage_ds, estimator=LinearRegression)
preds = meta_model.predict()
cv_scores = meta_model.validate(k=10, scorer=mean_absolute_error)



# --- Blending example (snippet truncated by the source page; only the data
# --- preparation and the first base model are visible here) ---
# Load the Boston housing dataset from scikit-learn.
from sklearn.datasets import load_boston
data = load_boston()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=111)

# Wrap the split in a heamy Dataset (train X/y plus test X).
dataset = Dataset(X_train,y_train,X_test)

# First base model: a 50-tree RandomForest.
model_rf = Regressor(dataset=dataset, estimator=RandomForestRegressor, parameters={'n_estimators': 50},name='rf')
# Example #5
              estimator=CatBoostRegressor,
              parameters={
                  'custom_metric': ['MAE'],
                  'random_seed': seed,
                  'logging_level': 'Silent'
              },
              name='cr')
]

# Define the pipeline and build the second-level (stacked) dataset.
pipeline = ModelsPipeline(*models)
stack_ds = pipeline.stack(k=10, seed=seed)

# Fit the second-stage model and cross-validate it (10 folds).
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression)
y_trues, y_preds = stacker.validate(k=10)

# Predict on the held-out test features and report the result.
y_pred = stacker.predict()
print(y_pred)

# Simulated buying strategy: accumulate the z_test payoff whenever the model
# predicts below the 0.3 threshold and the true label is 0.
# NOTE(review): calling predict() once per sample looks unusual for heamy's
# API (predict normally takes no argument) — confirm the intended usage.
total = 0  # renamed from `sum` so the builtin is no longer shadowed
buy = 0
for i, sample in enumerate(x_test):
    if stacker.predict(x_test[i]) < 0.3:
        if t_test[i] == 0:
            total += z_test[i]
        buy += 1

print(total)
# Example #6
                      estimator=xgb.XGBRegressor,
                      parameters={
                          'n_estimators': 50,
                          'learning_rate': 0.05,
                          'max_depth': 3
                      },
                      name='xgb')

# Stack the four base models into a second-level dataset.
pipeline = ModelsPipeline(model_lr, model_rf, model_gbdt, model_xgb)
stack_ds = pipeline.stack(k=10, seed=111)
# Second stage: fit an XGBoost regressor on the stacked features.
stacker = Regressor(dataset=stack_ds, estimator=xgb.XGBRegressor)
results = stacker.predict()
# Cross-validated score (10 folds, mean squared error).
results10 = stacker.validate(k=10, scorer=mean_squared_error)
print("r2_score: %f" % r2_score(y_test, results))

# Assemble the export frame: features, dropped columns, target, predictions.
test_y = pd.DataFrame(y_test)
predictions = pd.DataFrame(results)
data = pd.concat([data_XX, data_drop], axis=1)
data = pd.concat([data, data_y], axis=1)
data = pd.concat([data, predictions], axis=1)
data = np.array(data)
# newline='' prevents csv.writer from emitting extra blank lines on Windows.
with open('C:/20180402_pre_test.csv', 'w', newline='') as f:
    header = [
        'c01', 'c02', 'c03', 'c05', 'c06', 'c10', 'c24', 'c25', 'c26', 'c27',
        'c28', 'c04', 'pred'
    ]
    writer = csv.writer(f, delimiter=",")
    writer.writerow(header)