Ejemplo n.º 1
0
def test_prediction():
    """Predictions must be 1-D with one entry per row of X_test, both on the
    first (computed) call and on the second (cache-served) call."""
    model = Regressor(estimator=LinearRegression, parameters={}, dataset=RealDataset)
    # First pass computes the prediction; the second pass must retrieve the
    # cached object and satisfy exactly the same shape contract.
    for _ in range(2):
        preds = model.predict()
        assert len(preds.shape) == 1
        assert model.dataset.X_test.shape[0] == preds.shape[0]
Ejemplo n.º 2
0
def test_prediction():
    """First and cached predictions are 1-D vectors matching X_test's row count."""
    model = Regressor(estimator=LinearRegression,
                      parameters={},
                      dataset=RealDataset)

    def check(out):
        # A prediction is a flat vector with one value per test sample.
        assert len(out.shape) == 1
        assert model.dataset.X_test.shape[0] == out.shape[0]

    check(model.predict())
    # Second call should come back from the cache with the same contract.
    check(model.predict())
from heamy.dataset import Dataset  # fix: Dataset was used below but never imported
from heamy.estimator import Regressor
from heamy.pipeline import ModelsPipeline

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Load the Boston housing data and hold out 10% as a test split.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the
# installed sklearn version still ships it.
data = load_boston()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=2)

# create dataset wrapping train features/targets plus the test features
Data = Dataset(X_train, y_train, X_test)

# initialize RandomForest & LinearRegression as first-stage models
RfModel = Regressor(dataset=Data, estimator=RandomForestRegressor, parameters={'n_estimators': 50}, name='rf')
LRModel = Regressor(dataset=Data, estimator=LinearRegression, parameters={'normalize': True}, name='lr')


# Stack two models:
# returns a new dataset built from 10-fold out-of-fold predictions.
Pipeline = ModelsPipeline(RfModel, LRModel)
StackModel = Pipeline.stack(k=10, seed=2)

# Train LinearRegression on stacked data (second stage)
Stacker = Regressor(dataset=StackModel, estimator=LinearRegression)
Results = Stacker.predict()
# Validate results using 10-fold cross-validation (overwrites `Results`)
Results = Stacker.validate(k=10, scorer=mean_absolute_error)
                               estimator=xgb_feature3,
                               name='xgb3',
                               use_cache=False)
        # First-stage model on lgb_dataset (presumably a LightGBM estimator,
        # judging by the names — confirm from where lgb_feature is defined).
        # use_cache=False forces a fresh fit on every run.
        model_lgb = Regressor(dataset=lgb_dataset,
                              estimator=lgb_feature,
                              name='lgb',
                              use_cache=False)
        # GBDT-style model trained on the same dataset as the XGBoost models.
        model_gbdt = Regressor(dataset=xgb_dataset,
                               estimator=gbdt_model,
                               name='gbdt',
                               use_cache=False)
        # Combine all first-stage models and build the second-level dataset
        # from their 5-fold out-of-fold predictions.
        pipeline = ModelsPipeline(model_xgb, model_xgb2, model_xgb3, model_lgb,
                                  model_gbdt)
        stack_ds = pipeline.stack(k=5,
                                  seed=111,
                                  add_diff=False,
                                  full_test=True)
        # Second stage: linear combination of the stacked predictions,
        # without an intercept.
        stacker = Regressor(dataset=stack_ds,
                            estimator=LinearRegression,
                            parameters={'fit_intercept': False})
        predict_result = stacker.predict()
        # Build the submission file: attach predictions, drop the timestamp
        # column, min-max normalise PROB into [0, 1], format to 4 decimals.
        ans = pd.read_csv('../AI_risk_test_V3.0/test_list.csv',
                          parse_dates=['appl_sbm_tm'])
        ans['PROB'] = predict_result
        ans = ans.drop(['appl_sbm_tm'], axis=1)
        minmin, maxmax = min(ans['PROB']), max(ans['PROB'])
        ans['PROB'] = ans['PROB'].map(lambda x: (x - minmin) /
                                      (maxmax - minmin))
        ans['PROB'] = ans['PROB'].map(lambda x: '%.4f' % x)
        ans.to_csv('./ans_stacking.csv', index=None)
Ejemplo n.º 5
0
                     estimator=RandomForestRegressor,
                     parameters={'n_estimators': 50},
                     name='rf')
# Second first-stage model: plain linear regression on the same dataset.
model_lr = Regressor(dataset=dataset,
                     estimator=LinearRegression,
                     parameters={'normalize': True},
                     name='lr')

# Stack two models
# Returns new dataset with out-of-fold predictions
pipeline = ModelsPipeline(model_rf, model_lr)
stack_ds = pipeline.stack(k=10, seed=111)

# Train LinearRegression on stacked data (second stage)
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression)
results = stacker.predict()
# Validate results using 10 fold cross-validation
# (overwrites `results` from the predict() call above)
results = stacker.validate(k=10, scorer=mean_absolute_error)

#blend
# load boston dataset from sklearn
# NOTE(review): load_boston was removed in scikit-learn 1.2 — verify version.
from sklearn.datasets import load_boston
data = load_boston()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.1,
                                                    random_state=111)

# create dataset (rebinds `dataset` for the blending example that follows)
dataset = Dataset(X_train, y_train, X_test)
Ejemplo n.º 6
0
    # First-stage models over the shared `dataset`; the estimators (ext, rcv)
    # and their parameter dicts are defined outside this excerpt.
    model_ext = Regressor(dataset=dataset,
                          estimator=ext,
                          parameters=params_ext,
                          name='ext')
    model_rcv = Regressor(dataset=dataset,
                          estimator=rcv,
                          parameters=params_rcv,
                          name='rcv')
    #model_lascv = Regressor(dataset=dataset, estimator=lascv, parameters=params_lascv,name='lascv')
    # Build the second-level dataset from 5-fold out-of-fold predictions.
    pipeline = ModelsPipeline(model_rf, model_rcv, model_ext)
    stack_ds = pipeline.stack(k=5, seed=111)

    # Second stage: Lasso fitted on the stacked predictions.
    stacker = Regressor(dataset=stack_ds,
                        estimator=Lasso,
                        parameters=params_las)
    y_pre = stacker.predict()
    # NOTE(review): appending y_pre to itself doubles the vector — confirm the
    # duplication is intentional for the expected output length.
    y_pre_last = np.append(y_pre, y_pre)
    # NOTE(review): this expression has no effect — its result is discarded.
    # It was probably meant to be `y_pre_last[10] *= 1.08`.
    y_pre_last[10] * 1.08
    ###
    #loss_gbrt = Evaluation([y_pre_gbrt],[y_test])
    # Emit this iteration's predictions (output/fw/i are defined outside).
    output(fw, i + 1, y_pre_last)
    '''
    if loss_gbrt>0.015:
        output(fw_gbrt,i+1,y_pre_rf)
        fw_gbrt.write(str(i+1)+',gbrt,'+str(loss_gbrt)+'\n')
        '''
    '''
    plt.scatter(xday[-364:-14],y_train)
    plt.scatter(xday[-14:],y_test,color = 'green')
    plt.plot(xday[-14:],y_pre_rf,color = 'red')
    path = "d://tianchi_koubei/fig/rf_train/"+str(i+1)+'.png'
Ejemplo n.º 7
0
# create dataset wrapping train features/targets plus the test features
dataset = Dataset(X_train,y_train,X_test)

# initialize RandomForest & LinearRegression
model_rf = Regressor(dataset=dataset, estimator=RandomForestRegressor, parameters={'n_estimators': 50},name='rf')
model_lr = Regressor(dataset=dataset, estimator=LinearRegression, parameters={'normalize': True},name='lr')

# Stack two models
# Returns new dataset with out-of-fold predictions
pipeline = ModelsPipeline(model_rf,model_lr)
stack_ds = pipeline.stack(k=10,seed=111)

# Train LinearRegression on stacked data (second stage)
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression)
results = stacker.predict()
# Validate results using 10 fold cross-validation
# (overwrites `results` from the predict() call above)
results = stacker.validate(k=10,scorer=mean_absolute_error)



#blend
# load boston dataset from sklearn
# NOTE(review): load_boston was removed in scikit-learn 1.2 — verify version.
from sklearn.datasets import load_boston
data = load_boston()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=111)

# create dataset (rebinds `dataset` for the blending example that follows)
dataset = Dataset(X_train,y_train,X_test)
Ejemplo n.º 8
0
                  'custom_metric': ['MAE'],
                  'random_seed': seed,
                  'logging_level': 'Silent'
              },
              name='cr')
]

# Define the pipeline and build the 2nd-level (stacked) dataset.
pipeline = ModelsPipeline(*models)
stack_ds = pipeline.stack(k=10, seed=seed)

# Build the 2nd-stage model and validate it with 10-fold cross-validation.
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression)
y_trues, y_preds = stacker.validate(k=10)

# Report results: predict over X_test.
y_pred = stacker.predict()
print(y_pred)

# Tally outcomes for samples whose predicted score is below 0.3.
# (fix: renamed `sum` -> `total` so the `sum` builtin is no longer shadowed)
total = 0
buy = 0
for i, sample in enumerate(x_test):
    # NOTE(review): heamy's Regressor.predict normally takes no per-sample
    # argument — confirm this call really scores a single row.
    if stacker.predict(sample) < 0.3:
        if t_test[i] == 0:
            total += z_test[i]
        buy += 1

print(total)
print(buy)
# NOTE(review): raises ZeroDivisionError when nothing qualified (buy == 0).
print(total / buy)
Ejemplo n.º 9
0
    #model_rf2 = Regressor(dataset=dataset, estimator=rf2, parameters=params_rf2,name='rf2')
    # First-stage models over the shared `dataset`; the estimators
    # (rcv, br, knn) and their parameter dicts are defined outside this excerpt.
    model_rcv = Regressor(dataset=dataset, estimator=rcv, parameters=params_rcv,name='rcv')
    #model_gbrt = Regressor(dataset=dataset, estimator=gbrt, parameters=params_gbrt,name='gbrt')
    #model_lascv = Regressor(dataset=dataset, estimator=lascv, parameters=params_lascv,name='lascv')
    model_br = Regressor(dataset=dataset, estimator=br, parameters=params_br,name='br')
    model_knn = Regressor(dataset=dataset, estimator=knn, parameters=params_knn,name='knn')
    
    #blending = pipeline.blend(proportion=0.3,seed=111)
    # Hyper-parameters for candidate second-stage estimators.
    # NOTE(review): params_las and params_rcv2 appear unused in this excerpt;
    # model_rcv and model_br are built but not placed in either pipeline below.
    params_las = {'alpha':1.7}
    params_rcv2 = {'cv':5,'normalize':True,'gcv_mode':'auto','scoring':'neg_mean_squared_error'}
    params_lascv = {'max_iter':500,'cv':8}

    # Stacking path: 5-fold out-of-fold predictions from rf1 + knn,
    # then LassoCV fitted on top.
    pipeline = ModelsPipeline(model_rf1,model_knn)
    stack_ds = pipeline.stack(k=5,seed=111)
    stacker = Regressor(dataset=stack_ds,estimator=LassoCV, parameters=params_lascv)
    y_pre = stacker.predict()

    # Blending path: same base models, hold-out blend instead of k-fold stacking.
    pipeline2 = ModelsPipeline(model_rf1,model_knn)
    stack_ds2 = pipeline2.blend(seed=111)
    blending =  Regressor(dataset=stack_ds2,estimator=LassoCV, parameters=params_lascv)
    y_pre2 = blending.predict()
    blending_pre.append(y_pre2)

   

    #print(y_pre)
    #y_pre = pipeline.blend()
    #print(y_pre)
    ###
    #loss_stack = Evaluation([y_pre],[y_test])
    # Collect this iteration's stacked predictions (stacking_pre defined outside).
    stacking_pre.append(y_pre)