def test_prediction():
    """predict() yields a 1-D vector with one entry per test row.

    The model is queried twice: the first call computes the prediction,
    the second call retrieves the cached result — both must have the
    same shape contract.
    """
    model = Regressor(estimator=LinearRegression, parameters={}, dataset=RealDataset)
    n_test_rows = model.dataset.X_test.shape[0]
    # Two passes: fresh computation, then the cache-hit path.
    for _ in range(2):
        predictions = model.predict()
        assert len(predictions.shape) == 1
        assert predictions.shape[0] == n_test_rows
"""Two-stage stacked regression on the Boston housing data with heamy.

First stage: RandomForest + LinearRegression base models.
Second stage: LinearRegression trained on their out-of-fold predictions.
"""
# FIX: `Dataset` was used below but never imported — this raised NameError.
from heamy.dataset import Dataset
from heamy.estimator import Regressor
from heamy.pipeline import ModelsPipeline
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

data = load_boston()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=2)

# create dataset (train features/targets plus the held-out test features)
Data = Dataset(X_train, y_train, X_test)

# initialize RandomForest & LinearRegression base models
RfModel = Regressor(dataset=Data, estimator=RandomForestRegressor,
                    parameters={'n_estimators': 50}, name='rf')
LRModel = Regressor(dataset=Data, estimator=LinearRegression,
                    parameters={'normalize': True}, name='lr')

# Stack two models
# Returns new dataset with out-of-fold predictions
Pipeline = ModelsPipeline(RfModel, LRModel)
StackModel = Pipeline.stack(k=10, seed=2)

# Train LinearRegression on stacked data (second stage)
Stacker = Regressor(dataset=StackModel, estimator=LinearRegression)
Results = Stacker.predict()

# Validate results using 10 fold cross-validation
Results = Stacker.validate(k=10, scorer=mean_absolute_error)
estimator=xgb_feature3, name='xgb3', use_cache=False)
# NOTE(review): the line above is the tail of a Regressor(...) call whose
# opening lies before this chunk — do not reflow without the full context.
model_lgb = Regressor(dataset=lgb_dataset, estimator=lgb_feature, name='lgb', use_cache=False)
model_gbdt = Regressor(dataset=xgb_dataset, estimator=gbdt_model, name='gbdt', use_cache=False)

# Stack all five base models; k=5 out-of-fold predictions, full_test=True
# refits each base model on the whole train set for the test predictions.
pipeline = ModelsPipeline(model_xgb, model_xgb2, model_xgb3, model_lgb, model_gbdt)
stack_ds = pipeline.stack(k=5, seed=111, add_diff=False, full_test=True)

# Second stage: linear blend of the base-model columns, no intercept.
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression, parameters={'fit_intercept': False})
predict_result = stacker.predict()

# Attach predictions to the submission list and write the CSV.
ans = pd.read_csv('../AI_risk_test_V3.0/test_list.csv', parse_dates=['appl_sbm_tm'])
ans['PROB'] = predict_result
ans = ans.drop(['appl_sbm_tm'], axis=1)
# Min-max rescale the raw scores into [0, 1], then format to 4 decimals.
minmin, maxmax = min(ans['PROB']), max(ans['PROB'])
ans['PROB'] = ans['PROB'].map(lambda x: (x - minmin) / (maxmax - minmin))
ans['PROB'] = ans['PROB'].map(lambda x: '%.4f' % x)
ans.to_csv('./ans_stacking.csv', index=None)
estimator=RandomForestRegressor, parameters={'n_estimators': 50}, name='rf')
# NOTE(review): the line above is the tail of a Regressor(...) call whose
# opening lies before this chunk.
model_lr = Regressor(dataset=dataset, estimator=LinearRegression, parameters={'normalize': True}, name='lr')

# Stack two models
# Returns new dataset with out-of-fold predictions
pipeline = ModelsPipeline(model_rf, model_lr)
stack_ds = pipeline.stack(k=10, seed=111)

# Train LinearRegression on stacked data (second stage)
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression)
results = stacker.predict()
# Validate results using 10 fold cross-validation
results = stacker.validate(k=10, scorer=mean_absolute_error)

# blend
# Reload boston from sklearn with a fresh split for the blending demo.
from sklearn.datasets import load_boston
data = load_boston()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=111)
# create dataset (rebinds `dataset` for the code that follows this chunk)
dataset = Dataset(X_train, y_train, X_test)
# Base models for this iteration's stack (dataset/params defined earlier).
model_ext = Regressor(dataset=dataset, estimator=ext, parameters=params_ext, name='ext')
model_rcv = Regressor(dataset=dataset, estimator=rcv, parameters=params_rcv, name='rcv')
#model_lascv = Regressor(dataset=dataset, estimator=lascv, parameters=params_lascv,name='lascv')

# Stack rf + rcv + ext, then fit a Lasso on the out-of-fold predictions.
pipeline = ModelsPipeline(model_rf, model_rcv, model_ext)
stack_ds = pipeline.stack(k=5, seed=111)
stacker = Regressor(dataset=stack_ds, estimator=Lasso, parameters=params_las)
y_pre = stacker.predict()
# Duplicate the prediction vector end-to-end.
y_pre_last = np.append(y_pre, y_pre)
# NOTE(review): this expression computes a value and discards it — it has no
# effect. Presumably `y_pre_last[10] *= 1.08` was intended; confirm.
y_pre_last[10] * 1.08
###
#loss_gbrt = Evaluation([y_pre_gbrt],[y_test])
# Write this iteration's predictions (fw/i come from the enclosing loop,
# defined outside this chunk).
output(fw, i + 1, y_pre_last)
'''
if loss_gbrt>0.015:
    output(fw_gbrt,i+1,y_pre_rf)
    fw_gbrt.write(str(i+1)+',gbrt,'+str(loss_gbrt)+'\n')
'''
'''
plt.scatter(xday[-364:-14],y_train)
plt.scatter(xday[-14:],y_test,color = 'green')
plt.plot(xday[-14:],y_pre_rf,color = 'red')
path = "d://tianchi_koubei/fig/rf_train/"+str(i+1)+'.png'
# Wrap the split into a heamy dataset (train X/y plus held-out test X).
dataset = Dataset(X_train, y_train, X_test)

# First-stage learners: a 50-tree random forest and a normalized OLS.
model_rf = Regressor(
    dataset=dataset,
    estimator=RandomForestRegressor,
    parameters={'n_estimators': 50},
    name='rf',
)
model_lr = Regressor(
    dataset=dataset,
    estimator=LinearRegression,
    parameters={'normalize': True},
    name='lr',
)

# Stacking: the pipeline produces a new dataset whose features are the
# base models' 10-fold out-of-fold predictions.
pipeline = ModelsPipeline(model_rf, model_lr)
stack_ds = pipeline.stack(k=10, seed=111)

# Second stage: fit a plain linear model on the stacked features,
# predict, then score it with 10-fold CV on mean absolute error.
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression)
results = stacker.predict()
results = stacker.validate(k=10, scorer=mean_absolute_error)

# --- blending demo: start over from a fresh boston split ---
from sklearn.datasets import load_boston

data = load_boston()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=111)
# Rebind `dataset` for the blending code that follows this chunk.
dataset = Dataset(X_train, y_train, X_test)
'custom_metric': ['MAE'],
        'random_seed': seed,
        'logging_level': 'Silent'
    }, name='cr')
]
# NOTE(review): the lines above are the tail of a base-model list whose
# opening lies before this chunk.

# Define the pipeline and build the 2nd-level (stacked) dataset.
pipeline = ModelsPipeline(*models)
stack_ds = pipeline.stack(k=10, seed=seed)

# Build the second-stage model and run 10-fold validation.
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression)
y_trues, y_preds = stacker.validate(k=10)
# (accuracy reporting)

# Predict using X_test.
y_pred = stacker.predict()
print(y_pred)

# Backtest: tally winnings on rows the stacker scores below 0.3.
sum = 0  # NOTE(review): shadows the builtin `sum`; consider renaming.
buy = 0
for i, yosoku in enumerate(x_test):
    # NOTE(review): calling predict() with an argument per row — confirm the
    # Regressor API supports per-sample prediction; heamy's predict()
    # normally takes no positional data argument.
    if stacker.predict(x_test[i]) < 0.3:
        if t_test[i] == 0:
            sum += z_test[i]
            buy += 1
print(sum)
print(buy)
print(sum / buy)
#model_rf2 = Regressor(dataset=dataset, estimator=rf2, parameters=params_rf2,name='rf2') model_rcv = Regressor(dataset=dataset, estimator=rcv, parameters=params_rcv,name='rcv') #model_gbrt = Regressor(dataset=dataset, estimator=gbrt, parameters=params_gbrt,name='gbrt') #model_lascv = Regressor(dataset=dataset, estimator=lascv, parameters=params_lascv,name='lascv') model_br = Regressor(dataset=dataset, estimator=br, parameters=params_br,name='br') model_knn = Regressor(dataset=dataset, estimator=knn, parameters=params_knn,name='knn') #blending = pipeline.blend(proportion=0.3,seed=111) params_las = {'alpha':1.7} params_rcv2 = {'cv':5,'normalize':True,'gcv_mode':'auto','scoring':'neg_mean_squared_error'} params_lascv = {'max_iter':500,'cv':8} pipeline = ModelsPipeline(model_rf1,model_knn) stack_ds = pipeline.stack(k=5,seed=111) stacker = Regressor(dataset=stack_ds,estimator=LassoCV, parameters=params_lascv) y_pre = stacker.predict() pipeline2 = ModelsPipeline(model_rf1,model_knn) stack_ds2 = pipeline2.blend(seed=111) blending = Regressor(dataset=stack_ds2,estimator=LassoCV, parameters=params_lascv) y_pre2 = blending.predict() blending_pre.append(y_pre2) #print(y_pre) #y_pre = pipeline.blend() #print(y_pre) ### #loss_stack = Evaluation([y_pre],[y_test]) stacking_pre.append(y_pre)