def test_validation():
    """Check that ``Regressor.validate`` returns matching targets and predictions.

    For each fold count the validation is run twice; per the original
    comment, the second call is expected to be served from the cache.
    Both returned sequences must have the same length, and with ``k=1``
    that length must be exactly one.
    """
    regressor = Regressor(estimator=LinearRegression, parameters={}, dataset=RealDataset)

    for folds in (10, 1):
        # First call populates the result; the second should retrieve the cached object.
        regressor.validate(k=folds)
        truths, predictions = regressor.validate(k=folds)
        assert len(truths) == len(predictions)

    # The last iteration ran with k=1, which must yield a single entry.
    assert len(truths) == 1
# Two-stage stacking example: RandomForest + LinearRegression stacked with
# heamy, then a LinearRegression trained on the stacked dataset.
from heamy.dataset import Dataset  # was missing: Dataset is used below
from heamy.estimator import Regressor
from heamy.pipeline import ModelsPipeline
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# NOTE(review): load_boston and LinearRegression's `normalize` parameter were
# both removed in scikit-learn 1.2; this script needs an older release.
data = load_boston()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=2)

# create dataset
Data = Dataset(X_train, y_train, X_test)

# initialize RandomForest & LinearRegression
RfModel = Regressor(dataset=Data, estimator=RandomForestRegressor, parameters={'n_estimators': 50}, name='rf')
LRModel = Regressor(dataset=Data, estimator=LinearRegression, parameters={'normalize': True}, name='lr')

# Stack two models
# Returns new dataset with out-of-fold predictions
Pipeline = ModelsPipeline(RfModel, LRModel)
StackModel = Pipeline.stack(k=10, seed=2)

# Train LinearRegression on stacked data (second stage)
Stacker = Regressor(dataset=StackModel, estimator=LinearRegression)
Results = Stacker.predict()

# Validate results using 10 fold cross-validation
# NOTE(review): this rebinds Results, discarding the predictions obtained above.
Results = Stacker.validate(k=10, scorer=mean_absolute_error)
# Bundle the train/test split into a heamy dataset.
dataset = Dataset(X_train, y_train, X_test)

# First-stage estimators: a random forest and a linear regression.
model_rf = Regressor(
    dataset=dataset,
    estimator=RandomForestRegressor,
    parameters={'n_estimators': 50},
    name='rf',
)
model_lr = Regressor(
    dataset=dataset,
    estimator=LinearRegression,
    parameters={'normalize': True},
    name='lr',
)

# Stack both models; the result is a new dataset with out-of-fold predictions.
pipeline = ModelsPipeline(model_rf, model_lr)
stack_ds = pipeline.stack(k=10, seed=111)

# Second stage: LinearRegression trained on the stacked data.
stacker = Regressor(dataset=stack_ds, estimator=LinearRegression)
results = stacker.predict()

# 10-fold cross-validation of the second-stage model (rebinds `results`).
results = stacker.validate(k=10, scorer=mean_absolute_error)

# blend
# Reload the Boston dataset from sklearn for the blending example.
from sklearn.datasets import load_boston

data = load_boston()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=111
)

# Fresh dataset built from the new split.
dataset = Dataset(X_train, y_train, X_test)

# First-stage random forest for the blend.
model_rf = Regressor(
    dataset=dataset,
    estimator=RandomForestRegressor,
    parameters={'n_estimators': 50},
    name='rf',
)
estimator=CatBoostRegressor, parameters={ 'custom_metric': ['MAE'], 'random_seed': seed, 'logging_level': 'Silent' }, name='cr') ] # Define the pipeline and build the 2nd-level dataset pipeline = ModelsPipeline(*models) stack_ds = pipeline.stack(k=10, seed=seed) # Build the model and run validation stacker = Regressor(dataset=stack_ds, estimator=LinearRegression) y_trues, y_preds = stacker.validate(k=10) # Print accuracy # Predict using X_test y_pred = stacker.predict() print(y_pred) sum = 0 buy = 0 for i, yosoku in enumerate(x_test): if stacker.predict(x_test[i]) < 0.3: if t_test[i] == 0: sum += z_test[i] buy += 1 print(sum)
estimator=xgb.XGBRegressor, parameters={ 'n_estimators': 50, 'learning_rate': 0.05, 'max_depth': 3 }, name='xgb') # stack两个模型 pipeline = ModelsPipeline(model_lr, model_rf, model_gbdt, model_xgb) stack_ds = pipeline.stack(k=10, seed=111) # 第二层使用xgboost模型stack stacker = Regressor(dataset=stack_ds, estimator=xgb.XGBRegressor) results = stacker.predict() # 使用10折交叉验证结果 results10 = stacker.validate(k=10, scorer=mean_squared_error) print("r2_score: %f" % r2_score(y_test, results)) test_y = pd.DataFrame(y_test) predictions = pd.DataFrame(results) data = pd.concat([data_XX, data_drop], axis=1) data = pd.concat([data, data_y], axis=1) data = pd.concat([data, predictions], axis=1) data = np.array(data) with open('C:/20180402_pre_test.csv', 'w') as f: header = [ 'c01', 'c02', 'c03', 'c05', 'c06', 'c10', 'c24', 'c25', 'c26', 'c27', 'c28', 'c04', 'pred' ] writer = csv.writer(f, delimiter=",") writer.writerow(header)