def train_and_predict(self, X_train, X_valid, y_train, y_valid, X_test, catboost_params):
    """Train a CatBoost model with early stopping and predict on every split.

    Args:
        X_train: Training features.
        X_valid: Validation features; used as the evaluation set.
        y_train: Training targets.
        y_valid: Validation targets.
        X_test: Test features.
        catboost_params: Parameters handed to the ``CatBoost`` constructor.

    Returns:
        Tuple ``(y_train_pred, y_valid_pred, y_pred, model)``. Each prediction
        is reshaped to a single column via ``reshape(-1, 1)``.
    """
    # Declare categorical columns on the pools themselves: CatBoost does not
    # accept ``cat_features`` in ``fit`` when the training data is already a
    # ``Pool``, and the validation pool needs the same declaration as the
    # training pool.
    cat_features = data.get_category_columns(X_train)
    train_pool = Pool(X_train, label=y_train, cat_features=cat_features)
    valid_pool = Pool(X_valid, label=y_valid, cat_features=cat_features)
    evals = [valid_pool]

    model = CatBoost(catboost_params)

    with timer("Training"):
        model.fit(
            train_pool,
            eval_set=evals,
            # Stop when the evaluation metric has not improved for 50 rounds.
            early_stopping_rounds=50,
        )

    y_train_pred = model.predict(train_pool).reshape(-1, 1)
    # Predictions for the validation data.
    y_valid_pred = model.predict(valid_pool).reshape(-1, 1)
    # Predictions for the test data.
    y_pred = model.predict(X_test).reshape(-1, 1)

    return y_train_pred, y_valid_pred, y_pred, model
def train_and_predict(self, X_train, X_valid, y_train, y_valid, X_test, xgbm_params):
    """Train an XGBoost booster with early stopping and predict on every split.

    Args:
        X_train: Training features.
        X_valid: Validation features; evaluated under the name ``'eval'``.
        y_train: Training targets.
        y_valid: Validation targets.
        X_test: Test features.
        xgbm_params: Parameter dict forwarded to ``xgb.train``.

    Returns:
        Tuple ``(y_train_pred, y_valid_pred, y_pred, model)``. Each prediction
        is reshaped to a single column via ``reshape(-1, 1)``.
    """
    # Wrap the labelled splits for training and evaluation.
    train_matrix = xgb.DMatrix(X_train, label=y_train)
    valid_matrix = xgb.DMatrix(X_valid, label=y_valid)
    watchlist = [(train_matrix, 'train'), (valid_matrix, 'eval')]
    history = {}

    # Callback built on the 'main' logger (presumably reports every 50 rounds).
    main_logger = logging.getLogger('main')
    eval_callbacks = [log_evaluation_xgbm(main_logger, period=50)]

    with timer("Training"):
        model = xgb.train(
            xgbm_params,
            train_matrix,
            num_boost_round=1000,
            evals=watchlist,
            evals_result=history,
            # Stop when the metric has not improved for 50 rounds.
            early_stopping_rounds=50,
            callbacks=eval_callbacks,
        )

    # NOTE(review): depending on the xgboost version, Booster.predict may use
    # every trained round rather than the best iteration - confirm.
    def _predict_column(features):
        """Predict on unlabeled features and return a column vector."""
        return model.predict(xgb.DMatrix(features)).reshape(-1, 1)

    y_train_pred = _predict_column(X_train)
    y_valid_pred = _predict_column(X_valid)
    y_pred = _predict_column(X_test)

    return y_train_pred, y_valid_pred, y_pred, model
def run(self):
    """Create the features and decorate the resulting column names.

    Calls ``self.create_features()`` inside a ``timer`` context labelled with
    ``self.name``, then renames the columns of ``self.train`` and
    ``self.test`` to ``<prefix>_<column>_<suffix>``; an empty/falsy
    ``self.prefix`` or ``self.suffix`` contributes nothing.

    Returns:
        ``self``, so calls can be chained.
    """
    with timer(self.name):
        self.create_features()

        # Only add the separating underscore when the affix is set.
        prefix = ''
        if self.prefix:
            prefix = self.prefix + '_'
        suffix = ''
        if self.suffix:
            suffix = '_' + self.suffix

        for frame in (self.train, self.test):
            frame.columns = prefix + frame.columns + suffix

    return self
def run(self):
    """Create the features, shrink both frames, and decorate column names.

    Inside a ``timer`` context labelled with ``self.name`` this calls
    ``self.create_features()``, passes ``self.historical_transactions`` and
    ``self.new_merchant_transactions`` through ``reduce_mem_usage``, and
    renames their columns to ``<prefix>_<column>_<suffix>``; an empty/falsy
    ``self.prefix`` or ``self.suffix`` contributes nothing.

    Returns:
        ``self``, so calls can be chained.
    """
    frame_attrs = ('historical_transactions', 'new_merchant_transactions')

    with timer(self.name):
        self.create_features()

        # Replace each frame with the result of reduce_mem_usage.
        for attr in frame_attrs:
            setattr(self, attr, reduce_mem_usage(getattr(self, attr)))

        # Only add the separating underscore when the affix is set.
        prefix = ''
        if self.prefix:
            prefix = self.prefix + '_'
        suffix = ''
        if self.suffix:
            suffix = '_' + self.suffix

        for attr in frame_attrs:
            frame = getattr(self, attr)
            frame.columns = prefix + frame.columns + suffix

    return self
def train_and_predict(self, X_train, X_valid, y_train, y_valid, X_test, lasso_params):
    """Fit a Lasso model on standardized features and predict on every split.

    Args:
        X_train: Training features; its ``.values`` array is used.
        X_valid: Validation features; its ``.values`` array is used.
        y_train: Training targets.
        y_valid: Validation targets (not used by this model).
        X_test: Test features; its ``.values`` array is used.
        lasso_params: Keyword arguments for the ``Lasso`` constructor.

    Returns:
        Tuple ``(y_train_pred, y_valid_pred, y_test_pred, model)``. Each
        prediction is reshaped to a single column via ``reshape(-1, 1)``.
    """
    # Standardize. The scaler is fitted on the training data only and then
    # reused for the other splits, so validation/test features are scaled with
    # the same mean and variance the model was trained on. Refitting on each
    # split would give every split its own scale.
    scaler = StandardScaler()
    X_train_std = scaler.fit_transform(X_train.values)
    X_valid_std = scaler.transform(X_valid.values)
    X_test_std = scaler.transform(X_test.values)

    with timer("Training"):
        model = Lasso(**lasso_params)
        model.fit(X_train_std, y_train)

    y_train_pred = model.predict(X_train_std).reshape(-1, 1)
    # Predictions for the validation data.
    y_valid_pred = model.predict(X_valid_std).reshape(-1, 1)
    # Predictions for the test data.
    y_test_pred = model.predict(X_test_std).reshape(-1, 1)

    return y_train_pred, y_valid_pred, y_test_pred, model
def train_and_predict(self, X_train, X_valid, y_train, y_valid, X_test, lgbm_params):
    """Train a LightGBM booster with early stopping and predict on every split.

    Args:
        X_train: Training features.
        X_valid: Validation features; used as the validation set.
        y_train: Training targets.
        y_valid: Validation targets.
        X_test: Test features.
        lgbm_params: Parameter dict forwarded to ``lgb.train``.

    Returns:
        Tuple ``(y_train_pred, y_valid_pred, y_pred, model)``. Predictions use
        ``model.best_iteration`` and are reshaped to a single column via
        ``reshape(-1, 1)``.
    """
    # Build the datasets; the validation set references the training set.
    train_set = lgb.Dataset(X_train, y_train)
    valid_set = lgb.Dataset(X_valid, y_valid, reference=train_set)
    history = {}

    # Callback built on the 'main' logger (presumably reports every 50 rounds).
    main_logger = logging.getLogger('main')
    eval_callbacks = [log_evaluation_lgbm(main_logger, period=50)]

    with timer("Training"):
        model = lgb.train(
            lgbm_params,
            train_set,
            # Data used to evaluate the model during training.
            valid_sets=valid_set,
            evals_result=history,
            # Train for at most 1000 rounds.
            num_boost_round=1000,
            # Stop when the metric has not improved for 50 rounds.
            early_stopping_rounds=50,
            callbacks=eval_callbacks,
        )

    def _predict_column(features):
        """Predict at the best iteration and return a column vector."""
        raw = model.predict(features, num_iteration=model.best_iteration)
        return raw.reshape(-1, 1)

    y_train_pred = _predict_column(X_train)
    y_valid_pred = _predict_column(X_valid)
    y_pred = _predict_column(X_test)

    return y_train_pred, y_valid_pred, y_pred, model