def __call__(self):
    """Optionally rebuild the cross-validation splits, then train per seed/fold.

    Behaviour is driven by values read through ``self.get``:

    * ``"recreate_cv"`` -- when truthy, ``make_cv`` is called with the
      ``train.csv`` found under ``<ROOT>/input/<raw_dirname>/``, the
      configured ``"cv"`` type, ``"n_splits"`` and ``"seeds"``, and
      ``<ROOT>/src/cvs`` as ``out_path``.
    * ``"train_flag"`` -- when truthy, every seed in ``self.seeds`` is
      trained on every fold listed in ``"run_folds"``.

    For each (seed, fold) pair the split file ``<cv>_<seed>.csv`` is read
    from ``self.cv_path``; rows whose ``fold`` column equals the current
    fold form the validation set, and rows whose ``fold`` is neither the
    current fold nor ``-1`` form the training set. Both frames are handed
    to ``self.train`` together with the seed and fold.

    Returns:
        None.
    """
    # Split the images into cross-validation folds.
    print("Training")
    if self.get("recreate_cv"):
        make_cv(
            train_df=pd.read_csv(
                osp.join(self.ROOT, "input", self.raw_dirname, "train.csv")
            ),
            cv_type=self.get("cv"),
            out_path=osp.join(self.ROOT, "src", "cvs"),
            n_splits=self.get("n_splits"),
            seeds=self.get("seeds"),
        )

    if self.get("train_flag"):
        # NOTE(review): the splits above are generated from self.get("seeds")
        # and written relative to <ROOT>/src/cvs, while this loop iterates
        # self.seeds and reads from self.cv_path -- presumably these refer to
        # the same values/location; confirm against the class setup.
        for seed in self.seeds:
            # Train by seed: reseed before loading this seed's split file.
            seed_everything(seed)
            cv_df = pd.read_csv(
                osp.join(self.cv_path, f"{self.get('cv')}_{seed}.csv")
            )
            for fold in self.get("run_folds"):
                # Rows with fold == -1 are left out of training
                # (presumably rows excluded from the split -- TODO confirm).
                train_df = cv_df[(cv_df["fold"] != fold) & (cv_df["fold"] != -1)]
                val_df = cv_df[cv_df["fold"] == fold]
                self.train(train_df, val_df, seed, fold)
pos_rate = float(sum(target)) / target.shape[0] logger.info('shape %s %s' % data.shape) logger.info('pos num: %s, pos rate: %s' % (sum(target), pos_rate)) all_params = { 'max_depth': [9], 'n_estimators': [150], 'learning_rate': [0.1], 'scale_pos_weight': [1], 'min_child_weight': [0.01], 'subsample': [1], 'colsample_bytree': [0.5], 'reg_alpha': [0.01], } cv = make_cv( ) # cv = StratifiedKFold(target, n_folds=3, shuffle=True, random_state=0) all_ans = None all_target = None all_ids = None with open('train_feature_1.py', 'w') as f: f.write("LIST_TRAIN_COL = ['" + "', '".join(feature_column) + "']\n\n") logger.info('cv_start') for params in ParameterGrid(all_params): logger.info('param: %s' % (params)) for train_idx, test_idx in list(cv): list_estimator = [] ans = [] insample_ans = []