# ===============================================================
# Global params
# ===============================================================
# Round budget forwarded to the model runner; it also gates whether
# any outputs are written at the end of this script.
MAX_ROUND = 1200

# ===============================================================
# Load and downcast
# ===============================================================
train, x_test, sub = load_all()
# Separate the label column from the feature columns.
y_train = train['target']
x_train = train.drop(labels='target', axis='columns')
x_train, x_test = downcast(x_train, x_test)
# The combined frame is no longer referenced by name; release it.
del train
gc.collect()

# ===============================================================
# Run cv and bags
# ===============================================================
n_splits = 5
# Materialise the shuffled splits once (fixed seed) so the exact same
# folds are handed to the model runner.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=47)
folds = list(kfold.split(x_train, y_train))
trn_gini, y_trn, y_tst, fscore = etc07(
    x_train, y_train, x_test, folds, MAX_ROUND, n_splits
)

# ===============================================================
# Save submission and figure
# ===============================================================
# Outputs are written only when the round budget exceeds 300.
# NOTE(review): all four statements are assumed to sit under this
# guard, since plot_importance needs current_time -- confirm.
if MAX_ROUND > 300:
    model_name = etc07.__name__
    current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
    sub.target = y_tst
    sub_to_csv(sub, y_trn, trn_gini, model_name, current_time)
    plot_importance(fscore, trn_gini, model_name, current_time)
# ===============================================================
# Global params
# ===============================================================
# Round budget forwarded to the model runner; it also gates whether
# any outputs are written at the end of this script.
MAX_ROUND = 800

# ===============================================================
# Load and downcast
# ===============================================================
train, x_test, sub = load_all()
# Separate the label column from the feature columns.
y_train = train['target']
x_train = train.drop(labels='target', axis='columns')
x_train, x_test = downcast(x_train, x_test)
# The combined frame is no longer referenced by name; release it.
del train
gc.collect()

# ===============================================================
# Run cv and bags
# ===============================================================
n_splits = 5
# Materialise the shuffled splits once (fixed seed) so the exact same
# folds are handed to the model runner.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=99)
folds = list(kfold.split(x_train, y_train))
trn_gini, y_trn, y_tst, fscore = xgb03(
    x_train, y_train, x_test, folds, MAX_ROUND, n_splits
)

# ===============================================================
# Save submission and figure
# ===============================================================
# Outputs are written only when the round budget exceeds 300.
# NOTE(review): all four statements are assumed to sit under this
# guard, since plot_importance needs current_time -- confirm.
if MAX_ROUND > 300:
    model_name = xgb03.__name__
    current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
    sub.target = y_tst
    sub_to_csv(sub, y_trn, trn_gini, model_name, current_time)
    plot_importance(fscore, trn_gini, model_name, current_time)
# ===============================================================
# Global params
# ===============================================================
# Round budget forwarded to the model runner; it also gates whether
# any outputs are written at the end of this script.
MAX_ROUND = 1200

# ===============================================================
# Load and downcast
# ===============================================================
train, x_test, sub = load_all()
# Separate the label column from the feature columns.
y_train = train['target']
x_train = train.drop(labels='target', axis='columns')
x_train, x_test = downcast(x_train, x_test)
# The combined frame is no longer referenced by name; release it.
del train
gc.collect()

# ===============================================================
# Run cv and bags
# ===============================================================
n_splits = 5
# Materialise the shuffled splits once (fixed seed) so the exact same
# folds are handed to the model runner.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=47)
folds = list(kfold.split(x_train, y_train))
trn_gini, y_trn, y_tst, fscore = log06(
    x_train, y_train, x_test, folds, MAX_ROUND, n_splits
)

# ===============================================================
# Save submission and figure
# ===============================================================
# Outputs are written only when the round budget exceeds 300.
# NOTE(review): all four statements are assumed to sit under this
# guard, since plot_importance needs current_time -- confirm.
if MAX_ROUND > 300:
    model_name = log06.__name__
    current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
    sub.target = y_tst
    sub_to_csv(sub, y_trn, trn_gini, model_name, current_time)
    plot_importance(fscore, trn_gini, model_name, current_time)
# ===============================================================
# Global params
# ===============================================================
# Round budget forwarded to the model runner; it also gates whether
# any outputs are written at the end of this script.
MAX_ROUND = 1200

# ===============================================================
# Load and downcast
# ===============================================================
train, x_test, sub = load_all()
# Separate the label column from the feature columns.
y_train = train['target']
x_train = train.drop(labels='target', axis='columns')
x_train, x_test = downcast(x_train, x_test)
# The combined frame is no longer referenced by name; release it.
del train
gc.collect()

# ===============================================================
# Run cv and bags
# ===============================================================
n_splits = 5
# Materialise the shuffled splits once (fixed seed) so the exact same
# folds are handed to the model runner.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=47)
folds = list(kfold.split(x_train, y_train))
trn_gini, y_trn, y_tst, fscore = cat05(
    x_train, y_train, x_test, folds, MAX_ROUND, n_splits
)

# ===============================================================
# Save submission and figure
# ===============================================================
# Outputs are written only when the round budget exceeds 300.
# NOTE(review): all four statements are assumed to sit under this
# guard, since plot_importance needs current_time -- confirm.
if MAX_ROUND > 300:
    model_name = cat05.__name__
    current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
    sub.target = y_tst
    sub_to_csv(sub, y_trn, trn_gini, model_name, current_time)
    plot_importance(fscore, trn_gini, model_name, current_time)
# ===============================================================
# Global params
# ===============================================================
# Round budget forwarded to the model runner; it also gates whether
# any outputs are written at the end of this script.
MAX_ROUND = 1200

# ===============================================================
# Load and downcast
# ===============================================================
train, x_test, sub = load_all()
# Separate the label column from the feature columns.
y_train = train['target']
x_train = train.drop(labels='target', axis='columns')
x_train, x_test = downcast(x_train, x_test)
# The combined frame is no longer referenced by name; release it.
del train
gc.collect()

# ===============================================================
# Run cv and bags
# ===============================================================
n_splits = 5
# Materialise the shuffled splits once (fixed seed) so the exact same
# folds are handed to the model runner.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=47)
folds = list(kfold.split(x_train, y_train))
trn_gini, y_trn, y_tst, fscore = rgf04(
    x_train, y_train, x_test, folds, MAX_ROUND, n_splits
)

# ===============================================================
# Save submission and figure
# ===============================================================
# Outputs are written only when the round budget exceeds 300.
# NOTE(review): all four statements are assumed to sit under this
# guard, since plot_importance needs current_time -- confirm.
if MAX_ROUND > 300:
    model_name = rgf04.__name__
    current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
    sub.target = y_tst
    sub_to_csv(sub, y_trn, trn_gini, model_name, current_time)
    plot_importance(fscore, trn_gini, model_name, current_time)