コード例 #1
0
ファイル: 07_etc.py プロジェクト: Oreki47/kaggle_competitions
    # Driver body for the etc07 run: load the data, build CV folds, call
    # etc07, and write the submission plus an importance plot.
    # Switch the process working directory to the parent so that any relative
    # paths used afterwards resolve against it.
    os.chdir("..")  # set to parent dir

    # ===============================================================
    # Global params
    # ===============================================================
    # Passed to etc07 below and also used to decide whether outputs are saved.
    MAX_ROUND = 1200
    # ===============================================================
    # Load and downcast
    # ===============================================================
    # load_all() yields three objects: the training frame (which carries a
    # 'target' column), the test features, and the submission frame.
    train, x_test, sub = load_all()
    # Separate the features from the label column.
    x_train = train.drop('target', axis=1)
    y_train = train.target
    # presumably shrinks column dtypes to reduce memory — confirm in downcast()
    x_train, x_test = downcast(x_train, x_test)

    # The combined frame is no longer referenced below; drop the name and
    # request a garbage-collection pass before the run starts.
    del train
    gc.collect()
    # ===============================================================
    # Run cv and bags
    # ===============================================================
    n_splits = 5
    # Materialize the shuffled splits into a list so the same fold indices
    # can be handed to etc07 as a concrete sequence; the fixed random_state
    # keeps the shuffle reproducible.
    folds = list(KFold(n_splits=n_splits, shuffle=True, random_state=47).split(x_train, y_train))
    # etc07 returns four values; presumably a training/CV Gini score, train
    # predictions, test predictions and feature scores — verify against etc07.
    trn_gini, y_trn, y_tst, fscore = etc07(x_train, y_train, x_test, folds, MAX_ROUND, n_splits)

    # ===============================================================
    # Save submission and figure
    # ===============================================================
    # Outputs are written only when MAX_ROUND exceeds 300
    # (presumably to skip saving for short trial runs — confirm).
    if MAX_ROUND > 300:
        sub.target = y_tst
        # One timestamp (YYYYMMDD_HHMMSS) is shared by both saved artifacts.
        current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
        sub_to_csv(sub, y_trn, trn_gini, etc07.__name__, current_time)
        plot_importance(fscore, trn_gini, etc07.__name__, current_time)
コード例 #2
0
    # Driver body for the xgb03 run: load the data, build CV folds, call
    # xgb03, and write the submission plus an importance plot.
    # Global params
    # ===============================================================
    # Passed to xgb03 below and also used to decide whether outputs are saved.
    MAX_ROUND = 800
    # ===============================================================
    # Load and downcast
    # ===============================================================
    # load_all() yields three objects: the training frame (which carries a
    # 'target' column), the test features, and the submission frame.
    train, x_test, sub = load_all()
    # Separate the features from the label column.
    x_train = train.drop('target', axis=1)
    y_train = train.target
    # presumably shrinks column dtypes to reduce memory — confirm in downcast()
    x_train, x_test = downcast(x_train, x_test)

    # The combined frame is no longer referenced below; drop the name and
    # request a garbage-collection pass before the run starts.
    del train
    gc.collect()
    # ===============================================================
    # Run cv and bags
    # ===============================================================
    n_splits = 5
    # Materialize the shuffled splits into a list so the same fold indices
    # can be handed to xgb03 as a concrete sequence; the fixed random_state
    # keeps the shuffle reproducible.
    folds = list(
        KFold(n_splits=n_splits, shuffle=True,
              random_state=99).split(x_train, y_train))
    # xgb03 returns four values; presumably a training/CV Gini score, train
    # predictions, test predictions and feature scores — verify against xgb03.
    trn_gini, y_trn, y_tst, fscore = xgb03(x_train, y_train, x_test, folds,
                                           MAX_ROUND, n_splits)

    # ===============================================================
    # Save submission and figure
    # ===============================================================
    # Outputs are written only when MAX_ROUND exceeds 300
    # (presumably to skip saving for short trial runs — confirm).
    if MAX_ROUND > 300:
        sub.target = y_tst
        # One timestamp (YYYYMMDD_HHMMSS) is shared by both saved artifacts.
        current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
        sub_to_csv(sub, y_trn, trn_gini, xgb03.__name__, current_time)
        plot_importance(fscore, trn_gini, xgb03.__name__, current_time)
コード例 #3
0
ファイル: 05_cat.py プロジェクト: Oreki47/kaggle_competitions
    # Driver body for the cat05 run: load the data, build CV folds, call
    # cat05, and write the submission plus an importance plot.
    # Global params
    # ===============================================================
    # Passed to cat05 below and also used to decide whether outputs are saved.
    MAX_ROUND = 1200
    # ===============================================================
    # Load and downcast
    # ===============================================================
    # load_all() yields three objects: the training frame (which carries a
    # 'target' column), the test features, and the submission frame.
    train, x_test, sub = load_all()
    # Separate the features from the label column.
    x_train = train.drop('target', axis=1)
    y_train = train.target
    # presumably shrinks column dtypes to reduce memory — confirm in downcast()
    x_train, x_test = downcast(x_train, x_test)

    # The combined frame is no longer referenced below; drop the name and
    # request a garbage-collection pass before the run starts.
    del train
    gc.collect()
    # ===============================================================
    # Run cv and bags
    # ===============================================================
    n_splits = 5
    # Materialize the shuffled splits into a list so the same fold indices
    # can be handed to cat05 as a concrete sequence; the fixed random_state
    # keeps the shuffle reproducible.
    folds = list(
        KFold(n_splits=n_splits, shuffle=True,
              random_state=47).split(x_train, y_train))
    # cat05 returns four values; presumably a training/CV Gini score, train
    # predictions, test predictions and feature scores — verify against cat05.
    trn_gini, y_trn, y_tst, fscore = cat05(x_train, y_train, x_test, folds,
                                           MAX_ROUND, n_splits)

    # ===============================================================
    # Save submission and figure
    # ===============================================================
    # Outputs are written only when MAX_ROUND exceeds 300
    # (presumably to skip saving for short trial runs — confirm).
    if MAX_ROUND > 300:
        sub.target = y_tst
        # One timestamp (YYYYMMDD_HHMMSS) is shared by both saved artifacts.
        current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
        sub_to_csv(sub, y_trn, trn_gini, cat05.__name__, current_time)
        plot_importance(fscore, trn_gini, cat05.__name__, current_time)
コード例 #4
0
    # Driver body for the xgb_bagging run: load the data, build CV folds, call
    # xgb_bagging, and write the submission plus an importance plot.
    # NOTE(review): MAX_ROUND is used below but is assigned before this
    # excerpt begins — confirm its value in the full file.
    # Load and downcast
    # ===============================================================
    # load_all() yields three objects: the training frame (which carries a
    # 'target' column), the test features, and the submission frame.
    train, x_test, sub = load_all()
    # Separate the features from the label column.
    x_train = train.drop('target', axis=1)
    y_train = train.target
    # presumably shrinks column dtypes to reduce memory — confirm in downcast()
    x_train, x_test = downcast(x_train, x_test)

    # The combined frame is no longer referenced below; drop the name and
    # request a garbage-collection pass before the run starts.
    del train
    gc.collect()
    # ===============================================================
    # Run cv and bags
    # ===============================================================
    n_splits = 5
    # Forwarded to xgb_bagging as its last argument
    # (presumably the number of bagged models — confirm).
    bags = 5
    # Materialize the shuffled splits into a list so the same fold indices
    # can be handed to xgb_bagging as a concrete sequence; the fixed
    # random_state keeps the shuffle reproducible.
    folds = list(
        KFold(n_splits=n_splits, shuffle=True,
              random_state=177).split(x_train, y_train))  # seed = 177
    # xgb_bagging returns five values; y_tst_mrank is presumably a rank-based
    # blend of the test predictions — verify against xgb_bagging.
    trn_gini, y_trn, y_tst, y_tst_mrank, fscore = xgb_bagging(
        x_train, y_train, x_test, folds, MAX_ROUND, n_splits, bags)

    # ===============================================================
    # Save submission and figure
    # ===============================================================
    # Outputs are written only when MAX_ROUND exceeds 100
    # (presumably to skip saving for short trial runs — confirm).
    if MAX_ROUND > 100:
        sub.target = y_tst
        # One timestamp (YYYYMMDD_HHMMSS) is shared by both saved artifacts.
        current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
        # Earlier variant of the save call, kept for reference:
        # sub_to_csv(sub, y_trn, trn_gini, "BadOfFiveMediumRank", current_time)
        # NOTE(review): the active call passes y_tst_mrank as the second
        # argument where the earlier variant passed y_trn — confirm which
        # array sub_to_csv expects in that position.
        sub_to_csv(sub, y_tst_mrank, trn_gini, xgb_bagging.__name__,
                   current_time)
        plot_importance(fscore, trn_gini, xgb_bagging.__name__, current_time)
コード例 #5
0
    # Driver body for the log06 run: load the data, build CV folds, call
    # log06, and write the submission plus an importance plot.
    # Global params
    # ===============================================================
    # Passed to log06 below and also used to decide whether outputs are saved.
    MAX_ROUND = 1200
    # ===============================================================
    # Load and downcast
    # ===============================================================
    # load_all() yields three objects: the training frame (which carries a
    # 'target' column), the test features, and the submission frame.
    train, x_test, sub = load_all()
    # Separate the features from the label column.
    x_train = train.drop('target', axis=1)
    y_train = train.target
    # presumably shrinks column dtypes to reduce memory — confirm in downcast()
    x_train, x_test = downcast(x_train, x_test)

    # The combined frame is no longer referenced below; drop the name and
    # request a garbage-collection pass before the run starts.
    del train
    gc.collect()
    # ===============================================================
    # Run cv and bags
    # ===============================================================
    n_splits = 5
    # Materialize the shuffled splits into a list so the same fold indices
    # can be handed to log06 as a concrete sequence; the fixed random_state
    # keeps the shuffle reproducible.
    folds = list(
        KFold(n_splits=n_splits, shuffle=True,
              random_state=47).split(x_train, y_train))
    # log06 returns four values; presumably a training/CV Gini score, train
    # predictions, test predictions and feature scores — verify against log06.
    trn_gini, y_trn, y_tst, fscore = log06(x_train, y_train, x_test, folds,
                                           MAX_ROUND, n_splits)

    # ===============================================================
    # Save submission and figure
    # ===============================================================
    # Outputs are written only when MAX_ROUND exceeds 300
    # (presumably to skip saving for short trial runs — confirm).
    if MAX_ROUND > 300:
        sub.target = y_tst
        # One timestamp (YYYYMMDD_HHMMSS) is shared by both saved artifacts.
        current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
        sub_to_csv(sub, y_trn, trn_gini, log06.__name__, current_time)
        plot_importance(fscore, trn_gini, log06.__name__, current_time)
コード例 #6
0
ファイル: 04_rgf.py プロジェクト: Oreki47/kaggle_competitions
    # Driver body for the rgf04 run: load the data, build CV folds, call
    # rgf04, and write the submission plus an importance plot.
    # Global params
    # ===============================================================
    # Passed to rgf04 below and also used to decide whether outputs are saved.
    MAX_ROUND = 1200
    # ===============================================================
    # Load and downcast
    # ===============================================================
    # load_all() yields three objects: the training frame (which carries a
    # 'target' column), the test features, and the submission frame.
    train, x_test, sub = load_all()
    # Separate the features from the label column.
    x_train = train.drop('target', axis=1)
    y_train = train.target
    # presumably shrinks column dtypes to reduce memory — confirm in downcast()
    x_train, x_test = downcast(x_train, x_test)

    # The combined frame is no longer referenced below; drop the name and
    # request a garbage-collection pass before the run starts.
    del train
    gc.collect()
    # ===============================================================
    # Run cv and bags
    # ===============================================================
    n_splits = 5
    # Materialize the shuffled splits into a list so the same fold indices
    # can be handed to rgf04 as a concrete sequence; the fixed random_state
    # keeps the shuffle reproducible.
    folds = list(
        KFold(n_splits=n_splits, shuffle=True,
              random_state=47).split(x_train, y_train))
    # rgf04 returns four values; presumably a training/CV Gini score, train
    # predictions, test predictions and feature scores — verify against rgf04.
    trn_gini, y_trn, y_tst, fscore = rgf04(x_train, y_train, x_test, folds,
                                           MAX_ROUND, n_splits)

    # ===============================================================
    # Save submission and figure
    # ===============================================================
    # Outputs are written only when MAX_ROUND exceeds 300
    # (presumably to skip saving for short trial runs — confirm).
    if MAX_ROUND > 300:
        sub.target = y_tst
        # One timestamp (YYYYMMDD_HHMMSS) is shared by both saved artifacts.
        current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
        sub_to_csv(sub, y_trn, trn_gini, rgf04.__name__, current_time)
        plot_importance(fscore, trn_gini, rgf04.__name__, current_time)