Ejemplo n.º 1
0
        ["user_answer", "tags", "type_of", "bundle_id", "previous_5_ans"],
        axis=1)
    # Swap brackets, single quotes, spaces and commas in the column labels
    # for underscores (presumably so the LightGBM trainer accepts them as
    # feature names -- TODO confirm).
    column_name_fixes = str.maketrans(dict.fromkeys("[]' ,", "_"))
    df.columns = [label.translate(column_name_fixes) for label in df.columns]

    # Keep only the rows whose "answered_correctly" value is not null.
    df = df[df["answered_correctly"].notna()]
    print(df.columns)
    print(df.shape)

    categorical_feature = ["content_id"]
    print(model_id)
    # The model id doubles as the experiment name for this run.
    train_lgbm_cv_newuser(
        df,
        categorical_feature=categorical_feature,
        params=params,
        output_dir=output_dir,
        model_id=model_id,
        exp_name=model_id,
        is_debug=is_debug,
        drop_user_id=True,
    )
    """
    params = {
        'n_estimators': 12000,
        'learning_rate': 0.1,
        'eval_metric': 'AUC',
        'loss_function': 'Logloss',
        'random_seed': 0,
        'metric_period': 50,
        'od_wait': 400,
        'task_type': 'GPU',
        'max_depth': 8,
        "verbose": 100
Ejemplo n.º 2
0
                'metric': 'auc',
                'verbosity': -1,
                "n_estimators": 10000,
                "early_stopping_rounds": 50
            }
            df.tail(1000).to_csv("exp028.csv", index=False)

            df = df.drop([
                "user_answer", "tags", "type_of", "bundle_id", "previous_3_ans"
            ],
                         axis=1)
            df.columns = [
                x.replace("[",
                          "_").replace("]", "_").replace("'", "_").replace(
                              " ", "_").replace(",", "_") for x in df.columns
            ]
            df = df[df["answered_correctly"].notnull()]

            categorical_feature = ["tags1", "tags2", "content_id"]
            print(df.shape)
            print(model_id)
            train_lgbm_cv_newuser(
                df,
                categorical_feature=categorical_feature,
                params=params,
                output_dir=output_dir,
                model_id=model_id,
                exp_name=f"{model_id}_w2v_window{window}_size{size}",
                is_debug=is_debug,
                drop_user_id=True)
            break
Ejemplo n.º 3
0
    # Make sure the output directory exists; an existing one is not an error.
    os.makedirs(output_dir, exist_ok=True)

    # Parameters handed to the trainer. The trailing notes keep the
    # alternative values that the original comments recorded.
    params = {
        "objective": "binary",
        "num_leaves": 32,
        "min_data_in_leaf": 15,  # alternative noted: 42
        "max_depth": -1,
        "learning_rate": 0.1,
        "boosting": "gbdt",
        "bagging_fraction": 0.7,  # alternative noted: 0.5
        "feature_fraction": 0.5,
        "bagging_seed": 0,
        "reg_alpha": 0.1,  # alternative noted: 1.728910519108444
        "reg_lambda": 1,
        "random_state": 0,
        "metric": "auc",
        "verbosity": -1,
        "n_estimators": 10000,
        "early_stopping_rounds": 100,
    }

    # Remove the "user_answer" column before training.
    df = df.drop(columns=["user_answer"])
    print(df.columns)

    train_lgbm_cv_newuser(
        df,
        params=params,
        output_dir=output_dir,
        model_id=model_id,
        exp_name="exp003",
        new_user_ratio=0.01,
    )
    # break  (left disabled, as in the original)
Ejemplo n.º 4
0
    df = df.drop(
        ["user_answer", "tags", "type_of", "bundle_id", "previous_3_ans"],
        axis=1)
    df.columns = [
        x.replace("[", "_").replace("]", "_").replace("'", "_").replace(
            " ", "_").replace(",", "_") for x in df.columns
    ]
    df = df[df["answered_correctly"].notnull()]
    print(df.columns)
    print(df.shape)

    print(model_id)
    train_lgbm_cv_newuser(df,
                          params=params,
                          output_dir=output_dir,
                          model_id=model_id,
                          exp_name=model_id,
                          is_debug=is_debug,
                          drop_user_id=True)

    if is_debug:
        break
# fit
# Load the questions table, pinning explicit integer dtypes for four columns.
df_question = pd.read_csv(
    "../input/riiid-test-answer-prediction/questions.csv",
    dtype=dict(
        bundle_id="int32",
        question_id="int32",
        correct_answer="int8",
        part="int8",
    ),
)
Ejemplo n.º 5
0
    # Swap brackets, single quotes, spaces and commas in the column labels
    # for underscores (presumably so the LightGBM trainer accepts them as
    # feature names -- TODO confirm).
    column_name_fixes = str.maketrans(dict.fromkeys("[]' ,", "_"))
    df.columns = [label.translate(column_name_fixes) for label in df.columns]

    # Keep only the rows whose "answered_correctly" value is not null.
    df = df[df["answered_correctly"].notna()]
    print(df.columns)
    print(df.shape)

    categorical_feature = ["tags1", "tags2", "content_id"]
    print(model_id)

    # Take the first 40 entries of the "feature" column from a previously
    # written CSV (presumably a feature-importance ranking -- TODO confirm
    # the file is sorted by importance).
    importance_table = pd.read_csv(
        "../output/ex_131/20201125230514/imp_train_0.csv")
    use_columns = importance_table["feature"].values[:40].tolist()

    # Train on the label, the user id and the selected features only.
    # NOTE(review): categorical_feature is passed unchanged; verify that its
    # columns are among the selected ones.
    train_lgbm_cv_newuser(
        df[["answered_correctly", "user_id"] + use_columns],
        categorical_feature=categorical_feature,
        params=params,
        output_dir=output_dir,
        model_id=model_id,
        exp_name=model_id,
        is_debug=is_debug,
        drop_user_id=True,
    )

    # Rebind params after the call above, so that call is unaffected.
    # NOTE(review): these keys look like CatBoost options -- confirm against
    # whatever consumes params further down.
    params = {
        "n_estimators": 12000,
        "learning_rate": 0.1,
        "eval_metric": "AUC",
        "loss_function": "Logloss",
        "random_seed": 0,
        "metric_period": 50,
        "od_wait": 400,
        "task_type": "GPU",
        "max_depth": 8,
        "verbose": 100,
    }