["user_answer", "tags", "type_of", "bundle_id", "previous_5_ans"], axis=1) df.columns = [ x.replace("[", "_").replace("]", "_").replace("'", "_").replace( " ", "_").replace(",", "_") for x in df.columns ] df = df[df["answered_correctly"].notnull()] print(df.columns) print(df.shape) categorical_feature = ["content_id"] print(model_id) train_lgbm_cv_newuser(df, categorical_feature=categorical_feature, params=params, output_dir=output_dir, model_id=model_id, exp_name=model_id, is_debug=is_debug, drop_user_id=True) """ params = { 'n_estimators': 12000, 'learning_rate': 0.1, 'eval_metric': 'AUC', 'loss_function': 'Logloss', 'random_seed': 0, 'metric_period': 50, 'od_wait': 400, 'task_type': 'GPU', 'max_depth': 8, "verbose": 100
'metric': 'auc', 'verbosity': -1, "n_estimators": 10000, "early_stopping_rounds": 50 } df.tail(1000).to_csv("exp028.csv", index=False) df = df.drop([ "user_answer", "tags", "type_of", "bundle_id", "previous_3_ans" ], axis=1) df.columns = [ x.replace("[", "_").replace("]", "_").replace("'", "_").replace( " ", "_").replace(",", "_") for x in df.columns ] df = df[df["answered_correctly"].notnull()] categorical_feature = ["tags1", "tags2", "content_id"] print(df.shape) print(model_id) train_lgbm_cv_newuser( df, categorical_feature=categorical_feature, params=params, output_dir=output_dir, model_id=model_id, exp_name=f"{model_id}_w2v_window{window}_size{size}", is_debug=is_debug, drop_user_id=True) break
# Create the output directory (no error if it already exists); the same path
# is passed to the training call below.
os.makedirs(output_dir, exist_ok=True)

# Parameters forwarded to train_lgbm_cv_newuser. The keys look like LightGBM
# options -- presumably consumed by LightGBM inside that helper (not visible
# here; verify). Trailing comments keep the alternative values that were
# noted inline in the source.
params = {
    'objective': 'binary',
    'num_leaves': 32,
    'min_data_in_leaf': 15,  # alternative noted in source: 42
    'max_depth': -1,
    'learning_rate': 0.1,
    'boosting': 'gbdt',
    'bagging_fraction': 0.7,  # alternative noted in source: 0.5
    'feature_fraction': 0.5,
    'bagging_seed': 0,
    'reg_alpha': 0.1,  # alternative noted in source: 1.728910519108444
    'reg_lambda': 1,
    'random_state': 0,
    'metric': 'auc',
    'verbosity': -1,
    "n_estimators": 10000,
    "early_stopping_rounds": 100,
}

# Remove the "user_answer" column before the frame is handed to training.
df = df.drop(columns=["user_answer"])
print(df.columns)

train_lgbm_cv_newuser(
    df,
    params=params,
    output_dir=output_dir,
    model_id=model_id,
    exp_name="exp003",
    new_user_ratio=0.01,
)
# NOTE(review): the source carries a disabled `break` here, which suggests this
# fragment sits inside a loop whose header is outside this view.
df = df.drop( ["user_answer", "tags", "type_of", "bundle_id", "previous_3_ans"], axis=1) df.columns = [ x.replace("[", "_").replace("]", "_").replace("'", "_").replace( " ", "_").replace(",", "_") for x in df.columns ] df = df[df["answered_correctly"].notnull()] print(df.columns) print(df.shape) print(model_id) train_lgbm_cv_newuser(df, params=params, output_dir=output_dir, model_id=model_id, exp_name=model_id, is_debug=is_debug, drop_user_id=True) if is_debug: break # fit df_question = pd.read_csv( "../input/riiid-test-answer-prediction/questions.csv", dtype={ "bundle_id": "int32", "question_id": "int32", "correct_answer": "int8", "part": "int8" })
# Replace "[", "]", "'", " " and "," in every column name with "_" in a single
# pass (same result as chaining five str.replace calls).
_column_name_table = str.maketrans("[]' ,", "_____")
df.columns = [name.translate(_column_name_table) for name in df.columns]

# Keep only the rows whose "answered_correctly" value is not null.
df = df[df["answered_correctly"].notnull()]

print(df.columns)
print(df.shape)

categorical_feature = ["tags1", "tags2", "content_id"]
print(model_id)

# Take the first 40 entries of the "feature" column from a previously written
# importance file -- presumably ordered by importance; verify against the
# code that wrote it.
use_columns = pd.read_csv(
    "../output/ex_131/20201125230514/imp_train_0.csv"
)["feature"].values[:40].tolist()

# Train on the selected features plus the "answered_correctly" and "user_id"
# columns.
train_lgbm_cv_newuser(
    df[["answered_correctly", "user_id"] + use_columns],
    categorical_feature=categorical_feature,
    params=params,
    output_dir=output_dir,
    model_id=model_id,
    exp_name=model_id,
    is_debug=is_debug,
    drop_user_id=True,
)

# NOTE(review): `params` is rebound only after the training call above, so it
# does not affect that call. The keys look like CatBoost options -- presumably
# for a later step outside this view.
params = {
    'n_estimators': 12000,
    'learning_rate': 0.1,
    'eval_metric': 'AUC',
    'loss_function': 'Logloss',
    'random_seed': 0,
    'metric_period': 50,
    'od_wait': 400,
    'task_type': 'GPU',
    'max_depth': 8,
    "verbose": 100,
}