df = transform(df) os.makedirs(output_dir, exist_ok=True) params = { 'objective': 'binary', 'num_leaves': 32, 'min_data_in_leaf': 15, # 42, 'max_depth': -1, 'learning_rate': 0.1, 'boosting': 'gbdt', 'bagging_fraction': 0.7, # 0.5, 'feature_fraction': 0.5, 'bagging_seed': 0, 'reg_alpha': 0.1, # 1.728910519108444, 'reg_lambda': 1, 'random_state': 0, 'metric': 'auc', 'verbosity': -1, "n_estimators": 10000, "early_stopping_rounds": 100 } df = df.drop(["user_answer"], axis=1) train_lgbm_cv(df, params=params, output_dir=output_dir, model_id=model_id, exp_name="exp003") break
# Fragment: closing entries of a parameter dict whose opening lies outside
# this excerpt.
    'verbosity': -1,
    "n_estimators": 10000,
    "early_stopping_rounds": 50
}

# Write the last 1000 rows (all columns still present) to a CSV in the
# current working directory.
df.tail(1000).to_csv("exp028.csv", index=False)
# Remove three columns, then keep only rows whose answered_correctly is
# non-null.
df = df.drop(["user_answer", "tags", "type_of"], axis=1)
df = df[df["answered_correctly"].notnull()]
print(df.columns)
print(df.shape)
print(model_id)
# model_id is passed both as the model identifier and as the experiment name.
train_lgbm_cv(df,
              params=params,
              output_dir=output_dir,
              model_id=model_id,
              exp_name=model_id,
              is_debug=is_debug,
              drop_user_id=True)

# A second parameter set replaces `params` after the run above.
# NOTE(review): keys such as 'task_type', 'od_wait' and 'loss_function' look
# like CatBoost options -- confirm against whatever consumes this dict.
# The dict continues past the end of this excerpt.
params = {
    'n_estimators': 12000,
    'learning_rate': 0.1,
    'eval_metric': 'AUC',
    'loss_function': 'Logloss',
    'random_seed': 0,
    'metric_period': 50,
    'od_wait': 400,
    'task_type': 'GPU',
    'max_depth': 8,
    "verbose": 100
# df.to_pickle("../input/riiid-test-answer-prediction/train_merged.pickle") # print(len(df)) # df = pd.read_pickle("../input/riiid-test-answer-prediction/train_merged.pickle") df = transform(df) os.makedirs(output_dir, exist_ok=True) params = { 'objective': 'binary', 'num_leaves': 32, 'min_data_in_leaf': 15, # 42, 'max_depth': -1, 'learning_rate': 0.1, 'boosting': 'gbdt', 'bagging_fraction': 0.7, # 0.5, 'feature_fraction': 0.5, 'bagging_seed': 0, 'reg_alpha': 0.1, # 1.728910519108444, 'reg_lambda': 1, 'random_state': 0, 'metric': 'auc', 'verbosity': -1, "n_estimators": 10000, "early_stopping_rounds": 100 } df = df.drop(["user_answer", "row_id"], axis=1) train_lgbm_cv(df, params=params, output_dir=output_dir, model_id=model_id) break
# Fragment: entries of a parameter dict whose opening (presumably
# `params = {`) lies outside this excerpt.
# NOTE(review): LightGBM documents that bagging_fraction only takes effect
# when bagging_freq is non-zero; no bagging_freq appears among the visible
# entries -- confirm.
    'objective': 'binary',
    'num_leaves': 32,
    'min_data_in_leaf': 15,  # other value noted by the author: 42
    'max_depth': -1,
    'learning_rate': 0.1,
    'boosting': 'gbdt',
    'bagging_fraction': 0.7,  # other value noted by the author: 0.5
    'feature_fraction': 0.5,
    'bagging_seed': 0,
    'reg_alpha': 0.1,  # other value noted by the author: 1.728910519108444
    'reg_lambda': 1,
    'random_state': 0,
    'metric': 'auc',
    'verbosity': -1,
    "n_estimators": 10000,
    "early_stopping_rounds": 100
}

# user_answer is removed before the frame reaches the trainer.
df = df.drop(["user_answer"], axis=1)
print(df.columns)
# Run identifier: base name of the input file with every ".pickle"
# occurrence removed.
model_id = os.path.basename(fname).replace(".pickle", "")
print(model_id)
# The experiment name embeds the run identifier after an "exp008_" prefix.
train_lgbm_cv(df,
              params=params,
              output_dir=output_dir,
              model_id=model_id,
              exp_name=f"exp008_{model_id}",
              drop_user_id=True)
# Exits the enclosing loop, which lies outside this excerpt.
break
print(model_id)
# Random search: every pass draws a new parameter combination with
# random.choice and trains once with it. The draws use the random module's
# global state; no seeding is visible in this excerpt.
for _ in range(10000):
    params = {
        'objective': 'binary',
        'num_leaves': random.choice([8, 16, 32, 64, 128]),
        'max_depth': -1,
        'learning_rate': 0.3,
        # 'gbdt' is listed three times, so it is drawn three times as often
        # as 'goss'.
        'boosting': random.choice(['gbdt', 'gbdt', 'gbdt', 'goss']),
        'bagging_fraction': random.choice([0.1, 0.5, 0.7, 0.9]),
        'feature_fraction': random.choice([0.1, 0.3, 0.5, 0.7, 0.9]),
        'bagging_seed': 0,
        'reg_alpha': random.choice([0, 0.1, 1, 5]),
        'reg_lambda': random.choice([0, 0.1, 1, 5]),
        'random_state': 0,
        'metric': 'auc',
        'verbosity': -1,
        "n_estimators": 10000,
        "early_stopping_rounds": 100
    }
    # Recomputed on every pass; fname is not reassigned inside this loop, so
    # the value is the same each time.
    model_id = os.path.basename(fname).replace(".pickle", "")
    train_lgbm_cv(df,
                  params=params,
                  output_dir=output_dir,
                  model_id=model_id,
                  exp_name=model_id,
                  drop_user_id=True,
                  experiment_id=2)
    del params
# NOTE(review): indentation was lost in this excerpt. `break` is placed
# after the search loop (exiting an enclosing loop that is not visible);
# confirm it was not meant to sit inside the `for _` body.
break
# Fixed parameter set for the "exp11" run; handed unchanged to train_lgbm_cv.
# NOTE(review): LightGBM documents that bagging_fraction only takes effect
# when bagging_freq is non-zero; no bagging_freq is set here -- confirm
# whether train_lgbm_cv supplies one.
params = {
    "objective": "binary",
    "num_leaves": 32,
    "min_data_in_leaf": 15,  # other value noted by the author: 42
    "max_depth": -1,
    "learning_rate": 0.3,
    "boosting": "gbdt",
    "bagging_fraction": 0.7,  # other value noted by the author: 0.5
    "feature_fraction": 0.9,
    "bagging_seed": 0,
    "reg_alpha": 5,  # other value noted by the author: 1.728910519108444
    "reg_lambda": 5,
    "random_state": 0,
    "metric": "auc",
    "verbosity": -1,
    "n_estimators": 10000,
    "early_stopping_rounds": 100,
}

# Remove three columns, then keep only rows whose answered_correctly is
# non-null.
df = df.drop(columns=["user_answer", "tags", "type_of"])
df = df.loc[df["answered_correctly"].notna()]

print(df.columns)
print(df.shape)

train_lgbm_cv(
    df,
    params=params,
    output_dir=output_dir,
    model_id=0,
    exp_name="exp11",
    drop_user_id=True,
)
"n_estimators": 10000, "early_stopping_rounds": 50 } df.tail(1000).to_csv("exp028.csv", index=False) df = df[df["answered_correctly"].notnull()] print("lgbm") print(df.columns) print(df.shape) model_id = os.path.basename(fname).replace(".pickle", "") print(model_id) train_lgbm_cv(df.drop(["user_answer", "tags", "type_of"], axis=1), params=params, output_dir=output_dir, model_id=model_id, exp_name=model_id, is_debug=is_debug, drop_user_id=True) useful_cols = [ "target_enc_content_id", "target_enc_['content_id', 'prior_question_had_explanation']", "user_rate_mean_content_id", "user_rate_sum_content_id", "shiftdiff_timestamp_by_user_id", "previous_answer_['user_id', 'content_id']", "target_enc_['content_id', 'user_count_bin']", "target_enc_['user_id', 'part']", "diff_user_level_target_enc_content_id", "user_id", "answered_correctly" ]