from typing import Dict

import numpy as np
from sklearn.metrics import roc_auc_score

from obp.ope import ImportanceWeightEstimator, PropensityScoreEstimator, RegressionModel
from obp.types import BanditFeedback


def test_performance_of_importance_weight_estimator(
    fixed_synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray
) -> None:
    """
    Test the performance of the importance weight estimator on synthetic bandit data
    with a random evaluation policy, where the importance weight is estimated by
    binary classification base models.
    """
    bandit_feedback = fixed_synthetic_bandit_feedback.copy()
    action_dist = random_action_dist
    random_state = 12345
    auc_scores: Dict[str, float] = {}
    fit_methods = ["sample", "raw"]
    for fit_method in fit_methods:
        for model_name, model in binary_model_dict.items():
            importance_weight_estimator = ImportanceWeightEstimator(
                n_actions=bandit_feedback["n_actions"],
                action_context=bandit_feedback["action_context"],
                base_model=model(**hyperparams[model_name]),
                fitting_method=fit_method,
                len_list=1,
            )
            # train importance weight estimator on logged bandit feedback data
            estimated_importance_weight = importance_weight_estimator.fit_predict(
                context=bandit_feedback["context"],
                action=bandit_feedback["action"],
                action_dist=action_dist,
                n_folds=2,  # 2-fold cross-fitting
                random_state=random_state,
                evaluate_model_performance=True,
            )
            assert np.all(
                estimated_importance_weight >= 0
            ), "estimated_importance_weight must be non-negative"
            # collect out-of-fold labels and predicted probabilities
            tmp_y = []
            tmp_pred = []
            for i in range(len(importance_weight_estimator.eval_result["y"])):
                tmp_y.append(importance_weight_estimator.eval_result["y"][i])
                tmp_pred.append(importance_weight_estimator.eval_result["proba"][i])
            y_test = np.array(tmp_y).flatten()
            y_pred = np.array(tmp_pred).flatten()
            auc_scores[model_name + "_" + fit_method] = roc_auc_score(
                y_true=y_test,
                y_score=y_pred,
            )
    for model_name in auc_scores:
        print(f"AUC of {model_name} is {auc_scores[model_name]}")
        assert (
            auc_scores[model_name] > 0.5
        ), f"AUC of {model_name} should be greater than 0.5"
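# The tests in this module assume module-level `binary_model_dict` and
# `hyperparams` objects (in the actual test suite these live alongside the
# tests or in a shared config). A minimal illustrative sketch of what they
# might look like -- the concrete models and hyperparameter values below are
# assumptions for this sketch, not the repository's actual configuration,
# and would normally sit at the top of the module:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

binary_model_dict = dict(
    logistic_regression=LogisticRegression,
    random_forest=RandomForestClassifier,
)
hyperparams = dict(
    logistic_regression={"max_iter": 10000, "C": 100, "random_state": 12345},
    random_forest={"n_estimators": 100, "max_depth": 5, "random_state": 12345},
)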
def test_performance_of_propensity_score_estimator(
    fixed_synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the performance of the propensity score estimator on synthetic bandit data,
    where the propensity score is estimated by binary classification base models.
    """
    bandit_feedback = fixed_synthetic_bandit_feedback.copy()
    random_state = 12345
    auc_scores: Dict[str, float] = {}
    for model_name, model in binary_model_dict.items():
        propensity_score_estimator = PropensityScoreEstimator(
            n_actions=bandit_feedback["n_actions"],
            base_model=model(**hyperparams[model_name]),
            len_list=1,
        )
        # train propensity score estimator on logged bandit feedback data
        estimated_propensity_score = propensity_score_estimator.fit_predict(
            context=bandit_feedback["context"],
            action=bandit_feedback["action"],
            n_folds=2,  # 2-fold cross-fitting
            random_state=random_state,
            evaluate_model_performance=True,
        )
        assert np.all(
            estimated_propensity_score >= 0
        ), "estimated_propensity_score must be non-negative"
        # collect out-of-fold labels and predicted probabilities
        tmp_y = []
        tmp_pred = []
        for i in range(len(propensity_score_estimator.eval_result["y"])):
            tmp_y.append(propensity_score_estimator.eval_result["y"][i])
            tmp_pred.append(propensity_score_estimator.eval_result["proba"][i])
        y_test = np.array(tmp_y).flatten()
        # stack per-fold class-probability matrices for multi-class AUC
        y_pred = np.array(tmp_pred).reshape(-1, tmp_pred[0].shape[1])
        auc_scores[model_name] = roc_auc_score(
            y_true=y_test, y_score=y_pred, multi_class="ovo"
        )
    for model_name in auc_scores:
        print(f"AUC (macro-ovo) of {model_name} is {auc_scores[model_name]}")
        assert (
            auc_scores[model_name] > 0.5
        ), f"AUC of {model_name} should be greater than 0.5"
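# Sketch of the pytest fixtures these tests consume (normally defined in
# conftest.py). This is one plausible definition built on
# obp.dataset.SyntheticBanditDataset; the dataset settings and sizes here
# are assumptions for illustration:
import pytest
from obp.dataset import SyntheticBanditDataset, logistic_reward_function


@pytest.fixture(scope="session")
def fixed_synthetic_bandit_feedback() -> BanditFeedback:
    dataset = SyntheticBanditDataset(
        n_actions=10,
        dim_context=5,
        reward_type="binary",
        reward_function=logistic_reward_function,
        random_state=12345,
    )
    bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=10000)
    if bandit_feedback["position"] is None:
        # the regression-model test indexes by position, so materialize zeros
        bandit_feedback["position"] = np.zeros(bandit_feedback["n_rounds"], dtype=int)
    return bandit_feedback


@pytest.fixture(scope="session")
def random_action_dist(fixed_synthetic_bandit_feedback) -> np.ndarray:
    # uniform (random) evaluation policy: shape (n_rounds, n_actions, len_list)
    n_rounds = fixed_synthetic_bandit_feedback["n_rounds"]
    n_actions = fixed_synthetic_bandit_feedback["n_actions"]
    return np.ones((n_rounds, n_actions, 1)) / n_actions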
def test_performance_of_regression_models(
    fixed_synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray
) -> None:
    """
    Test the performance of the regression model on synthetic bandit data
    with a random evaluation policy, where the expected reward is estimated by
    binary classification base models.
    """
    bandit_feedback = fixed_synthetic_bandit_feedback.copy()
    expected_reward = np.expand_dims(bandit_feedback["expected_reward"], axis=-1)
    action_dist = random_action_dist
    # compute the ground-truth policy value using the expected reward
    q_pi_e = np.average(expected_reward[:, :, 0], weights=action_dist[:, :, 0], axis=1)
    # mean of the ground-truth policy value
    gt_mean = q_pi_e.mean()
    random_state = 12345
    auc_scores: Dict[str, float] = {}
    # check ground truth
    print(f"gt_mean: {gt_mean}")
    # check the performance of regression models using the doubly robust criterion,
    # i.e., |\hat{q} - q| <= |q| should hold with high probability
    dr_criteria_pass_rate = 0.8
    fit_methods = ["normal", "iw", "mrdr"]
    for fit_method in fit_methods:
        for model_name, model in binary_model_dict.items():
            regression_model = RegressionModel(
                n_actions=bandit_feedback["n_actions"],
                len_list=int(bandit_feedback["position"].max() + 1),
                action_context=bandit_feedback["action_context"],
                base_model=model(**hyperparams[model_name]),
                fitting_method=fit_method,
            )
            if fit_method == "normal":
                # train regression model on logged bandit feedback data
                estimated_rewards_by_reg_model = regression_model.fit_predict(
                    context=bandit_feedback["context"],
                    action=bandit_feedback["action"],
                    reward=bandit_feedback["reward"],
                    n_folds=3,  # 3-fold cross-fitting
                    random_state=random_state,
                )
            else:
                # "iw" and "mrdr" additionally require pscore, position, and action_dist
                estimated_rewards_by_reg_model = regression_model.fit_predict(
                    context=bandit_feedback["context"],
                    action=bandit_feedback["action"],
                    reward=bandit_feedback["reward"],
                    pscore=bandit_feedback["pscore"],
                    position=bandit_feedback["position"],
                    action_dist=action_dist,
                    n_folds=3,  # 3-fold cross-fitting
                    random_state=random_state,
                )
            auc_scores[model_name + "_" + fit_method] = roc_auc_score(
                y_true=bandit_feedback["reward"],
                y_score=estimated_rewards_by_reg_model[
                    np.arange(bandit_feedback["reward"].shape[0]),
                    bandit_feedback["action"],
                    bandit_feedback["position"],
                ],
            )
            # check the doubly robust criterion
            dr_criteria = np.abs(gt_mean - estimated_rewards_by_reg_model) - np.abs(
                gt_mean
            )
            print(
                f"DR criterion is satisfied with probability {np.mean(dr_criteria <= 0)}"
                f" ------ model: {model_name} ({fit_method})"
            )
            assert (
                np.mean(dr_criteria <= 0) >= dr_criteria_pass_rate
            ), f"DR criterion should be satisfied with probability at least {dr_criteria_pass_rate}"
    for model_name in auc_scores:
        print(f"AUC of {model_name} is {auc_scores[model_name]}")
        assert (
            auc_scores[model_name] > 0.5
        ), f"AUC of {model_name} should be greater than 0.5"
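# Where the regression-model output goes in practice: the cross-fitted reward
# predictions feed model-dependent OPE estimators such as Doubly Robust. A
# short usage sketch using obp's OffPolicyEvaluation API -- the helper name
# and estimator choices below are illustrative, not part of the test suite:
from obp.ope import DoublyRobust, InverseProbabilityWeighting, OffPolicyEvaluation


def estimate_policy_value_with_dr(
    bandit_feedback: BanditFeedback,
    action_dist: np.ndarray,
    estimated_rewards_by_reg_model: np.ndarray,
) -> Dict[str, float]:
    ope = OffPolicyEvaluation(
        bandit_feedback=bandit_feedback,
        ope_estimators=[InverseProbabilityWeighting(), DoublyRobust()],
    )
    # returns a dict mapping estimator name (e.g., "ipw", "dr") to its estimate
    return ope.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )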