def test_find_best_budgets_1(self):
    """Two sub-campaigns, 3 budget points: the optimum puts all budget on sub-campaign 0."""
    campaign = Campaign(2, 100, 3)
    for idx, values in enumerate([[3, 7, 14], [2, 5, 7]]):
        campaign.set_sub_campaign_values(idx, values)
    max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)
    self.assertEqual(max_clicks, 16)
    self.assertTrue(np.array_equal(best_budgets, np.array([100, 0])))
def test_find_best_budgets_2(self):
    """Three sub-campaigns, 4 budget points: again all budget goes to sub-campaign 0."""
    campaign = Campaign(3, 90, 4)
    for idx, values in enumerate([[0, 3, 12, 20], [0, 2, 7, 10], [0, 5, 8, 12]]):
        campaign.set_sub_campaign_values(idx, values)
    max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)
    self.assertEqual(max_clicks, 20)
    self.assertTrue(np.array_equal(best_budgets, np.array([90, 0, 0])))
def test_optimize_1(self):
    """Check the DP tables returned by _optimize for a 2-sub-campaign, 3-budget case."""
    campaign = Campaign(2, 100, 3)
    for idx, values in enumerate([[3, 7, 14], [2, 5, 7]]):
        campaign.set_sub_campaign_values(idx, values)
    opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)
    expected_opt = np.array([[0, 0, 0], [3, 7, 14], [5, 9, 16]])
    expected_idx = np.array([[-1, -1, -1], [0, 1, 2], [0, 0, 0]])
    self.assertTrue(np.array_equal(expected_opt, opt_matrix))
    self.assertTrue(np.array_equal(expected_idx, max_idx_matrix))
def test_find_best_budgets_3(self):
    """Four sub-campaigns, 5 budget points: optimum splits budget across two sub-campaigns."""
    campaign = Campaign(4, 100, 5)
    sub_values = [
        [0, 3, 12, 20, 15],
        [0, 2, 7, 10, 9],
        [0, 5, 8, 12, 18],
        [0, 9, 9, 10, 7],
    ]
    for idx, values in enumerate(sub_values):
        campaign.set_sub_campaign_values(idx, values)
    max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)
    self.assertEqual(max_clicks, 29)
    self.assertTrue(np.array_equal(best_budgets, np.array([75, 0, 0, 25])))
def test_optimize_2(self):
    """Check the DP tables returned by _optimize for a 3-sub-campaign, 4-budget case."""
    campaign = Campaign(3, 90, 4)
    for idx, values in enumerate([[0, 3, 12, 20], [0, 2, 7, 10], [0, 5, 8, 12]]):
        campaign.set_sub_campaign_values(idx, values)
    opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)
    expected_opt = np.array([[0, 0, 0, 0],
                             [0, 3, 12, 20],
                             [0, 3, 12, 20],
                             [0, 5, 12, 20]])
    expected_idx = np.array([[-1, -1, -1, -1],
                             [0, 1, 2, 3],
                             [0, 0, 0, 0],
                             [0, 1, 0, 0]])
    self.assertTrue(np.array_equal(expected_opt, opt_matrix))
    self.assertTrue(np.array_equal(expected_idx, max_idx_matrix))
def test_optimize_3(self):
    """Check the DP tables returned by _optimize for a 4-sub-campaign, 5-budget case."""
    campaign = Campaign(4, 100, 5)
    sub_values = [
        [0, 3, 12, 20, 15],
        [0, 2, 7, 10, 9],
        [0, 5, 8, 12, 18],
        [0, 9, 9, 10, 7],
    ]
    for idx, values in enumerate(sub_values):
        campaign.set_sub_campaign_values(idx, values)
    opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)
    expected_opt = np.array([[0, 0, 0, 0, 0],
                             [0, 3, 12, 20, 20],
                             [0, 3, 12, 20, 22],
                             [0, 5, 12, 20, 25],
                             [0, 9, 14, 21, 29]])
    expected_idx = np.array([[-1, -1, -1, -1, -1],
                             [0, 1, 2, 3, 3],
                             [0, 0, 0, 0, 1],
                             [0, 1, 0, 0, 1],
                             [0, 1, 1, 1, 1]])
    self.assertTrue(np.array_equal(expected_opt, opt_matrix))
    self.assertTrue(np.array_equal(expected_idx, max_idx_matrix))
def test_find_best_budgets_4(self):
    """Linear-then-saturating click curves over 11 budget points.

    Per-sub-campaign curves (clicks per budget unit, saturation cap):
      sub 0: 100*(0.0+0.3)*b = 30*b, capped at 3000
      sub 1: 100*(0.2+0.2)*b = 40*b, capped at 2000
      sub 2: 100*(0.4+0.1)*b = 50*b, capped at 1500
    """
    campaign = Campaign(3, 100, 11)
    sub_values = [
        [0, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000],
        [0, 400, 800, 1200, 1600, 2000, 2000, 2000, 2000, 2000, 2000],
        [0, 500, 1000, 1500, 1500, 1500, 1500, 1500, 1500, 1500, 1500],
    ]
    for idx, values in enumerate(sub_values):
        campaign.set_sub_campaign_values(idx, values)
    max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)
    print(max_clicks)
    print(best_budgets)
    expected_budgets = np.array([20.0, 50.0, 30.0])
    self.assertTrue(np.array_equal(best_budgets, expected_budgets))
# Something mean_scenario: Scenario = EnvironmentManager.load_scenario( SCENARIO_NAME, get_mean_function=True) click_function_list: List[IStochasticFunction] = mean_scenario.get_phases( )[0].get_n_clicks_function() # Optimal point computation campaign = Campaign(n_sub_campaigns=mean_scenario.get_n_subcampaigns(), cum_budget=BUDGET, n_arms=N_ARMS_ADV) for i in range(campaign.get_n_sub_campaigns()): sub_campaign_values = [ click_function_list[i].draw_sample(b) for b in np.linspace(0, BUDGET, N_ARMS_ADV) ] campaign.set_sub_campaign_values(i, sub_campaign_values) max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign) # # Compute regret # if CSV_CUM_REGRET: # mean_regret_data = np.zeros(shape=(n_bandit + 1, n_days)) # std_regret_data = np.zeros(shape=(n_bandit + 1, n_days)) # mean_regret_data[-1] = np.arange(n_days) + 1 # std_regret_data[-1] = np.arange(n_days) + 1 # # for bandit_idx in range(len(BANDIT_NAME)): # n_exp = len(total_reward_list[bandit_idx]) # # for curr_day in range(n_days): # daily_values = [] # for exp in range(n_exp):
def pull_budget(self) -> List[int]:
    """Select the budget allocation to play next.

    For every price arm, scale the estimated click curves by that arm's
    value per click, optimize the resulting campaign, and keep the
    allocation with the highest optimized value.  The per-class reward
    samples are then sub-sampled so their proportions match the estimated
    click distribution and replayed into a freshly created price learner.

    Returns:
        A list with one budget index (into ``self.campaign.get_budgets()``)
        per sub-campaign.
    """
    # The estimated click curves are identical for every price arm; only
    # the value-per-click scaling differs, so fetch the invariants once.
    subcampaign_values = self.campaign.get_sub_campaigns()
    n_budgets = len(self.campaign.get_budgets())

    max_best_clicks = 0
    max_best_budgets = None
    for arm_idx in range(len(self.arm_values)):
        # Campaign whose values are the click estimates scaled by this
        # arm's value per click, per sub-campaign.
        arm_value_campaign = Campaign(self.campaign.get_n_sub_campaigns(),
                                      self.campaign.get_cum_budget(),
                                      n_budgets)
        for sub_idx in range(len(subcampaign_values)):
            arm_value_campaign.set_sub_campaign_values(
                sub_idx,
                subcampaign_values[sub_idx] *
                self.value_per_clicks_per_price_idx[arm_idx][sub_idx])
        # Bug fix: optimize the value-scaled campaign.  Previously
        # `self.campaign` was passed here, so `arm_value_campaign` was
        # built but never used and every arm produced the same result.
        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(
            arm_value_campaign)
        # The `is None` guard keeps a valid allocation even when every
        # arm yields zero clicks (previously this left None and crashed
        # below).
        if max_best_budgets is None or max_best_clicks < max_clicks:
            max_best_clicks = max_clicks
            max_best_budgets = best_budgets

    # Balance price learner with number of clicks distribution estimation
    budget_value_to_index = {
        value: i for i, value in enumerate(self.campaign.get_budgets())
    }
    estimated_clicks = np.array([
        self.campaign.get_sub_campaigns()[sub_idx,
                                          budget_value_to_index[budget]]
        for sub_idx, budget in enumerate(max_best_budgets)
    ])
    user_probabilities = estimated_clicks / np.sum(estimated_clicks)

    # Re-create the price learner from scratch and replay a subsample of
    # the observed rewards, balanced to match `user_probabilities`.
    self.unique_price_learner: DiscreteBandit = self.price_bandit_class(
        **self.price_bandit_kwargs)
    for arm_idx in range(len(self.arm_values)):
        rewards_per_subcampaign = [
            self.rewards_per_arm_per_user_class[sub_idx][arm_idx]
            for sub_idx in range(self.campaign.get_n_sub_campaigns())
        ]
        rewards_len = np.array(
            [len(rewards) for rewards in rewards_per_subcampaign])
        # Largest total sample size whose per-class split matches the
        # target probabilities without exceeding the available rewards.
        solution = np.min(rewards_len / user_probabilities)
        balanced_rewards_len = np.array(
            np.floor(solution * user_probabilities), dtype=int)
        # Clip balanced lengths between 0 and the original rewards length
        balanced_rewards_len = np.clip(balanced_rewards_len, 0, rewards_len)
        for sub_idx in range(self.campaign.get_n_sub_campaigns()):
            sampled_rewards = np.random.choice(
                rewards_per_subcampaign[sub_idx],
                size=balanced_rewards_len[sub_idx],
                replace=False)
            for reward in sampled_rewards:
                self.unique_price_learner.update(arm_idx, reward)

    return [budget_value_to_index[budget] for budget in max_best_budgets]