def __init__(self, campaign: Campaign, number_of_visit_model_list: List[DiscreteRegressor], n_arms_profit: int, arm_profit: np.array, min_std: float):
    """
    Joint advertising/pricing bandit initializer.

    :param campaign: advertising campaign shared with the superclass
    :param number_of_visit_model_list: one visit-count regressor per sub-campaign
        (length must match the number of sub-campaigns)
    :param n_arms_profit: number of pricing (profit) arms
    :param arm_profit: profit value associated with each pricing arm
    :param min_std: lower bound applied to estimated standard deviations
        elsewhere in this class (stored here, not used in __init__)
    """
    assert len(number_of_visit_model_list) == campaign.get_n_sub_campaigns()
    super().__init__(campaign=campaign)
    # Number-of-visits data structures: per-sub-campaign regressors plus the
    # raw (arm, reward) history used to refit them.
    self.number_of_visit_model_list: List[DiscreteRegressor] = number_of_visit_model_list
    self.collected_rewards_sub_campaign: List[List] = [[] for _ in range(campaign.get_n_sub_campaigns())]
    self.pulled_arm_sub_campaign: List[List] = [[] for _ in range(campaign.get_n_sub_campaigns())]
    # Pricing data structures: profit_arm_reward_list[c][a] collects observed
    # rewards for sub-campaign c under pricing arm a.
    self.n_arms_price: int = n_arms_profit
    self.arm_profit = arm_profit
    self.profit_arm_reward_list = [[[] for _ in range(n_arms_profit)] for _ in range(campaign.get_n_sub_campaigns())]
    self.min_std = min_std
    # Current state: start from a random pricing arm.
    # NOTE(review): np.random.randint(..., size=1) yields a 1-element array,
    # not a scalar — verify downstream code handles both (update_budget later
    # overwrites this with a plain int).
    self.current_pricing_arm_idx = np.random.randint(low=0, high=n_arms_profit, size=1)
    self.curr_best_budget_idx: List[int] = [0 for _ in range(campaign.get_n_sub_campaigns())]
    self.current_budget_allocation = [0 for _ in range(campaign.get_n_sub_campaigns())]
    # Initialize the budget allocation from one random sample of each visit
    # model, then optimize the campaign once to get a starting allocation.
    for sub_index, model in enumerate(self.number_of_visit_model_list):
        sub_campaign_values = self.number_of_visit_model_list[sub_index].sample_distribution()
        self.campaign.set_sub_campaign_values(sub_index, sub_campaign_values)
    _, best_budgets = CampaignOptimizer.find_best_budgets(self.campaign)
    # Map each chosen budget value back to its index on the budget grid.
    self.curr_best_budget_idx = [np.where(self.campaign.get_budgets() == budget)[0][0] for budget in best_budgets]
def test_find_best_budgets_1(self):
    """Sub-campaign 0 dominates at every budget, so it should take everything."""
    c = Campaign(2, 100, 3)
    for idx, vals in enumerate([[3, 7, 14], [2, 5, 7]]):
        c.set_sub_campaign_values(idx, vals)
    clicks, budgets = CampaignOptimizer.find_best_budgets(c)
    self.assertEqual(clicks, 16)
    self.assertTrue(np.array_equal(budgets, np.array([100, 0])))
def update_budget(self, pulled_arm_list: List[int], n_visits: List[float]):
    """
    Record the latest advertising observations, refit the visit models, and
    jointly re-optimize the pricing arm and the budget allocation.

    :param pulled_arm_list: budget-arm index pulled for each sub-campaign
    :param n_visits: observed number of visits for each sub-campaign
    """
    super(JointBanditFixedDailyPriceQuantile, self).update_budget(pulled_arm_list, n_visits)
    # Append the new (arm, reward) observation for every sub-campaign.
    for i in range(self.campaign.get_n_sub_campaigns()):
        self.pulled_arm_sub_campaign[i].append(pulled_arm_list[i])
        self.collected_rewards_sub_campaign[i].append(n_visits[i])
    # Refit each visit model on its full history.
    for sub_index, model in enumerate(self.number_of_visit_model_list):
        model.fit_model(collected_rewards=self.collected_rewards_sub_campaign[sub_index], pulled_arm_history=self.pulled_arm_sub_campaign[sub_index])
    # Per (sub-campaign, pricing arm): estimate the ad value via an optimistic
    # upper quantile of a normal fit to the observed rewards. The quantile
    # tightens toward 1 as days pass (1 - 1/(t+1)).
    mean_ad_value = np.zeros(shape=(self.campaign.get_n_sub_campaigns(), self.n_arms_price))
    std_ad_value = np.zeros(shape=(self.campaign.get_n_sub_campaigns(), self.n_arms_price))
    percentile = 1 - (1 / (self.day_t + 1))
    for c in range(self.campaign.get_n_sub_campaigns()):
        for arm in range(self.n_arms_price):
            values = np.array(self.profit_arm_reward_list[c][arm])
            # With no data yet, fall back to the arm's nominal profit for both
            # mean and std. NOTE(review): using the profit as a *std* prior
            # looks intentional (wide optimism for unexplored arms) — confirm.
            mean_ad_value[c][arm] = values.mean() if len(values) > 0 else self.arm_profit[arm]
            std_ad_value[c][arm] = values.std() if len(values) > 0 else self.arm_profit[arm]
    # Clamp the std from below to avoid a degenerate (or zero-width) quantile.
    std_ad_value = np.where(std_ad_value < self.min_std, self.min_std, std_ad_value)
    estimated_ad_value = norm.ppf(q=percentile, loc=mean_ad_value, scale=std_ad_value)
    # Draw one sample of the visit curve for each sub-campaign.
    sample_visit = np.zeros(
        shape=(self.campaign.get_n_sub_campaigns(), len(self.number_of_visit_model_list[0].arms)))
    for c in range(self.campaign.get_n_sub_campaigns()):
        sample_visit[c] = self.number_of_visit_model_list[c].sample_distribution()
    # Joint optimization: for each pricing arm, score the campaign with
    # visits * estimated ad value and keep the arm/budget combination with
    # the highest optimized profit.
    best_arm_profit_idx = -1
    curr_max_profit = -1
    curr_best_budget_idx = [-1 for _ in range(self.campaign.get_n_sub_campaigns())]
    for profit_arm_index in range(self.n_arms_price):
        # Load this pricing arm's value curves into the campaign.
        for sub_campaign_idx in range(self.campaign.get_n_sub_campaigns()):
            sub_campaign_visits = sample_visit[sub_campaign_idx]
            sub_campaign_values = sub_campaign_visits * estimated_ad_value[sub_campaign_idx][profit_arm_index]
            self.campaign.set_sub_campaign_values(sub_campaign_idx, sub_campaign_values)
        # Optimize the budget split for this pricing arm.
        max_profit, best_budgets = CampaignOptimizer.find_best_budgets(self.campaign)
        if max_profit > curr_max_profit:
            curr_max_profit = max_profit
            # Convert budget values back to indices on the budget grid.
            curr_best_budget_idx = [np.where(self.campaign.get_budgets() == budget)[0][0] for budget in best_budgets]
            best_arm_profit_idx = profit_arm_index
    self.current_pricing_arm_idx = best_arm_profit_idx
    self.curr_best_budget_idx = curr_best_budget_idx
def test_find_best_budgets_2(self):
    """Sub-campaign 0's top value (20) beats any split, so it takes the full budget."""
    c = Campaign(3, 90, 4)
    for idx, vals in enumerate([[0, 3, 12, 20], [0, 2, 7, 10], [0, 5, 8, 12]]):
        c.set_sub_campaign_values(idx, vals)
    clicks, budgets = CampaignOptimizer.find_best_budgets(c)
    self.assertEqual(clicks, 20)
    self.assertTrue(np.array_equal(budgets, np.array([90, 0, 0])))
def test_find_best_budgets_3(self):
    """Optimal split mixes two sub-campaigns: 75 to sub 0 (20) + 25 to sub 3 (9) = 29."""
    c = Campaign(4, 100, 5)
    sub_values = [
        [0, 3, 12, 20, 15],
        [0, 2, 7, 10, 9],
        [0, 5, 8, 12, 18],
        [0, 9, 9, 10, 7],
    ]
    for idx, vals in enumerate(sub_values):
        c.set_sub_campaign_values(idx, vals)
    clicks, budgets = CampaignOptimizer.find_best_budgets(c)
    self.assertEqual(clicks, 29)
    self.assertTrue(np.array_equal(budgets, np.array([75, 0, 0, 25])))
def test_optimize_1(self):
    """Check the DP tables produced by _optimize on a 2-campaign instance."""
    c = Campaign(2, 100, 3)
    for idx, vals in enumerate([[3, 7, 14], [2, 5, 7]]):
        c.set_sub_campaign_values(idx, vals)
    opt, max_idx = CampaignOptimizer._optimize(c)
    expected_opt = np.array([
        [0, 0, 0],
        [3, 7, 14],
        [5, 9, 16],
    ])
    expected_idx = np.array([
        [-1, -1, -1],
        [0, 1, 2],
        [0, 0, 0],
    ])
    self.assertTrue(np.array_equal(expected_opt, opt))
    self.assertTrue(np.array_equal(expected_idx, max_idx))
def test_find_best_budgets_4(self):
    """
    Fractional-budget scenario on an 11-point grid over [0, 100].

    Marginal values per 10-unit step: sub 0 -> 300 (always), sub 1 -> 400
    (up to 50), sub 2 -> 500 (up to 30). Greedy-optimal split is therefore
    [20, 50, 30] with total value 600 + 2000 + 1500 = 4100.
    """
    campaign = Campaign(3, 100, 11)
    campaign.set_sub_campaign_values(0, [0, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000])
    campaign.set_sub_campaign_values(1, [0, 400, 800, 1200, 1600, 2000, 2000, 2000, 2000, 2000, 2000])
    campaign.set_sub_campaign_values(2, [0, 500, 1000, 1500, 1500, 1500, 1500, 1500, 1500, 1500, 1500])
    max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)
    # Fixed: assert on max_clicks as well (it was computed but never checked)
    # and drop the leftover debug prints.
    self.assertEqual(max_clicks, 4100)
    self.assertTrue(np.array_equal(best_budgets, np.array([20.0, 50.0, 30.0])))
def test_optimize_2(self):
    """Check the DP tables produced by _optimize on a 3-campaign instance."""
    c = Campaign(3, 90, 4)
    for idx, vals in enumerate([[0, 3, 12, 20], [0, 2, 7, 10], [0, 5, 8, 12]]):
        c.set_sub_campaign_values(idx, vals)
    opt, max_idx = CampaignOptimizer._optimize(c)
    expected_opt = np.array([
        [0, 0, 0, 0],
        [0, 3, 12, 20],
        [0, 3, 12, 20],
        [0, 5, 12, 20],
    ])
    expected_idx = np.array([
        [-1, -1, -1, -1],
        [0, 1, 2, 3],
        [0, 0, 0, 0],
        [0, 1, 0, 0],
    ])
    self.assertTrue(np.array_equal(expected_opt, opt))
    self.assertTrue(np.array_equal(expected_idx, max_idx))
def pull_arm(self, value_per_clicks: List[float] = None) -> List[int]:
    """
    Find the best allocation of budgets by optimizing the combinatorial
    problem of the campaign, then return the indices of the chosen budgets.

    :param value_per_clicks: per-sub-campaign multiplier applied to the
        estimated click curves before optimizing; defaults to all ones
        (pure click maximization)
    :return: the indices of the best budgets on the campaign's budget grid
    """
    if value_per_clicks is None:
        value_per_clicks = np.ones(
            shape=self.campaign.get_n_sub_campaigns())
    # NOTE(review): copy() is a shallow copy — whether
    # multiply_sub_campaign_values mutates arrays shared with self.campaign
    # depends on Campaign's internals; verify it does not leak back.
    temp_campaign: Campaign = copy(self.campaign)
    temp_campaign.multiply_sub_campaign_values(value_per_clicks)
    max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(
        temp_campaign)
    # Map each chosen budget value back to its index on the budget grid.
    return [
        np.where(temp_campaign.get_budgets() == budget)[0][0]
        for budget in best_budgets
    ]
def test_optimize_3(self):
    """Check the DP tables produced by _optimize on a 4-campaign instance."""
    c = Campaign(4, 100, 5)
    sub_values = [
        [0, 3, 12, 20, 15],
        [0, 2, 7, 10, 9],
        [0, 5, 8, 12, 18],
        [0, 9, 9, 10, 7],
    ]
    for idx, vals in enumerate(sub_values):
        c.set_sub_campaign_values(idx, vals)
    opt, max_idx = CampaignOptimizer._optimize(c)
    expected_opt = np.array([
        [0, 0, 0, 0, 0],
        [0, 3, 12, 20, 20],
        [0, 3, 12, 20, 22],
        [0, 5, 12, 20, 25],
        [0, 9, 14, 21, 29],
    ])
    expected_idx = np.array([
        [-1, -1, -1, -1, -1],
        [0, 1, 2, 3, 3],
        [0, 0, 0, 0, 1],
        [0, 1, 0, 0, 1],
        [0, 1, 1, 1, 1],
    ])
    self.assertTrue(np.array_equal(expected_opt, opt))
    self.assertTrue(np.array_equal(expected_idx, max_idx))
# Load the scenario with mean (noise-free) click functions so the optimum
# below is the true expected optimum, not a noisy sample.
mean_scenario: Scenario = EnvironmentManager.load_scenario(
    SCENARIO_NAME, get_mean_function=True)
click_function_list: List[IStochasticFunction] = mean_scenario.get_phases(
)[0].get_n_clicks_function()
# Optimal point computation: evaluate each sub-campaign's click curve on the
# budget grid and solve the budget-allocation problem once.
campaign = Campaign(n_sub_campaigns=mean_scenario.get_n_subcampaigns(),
                    cum_budget=BUDGET,
                    n_arms=N_ARMS_ADV)
for i in range(campaign.get_n_sub_campaigns()):
    # draw_sample on the mean scenario is presumably deterministic — confirm.
    sub_campaign_values = [
        click_function_list[i].draw_sample(b)
        for b in np.linspace(0, BUDGET, N_ARMS_ADV)
    ]
    campaign.set_sub_campaign_values(i, sub_campaign_values)
max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)
# # Compute regret
# if CSV_CUM_REGRET:
#     mean_regret_data = np.zeros(shape=(n_bandit + 1, n_days))
#     std_regret_data = np.zeros(shape=(n_bandit + 1, n_days))
#     mean_regret_data[-1] = np.arange(n_days) + 1
#     std_regret_data[-1] = np.arange(n_days) + 1
#
#     for bandit_idx in range(len(BANDIT_NAME)):
#         n_exp = len(total_reward_list[bandit_idx])
#
#         for curr_day in range(n_days):
#             daily_values = []
#             for exp in range(n_exp):
#                 daily_values.append(max_clicks - total_reward_list[bandit_idx][exp][0][curr_day])
# Per-sub-campaign fixed-price baseline: for each sub-campaign pick the price
# maximizing conversion-rate * margin, then optimize budgets once over the
# resulting value curves.
prices_arr = np.linspace(MIN_PRICE, MAX_PRICE, N_ARMS_PRICE)
best_profit_idx = 0
max_value = 0
best_budget_value = [0 for _ in range(mean_scenario.get_n_subcampaigns())]
campaign = Campaign(n_sub_campaigns=mean_scenario.get_n_subcampaigns(),
                    cum_budget=DAILY_BUDGET,
                    n_arms=N_ARMS_ADS)
for i in range(campaign.get_n_sub_campaigns()):
    # Expected clicks of sub-campaign i over the budget grid.
    n_clicks = np.array([click_function_list[i].draw_sample(b) for b in np.linspace(0, DAILY_BUDGET, N_ARMS_ADS)])
    # Expected profit per click for every candidate price (CRP * margin).
    click_values_per_price = [crp_function_list[i].draw_sample(price) * (price - FIXED_COST) for price in prices_arr]
    # NOTE(review): best_profit_idx is overwritten each iteration; after the
    # loop it holds only the LAST sub-campaign's best price index — confirm
    # that is intended (each sub-campaign keeps its own price here).
    best_profit_idx = int(np.argmax(click_values_per_price))
    print("Best price for subcampaign {} is {}".format(i+1, prices_arr[best_profit_idx]))
    values = n_clicks * click_values_per_price[best_profit_idx]
    campaign.set_sub_campaign_values(i, values)
max_value, best_budget_value = CampaignOptimizer.find_best_budgets(campaign)
print("Best budget allocation is {}\n".format(best_budget_value))
print("The expected best daily profit is given by {}\n".format(max_value))
# Instantaneous rewards computation: mean/std of per-day rewards across
# experiments, last row holds the day index for CSV export.
if CSV_REWARD:
    n_days = len(total_reward_list[0][0])
    mean_reward = np.zeros(shape=(n_bandit+1, n_days))
    std_reward = np.zeros(shape=(n_bandit+1, n_days))
    mean_reward[-1] = np.arange(n_days)
    std_reward[-1] = np.arange(n_days)
    for bandit_idx in range(n_bandit):
        n_exp = len(total_reward_list[bandit_idx])
def pull_budget(self) -> List[int]:
    """
    Choose the budget allocation that maximizes the optimized value across
    all pricing arms, then rebuild the shared price learner with rewards
    rebalanced by the estimated click distribution of the chosen allocation.

    :return: indices (on the campaign's budget grid) of the chosen budgets
    """
    max_best_clicks = 0
    max_best_budgets = None
    # Hoisted out of the arm loop: the sub-campaign click estimates do not
    # change while we scan pricing arms.
    subcampaign_values = self.campaign.get_sub_campaigns()
    for arm_idx in range(len(self.arm_values)):
        # Build a campaign whose values are clicks scaled by this arm's
        # value-per-click, and optimize THAT campaign.
        arm_value_campaign = Campaign(self.campaign.get_n_sub_campaigns(),
                                      self.campaign.get_cum_budget(),
                                      len(self.campaign.get_budgets()))
        for sub_idx in range(len(subcampaign_values)):
            arm_value_campaign.set_sub_campaign_values(
                sub_idx,
                subcampaign_values[sub_idx] * self.value_per_clicks_per_price_idx[arm_idx][sub_idx])
        # BUG FIX: the original optimized self.campaign here, so the
        # per-arm campaign built above was never used and every arm
        # produced the same allocation.
        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(
            arm_value_campaign)
        if max_best_clicks < max_clicks:
            max_best_clicks = max_clicks
            max_best_budgets = best_budgets
    # Balance the price learner with the estimated click distribution of the
    # chosen allocation.
    budget_value_to_index = {
        value: i
        for i, value in enumerate(self.campaign.get_budgets())
    }
    estimated_clicks = np.array([
        self.campaign.get_sub_campaigns()[sub_idx, budget_value_to_index[budget]]
        for sub_idx, budget in enumerate(max_best_budgets)
    ])
    # NOTE(review): assumes total estimated clicks > 0; a zero sum would
    # produce NaN probabilities — confirm upstream guarantees this.
    user_probabilities = estimated_clicks / np.sum(estimated_clicks)
    # Rebuild the unique price learner from scratch with class-balanced
    # subsamples of the per-class reward histories.
    self.unique_price_learner: DiscreteBandit = self.price_bandit_class(
        **self.price_bandit_kwargs)
    for arm_idx in range(len(self.arm_values)):
        rewards_per_subcampaign = []
        for sub_idx in range(self.campaign.get_n_sub_campaigns()):
            rewards_per_subcampaign.append(
                self.rewards_per_arm_per_user_class[sub_idx][arm_idx])
        rewards_len = np.array(
            [len(rewards) for rewards in rewards_per_subcampaign])
        # Largest total sample size that respects the target class mix.
        solution = np.min(rewards_len / user_probabilities)
        balanced_rewards_len = np.array(np.floor(solution * user_probabilities),
                                        dtype=int)
        # Clip balanced lengths into [0, available rewards].
        balanced_rewards_len = np.maximum(balanced_rewards_len, 0)
        balanced_rewards_len = np.minimum(balanced_rewards_len, rewards_len)
        for sub_idx in range(self.campaign.get_n_sub_campaigns()):
            sampled_rewards = np.random.choice(
                rewards_per_subcampaign[sub_idx],
                size=balanced_rewards_len[sub_idx],
                replace=False)
            for reward in sampled_rewards:
                self.unique_price_learner.update(arm_idx, reward)
    return [budget_value_to_index[budget] for budget in max_best_budgets]
# Joint grid search: for every candidate price, rebuild the campaign's value
# curves (clicks * margin at that price) and optimize the budget split; keep
# the price/allocation pair with the highest optimized profit.
max_value = 0
best_budget_value = [0 for _ in range(mean_scenario.get_n_subcampaigns())]
campaign = Campaign(n_sub_campaigns=mean_scenario.get_n_subcampaigns(),
                    cum_budget=DAILY_BUDGET,
                    n_arms=N_ARMS_ADS)
for price_idx, price in enumerate(prices_arr):
    for i in range(campaign.get_n_sub_campaigns()):
        # Expected clicks over the budget grid for sub-campaign i.
        n_clicks = np.array([click_function_list[i].draw_sample(b) for b in np.linspace(0, DAILY_BUDGET, N_ARMS_ADS)])
        # Expected profit per click at this price (CRP * margin).
        click_value = crp_function_list[i].draw_sample(price) * (price - FIXED_COST)
        values: np.array = n_clicks * click_value
        # NOTE(review): leftover debug output for sub-campaign 2 — consider
        # removing once the numbers have been verified.
        if i == 2:
            print(n_clicks)
            print(click_value)
            print(values)
            print("\n")
        campaign.set_sub_campaign_values(i, values)
    curr_value, curr_budgets = CampaignOptimizer.find_best_budgets(campaign)
    if curr_value > max_value:
        best_profit_idx = price_idx
        max_value = curr_value
        best_budget_value = curr_budgets
print("Best budget allocation is reached for price = {}\n".format(prices_arr[best_profit_idx]))
print("Best budget allocation is {}\n".format(best_budget_value))
print("The expected best daily profit is given by {}\n".format(max_value))
# Instantaneous rewards computation (continues past this chunk).
if CSV_REWARD:
    n_days = len(total_reward_list[0][0])
    mean_reward = np.zeros(shape=(n_bandit+1, n_days))
    std_reward = np.zeros(shape=(n_bandit+1, n_days))