def __init__(self, campaign: Campaign, number_of_visit_model_list: List[DiscreteRegressor],
                 n_arms_profit: int, arm_profit: np.array, min_std: float):
        """
        Joint bandit that learns a fixed daily price and a budget allocation.

        :param campaign: the advertising campaign to be optimized
        :param number_of_visit_model_list: one visit-count regressor per sub-campaign
        :param n_arms_profit: number of pricing (profit) arms
        :param arm_profit: profit value associated with each pricing arm
        :param min_std: lower bound applied to the estimated profit std-dev
        """
        # One visit model per sub-campaign is required
        assert len(number_of_visit_model_list) == campaign.get_n_sub_campaigns()

        super().__init__(campaign=campaign)

        # Number of visit data structure
        self.number_of_visit_model_list: List[DiscreteRegressor] = number_of_visit_model_list
        # Observed visits per sub-campaign, appended at every update
        self.collected_rewards_sub_campaign: List[List] = [[] for _ in range(campaign.get_n_sub_campaigns())]
        # Budget-arm indices pulled per sub-campaign, appended at every update
        self.pulled_arm_sub_campaign: List[List] = [[] for _ in range(campaign.get_n_sub_campaigns())]

        # Pricing data structure
        self.n_arms_price: int = n_arms_profit
        self.arm_profit = arm_profit
        # profit_arm_reward_list[c][a]: rewards observed for pricing arm a on sub-campaign c
        self.profit_arm_reward_list = [[[] for _ in range(n_arms_profit)] for _ in
                                       range(campaign.get_n_sub_campaigns())]
        self.min_std = min_std

        # Current data structure
        # NOTE(review): randint(..., size=1) yields a 1-element array, while
        # update_budget later stores a plain int here — confirm consumers
        # accept both representations
        self.current_pricing_arm_idx = np.random.randint(low=0, high=n_arms_profit, size=1)
        self.curr_best_budget_idx: List[int] = [0 for _ in range(campaign.get_n_sub_campaigns())]
        self.current_budget_allocation = [0 for _ in range(campaign.get_n_sub_campaigns())]

        # Initializing randomly budget values
        for sub_index, model in enumerate(self.number_of_visit_model_list):
            sub_campaign_values = self.number_of_visit_model_list[sub_index].sample_distribution()
            self.campaign.set_sub_campaign_values(sub_index, sub_campaign_values)
        _, best_budgets = CampaignOptimizer.find_best_budgets(self.campaign)
        # Map optimal budget values back to their indices on the budget grid
        self.curr_best_budget_idx = [np.where(self.campaign.get_budgets() == budget)[0][0] for budget in
                                     best_budgets]
    def test_find_best_budgets_1(self):
        """Two sub-campaigns: putting the whole budget on the first is optimal."""
        campaign = Campaign(2, 100, 3)
        values_per_subcampaign = [[3, 7, 14], [2, 5, 7]]
        for idx, values in enumerate(values_per_subcampaign):
            campaign.set_sub_campaign_values(idx, values)

        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

        self.assertEqual(max_clicks, 16)
        self.assertTrue(np.array_equal(best_budgets, np.array([100, 0])))
    def update_budget(self, pulled_arm_list: List[int], n_visits: List[float]):
        """
        Update the advertising side with the observed visits, refit the visit
        models, and jointly re-optimize the pricing arm and budget allocation.

        :param pulled_arm_list: budget-arm index pulled for each sub-campaign
        :param n_visits: number of visits observed for each sub-campaign
        """
        super(JointBanditFixedDailyPriceQuantile, self).update_budget(pulled_arm_list, n_visits)

        # Update data structure
        for i in range(self.campaign.get_n_sub_campaigns()):
            self.pulled_arm_sub_campaign[i].append(pulled_arm_list[i])
            self.collected_rewards_sub_campaign[i].append(n_visits[i])

        # Update model with new information
        for sub_index, model in enumerate(self.number_of_visit_model_list):
            model.fit_model(collected_rewards=self.collected_rewards_sub_campaign[sub_index],
                            pulled_arm_history=self.pulled_arm_sub_campaign[sub_index])

        # For all the sub-campaigns and profit-arms compute mean and std, and the quantile
        mean_ad_value = np.zeros(shape=(self.campaign.get_n_sub_campaigns(), self.n_arms_price))
        std_ad_value = np.zeros(shape=(self.campaign.get_n_sub_campaigns(), self.n_arms_price))
        # Optimistic quantile that approaches 1 as days pass (UCB-like schedule)
        percentile = 1 - (1 / (self.day_t + 1))

        for c in range(self.campaign.get_n_sub_campaigns()):
            for arm in range(self.n_arms_price):
                values = np.array(self.profit_arm_reward_list[c][arm])
                # With no samples yet, fall back to the arm's nominal profit
                mean_ad_value[c][arm] = values.mean() if len(values) > 0 else self.arm_profit[arm]
                # NOTE(review): with no samples, the std also defaults to the
                # arm's profit value (not 0) — looks like a deliberate
                # wide-exploration prior, but confirm this is intended
                std_ad_value[c][arm] = values.std() if len(values) > 0 else self.arm_profit[arm]

        # Clamp std from below so the quantile never collapses onto the mean
        std_ad_value = np.where(std_ad_value < self.min_std, self.min_std, std_ad_value)

        # Optimistic (upper-quantile) estimate of the value per click
        estimated_ad_value = norm.ppf(q=percentile, loc=mean_ad_value, scale=std_ad_value)

        # Sample the number of visits for each sub-campaign
        sample_visit = np.zeros(
            shape=(self.campaign.get_n_sub_campaigns(), len(self.number_of_visit_model_list[0].arms)))
        for c in range(self.campaign.get_n_sub_campaigns()):
            sample_visit[c] = self.number_of_visit_model_list[c].sample_distribution()

        # Joint optimization of advertising and pricing: for each price arm,
        # weight the visit curves by that arm's estimated value per click and
        # optimize the budgets; keep the price arm with the best total profit
        best_arm_profit_idx = -1
        curr_max_profit = -1
        curr_best_budget_idx = [-1 for _ in range(self.campaign.get_n_sub_campaigns())]
        for profit_arm_index in range(self.n_arms_price):
            # Set campaign
            for sub_campaign_idx in range(self.campaign.get_n_sub_campaigns()):
                sub_campaign_visits = sample_visit[sub_campaign_idx]
                sub_campaign_values = sub_campaign_visits * estimated_ad_value[sub_campaign_idx][profit_arm_index]
                self.campaign.set_sub_campaign_values(sub_campaign_idx, sub_campaign_values)

            # Campaign optimization
            max_profit, best_budgets = CampaignOptimizer.find_best_budgets(self.campaign)
            if max_profit > curr_max_profit:
                curr_max_profit = max_profit
                # Map optimal budget values back to indices on the budget grid
                curr_best_budget_idx = [np.where(self.campaign.get_budgets() == budget)[0][0] for budget in
                                        best_budgets]
                best_arm_profit_idx = profit_arm_index

        self.current_pricing_arm_idx = best_arm_profit_idx

        self.curr_best_budget_idx = curr_best_budget_idx
    def test_find_best_budgets_2(self):
        """Three sub-campaigns: the whole budget goes to the first one."""
        campaign = Campaign(3, 90, 4)
        values_per_subcampaign = [[0, 3, 12, 20], [0, 2, 7, 10], [0, 5, 8, 12]]
        for idx, values in enumerate(values_per_subcampaign):
            campaign.set_sub_campaign_values(idx, values)

        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

        self.assertEqual(max_clicks, 20)
        self.assertTrue(np.array_equal(best_budgets, np.array([90, 0, 0])))
    def test_find_best_budgets_3(self):
        """Four sub-campaigns: the optimum splits the budget 75/0/0/25."""
        campaign = Campaign(4, 100, 5)
        values_per_subcampaign = [[0, 3, 12, 20, 15],
                                  [0, 2, 7, 10, 9],
                                  [0, 5, 8, 12, 18],
                                  [0, 9, 9, 10, 7]]
        for idx, values in enumerate(values_per_subcampaign):
            campaign.set_sub_campaign_values(idx, values)

        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

        self.assertEqual(max_clicks, 29)
        self.assertTrue(np.array_equal(best_budgets, np.array([75, 0, 0, 25])))
    def test_optimize_1(self):
        """DP tables for a 2-sub-campaign instance match hand-computed values."""
        campaign = Campaign(2, 100, 3)
        campaign.set_sub_campaign_values(0, [3, 7, 14])
        campaign.set_sub_campaign_values(1, [2, 5, 7])

        opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)

        expected_opt = np.array([[0, 0, 0],
                                 [3, 7, 14],
                                 [5, 9, 16]])
        expected_idx = np.array([[-1, -1, -1],
                                 [0, 1, 2],
                                 [0, 0, 0]])

        self.assertTrue(np.array_equal(expected_opt, opt_matrix))
        self.assertTrue(np.array_equal(expected_idx, max_idx_matrix))
    def test_find_best_budgets_4(self):
        """Saturating click curves: the optimum splits the budget 20/50/30."""
        campaign = Campaign(3, 100, 11)
        # 100(0.0+0.3)*b = 30*x1 = max 3000
        # 100(0.2+0.2)*b = 40*x2 = max 4000
        # 100(0.4+0.1)*b = 50*x3 = max 5000
        campaign.set_sub_campaign_values(0, [0, 300, 600, 900, 1200, 1500, 1800, 2100, 2400, 2700, 3000])
        campaign.set_sub_campaign_values(1, [0, 400, 800, 1200, 1600, 2000, 2000, 2000, 2000, 2000, 2000])
        campaign.set_sub_campaign_values(2, [0, 500, 1000, 1500, 1500, 1500, 1500, 1500, 1500, 1500, 1500])

        max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

        # 600 (b=20) + 2000 (b=50) + 1500 (b=30) = 4100; the test previously
        # never asserted max_clicks and contained leftover debug prints
        self.assertEqual(max_clicks, 4100)
        my_best_budgets = np.array([20.0, 50.0, 30.0])
        self.assertTrue((best_budgets == my_best_budgets).all())
    def test_optimize_2(self):
        """DP tables for a 3-sub-campaign instance match hand-computed values."""
        campaign = Campaign(3, 90, 4)
        values_per_subcampaign = [[0, 3, 12, 20], [0, 2, 7, 10], [0, 5, 8, 12]]
        for idx, values in enumerate(values_per_subcampaign):
            campaign.set_sub_campaign_values(idx, values)

        opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)

        expected_opt = np.array([[0, 0, 0, 0],
                                 [0, 3, 12, 20],
                                 [0, 3, 12, 20],
                                 [0, 5, 12, 20]])
        expected_idx = np.array([[-1, -1, -1, -1],
                                 [0, 1, 2, 3],
                                 [0, 0, 0, 0],
                                 [0, 1, 0, 0]])

        self.assertTrue(np.array_equal(expected_opt, opt_matrix))
        self.assertTrue(np.array_equal(expected_idx, max_idx_matrix))
    def pull_arm(self, value_per_clicks: List[float] = None) -> List[int]:
        """
        Solve the combinatorial budget-allocation problem on a copy of the
        campaign whose values are scaled by the value per click, and return
        the grid indices of the optimal budgets.

        :param value_per_clicks: per-sub-campaign value of a click; defaults
            to all ones (pure click maximization)
        :return: the indices of the best budgets given the actual campaign
        """
        if value_per_clicks is None:
            value_per_clicks = np.ones(
                shape=self.campaign.get_n_sub_campaigns())

        # Work on a copy so the bandit's own campaign is left untouched
        scaled_campaign: Campaign = copy(self.campaign)
        scaled_campaign.multiply_sub_campaign_values(value_per_clicks)
        _, best_budgets = CampaignOptimizer.find_best_budgets(scaled_campaign)

        # Translate optimal budget values into their grid indices
        budget_grid = scaled_campaign.get_budgets()
        best_budget_indices: List[int] = []
        for budget in best_budgets:
            best_budget_indices.append(np.where(budget_grid == budget)[0][0])
        return best_budget_indices
    def test_optimize_3(self):
        """DP tables for a 4-sub-campaign instance match hand-computed values."""
        campaign = Campaign(4, 100, 5)
        values_per_subcampaign = [[0, 3, 12, 20, 15],
                                  [0, 2, 7, 10, 9],
                                  [0, 5, 8, 12, 18],
                                  [0, 9, 9, 10, 7]]
        for idx, values in enumerate(values_per_subcampaign):
            campaign.set_sub_campaign_values(idx, values)

        opt_matrix, max_idx_matrix = CampaignOptimizer._optimize(campaign)

        expected_opt = np.array([[0, 0, 0, 0, 0],
                                 [0, 3, 12, 20, 20],
                                 [0, 3, 12, 20, 22],
                                 [0, 5, 12, 20, 25],
                                 [0, 9, 14, 21, 29]])
        expected_idx = np.array([[-1, -1, -1, -1, -1],
                                 [0, 1, 2, 3, 3],
                                 [0, 0, 0, 0, 1],
                                 [0, 1, 0, 0, 1],
                                 [0, 1, 1, 1, 1]])

        self.assertTrue(np.array_equal(expected_opt, opt_matrix))
        self.assertTrue(np.array_equal(expected_idx, max_idx_matrix))
# Example #11
mean_scenario: Scenario = EnvironmentManager.load_scenario(
    SCENARIO_NAME, get_mean_function=True)
# Click functions of the first phase — one per sub-campaign
click_function_list: List[IStochasticFunction] = mean_scenario.get_phases(
)[0].get_n_clicks_function()

# Optimal point computation
campaign = Campaign(n_sub_campaigns=mean_scenario.get_n_subcampaigns(),
                    cum_budget=BUDGET,
                    n_arms=N_ARMS_ADV)
for i in range(campaign.get_n_sub_campaigns()):
    # Evaluate the click curve on the discretized budget grid.
    # NOTE(review): draw_sample is presumably deterministic here because the
    # scenario was loaded with get_mean_function=True — confirm
    sub_campaign_values = [
        click_function_list[i].draw_sample(b)
        for b in np.linspace(0, BUDGET, N_ARMS_ADV)
    ]
    campaign.set_sub_campaign_values(i, sub_campaign_values)
# Clairvoyant optimum, used downstream as the regret baseline
max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

# # Compute regret
# if CSV_CUM_REGRET:
#     mean_regret_data = np.zeros(shape=(n_bandit + 1, n_days))
#     std_regret_data = np.zeros(shape=(n_bandit + 1, n_days))
#     mean_regret_data[-1] = np.arange(n_days) + 1
#     std_regret_data[-1] = np.arange(n_days) + 1
#
#     for bandit_idx in range(len(BANDIT_NAME)):
#         n_exp = len(total_reward_list[bandit_idx])
#
#         for curr_day in range(n_days):
#             daily_values = []
#             for exp in range(n_exp):
#                 daily_values.append(max_clicks - total_reward_list[bandit_idx][exp][0][curr_day])
# Candidate prices on a uniform grid
prices_arr = np.linspace(MIN_PRICE, MAX_PRICE, N_ARMS_PRICE)
best_profit_idx = 0
max_value = 0
best_budget_value = [0 for _ in range(mean_scenario.get_n_subcampaigns())]
campaign = Campaign(n_sub_campaigns=mean_scenario.get_n_subcampaigns(), cum_budget=DAILY_BUDGET, n_arms=N_ARMS_ADS)

for i in range(campaign.get_n_sub_campaigns()):
    # Click curve of sub-campaign i on the budget grid
    n_clicks = np.array([click_function_list[i].draw_sample(b)
                         for b in np.linspace(0, DAILY_BUDGET, N_ARMS_ADS)])
    # Expected profit per click at each candidate price (CRP * margin)
    click_values_per_price = [crp_function_list[i].draw_sample(price) *
                              (price - FIXED_COST) for price in prices_arr]
    # NOTE(review): each sub-campaign is priced independently at its own best
    # price; best_profit_idx is overwritten every iteration and only its last
    # value survives — confirm no downstream code assumes a single shared price
    best_profit_idx = int(np.argmax(click_values_per_price))
    print("Best price for subcampaign {} is {}".format(i+1, prices_arr[best_profit_idx]))
    values = n_clicks * click_values_per_price[best_profit_idx]
    campaign.set_sub_campaign_values(i, values)
# Optimize budgets once all sub-campaign value curves are set
max_value, best_budget_value = CampaignOptimizer.find_best_budgets(campaign)

print("Best budget allocation is {}\n".format(best_budget_value))
print("The expected best daily profit is given by {}\n".format(max_value))

# Instantaneous rewards computation
if CSV_REWARD:
    # total_reward_list indexing appears to be [bandit][experiment][metric][day]
    # — inferred from usage here only; verify against the experiment serializer
    n_days = len(total_reward_list[0][0])
    # Last row of each matrix carries the day index (x-axis of the CSV output)
    mean_reward = np.zeros(shape=(n_bandit+1, n_days))
    std_reward = np.zeros(shape=(n_bandit+1, n_days))
    mean_reward[-1] = np.arange(n_days)
    std_reward[-1] = np.arange(n_days)

    for bandit_idx in range(n_bandit):
        n_exp = len(total_reward_list[bandit_idx])
    def pull_budget(self) -> List[int]:
        """
        Jointly choose a budget allocation: for each price arm, weight the
        estimated click curves by that arm's value per click, optimize the
        resulting campaign, and keep the allocation with the highest value.
        Then rebuild the shared price learner, rebalancing the observed
        rewards according to the estimated share of clicks per sub-campaign.

        :return: the grid indices of the best budgets, one per sub-campaign
        """
        max_best_clicks = 0
        max_best_budgets = None

        for arm_idx in range(len(self.arm_values)):
            subcampaign_values = self.campaign.get_sub_campaigns()
            # Build a campaign whose values are clicks weighted by the value
            # per click of this price arm
            arm_value_campaign = Campaign(self.campaign.get_n_sub_campaigns(),
                                          self.campaign.get_cum_budget(),
                                          len(self.campaign.get_budgets()))
            for sub_idx in range(len(subcampaign_values)):
                arm_value_campaign.set_sub_campaign_values(
                    sub_idx, subcampaign_values[sub_idx] *
                    self.value_per_clicks_per_price_idx[arm_idx][sub_idx])
            # BUG FIX: optimize the value-weighted campaign built above — the
            # original passed self.campaign, silently discarding the per-arm
            # value weighting and optimizing the same campaign every iteration
            max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(
                arm_value_campaign)

            if max_best_clicks < max_clicks:
                max_best_clicks = max_clicks
                max_best_budgets = best_budgets

        # Balance price learner with number of clicks distribution estimation.
        # NOTE(review): max_best_budgets stays None if every optimization
        # returned a value <= 0 — confirm that cannot happen upstream
        budget_value_to_index = {
            value: i
            for i, value in enumerate(self.campaign.get_budgets())
        }
        estimated_clicks = np.array([
            self.campaign.get_sub_campaigns()[sub_idx,
                                              budget_value_to_index[budget]]
            for sub_idx, budget in enumerate(max_best_budgets)
        ])
        user_probabilities = estimated_clicks / np.sum(estimated_clicks)

        # Rebuild the shared price learner from scratch before re-feeding it
        self.unique_price_learner: DiscreteBandit = self.price_bandit_class(
            **self.price_bandit_kwargs)

        for arm_idx in range(len(self.arm_values)):
            rewards_per_subcampaign = []
            for sub_idx in range(self.campaign.get_n_sub_campaigns()):
                rewards_per_subcampaign.append(
                    self.rewards_per_arm_per_user_class[sub_idx][arm_idx])
            rewards_len = np.array(
                [len(rewards) for rewards in rewards_per_subcampaign])

            # Largest total sample size that still respects the target
            # user-class proportions
            solution = np.min(rewards_len / user_probabilities)
            balanced_rewards_len = np.array(np.floor(solution *
                                                     user_probabilities),
                                            dtype=int)

            # Clip values of balanced rewards length between 0 and original rewards length
            balanced_rewards_len = np.maximum(balanced_rewards_len, 0)
            balanced_rewards_len = np.minimum(balanced_rewards_len,
                                              rewards_len)

            # Re-feed a balanced subsample of rewards into the price learner
            for sub_idx in range(self.campaign.get_n_sub_campaigns()):
                sampled_rewards = np.random.choice(
                    rewards_per_subcampaign[sub_idx],
                    size=balanced_rewards_len[sub_idx],
                    replace=False)
                for reward in sampled_rewards:
                    self.unique_price_learner.update(arm_idx, reward)

        return [budget_value_to_index[budget] for budget in max_best_budgets]
# Example #14
max_value = 0
best_budget_value = [0 for _ in range(mean_scenario.get_n_subcampaigns())]
campaign = Campaign(n_sub_campaigns=mean_scenario.get_n_subcampaigns(), cum_budget=DAILY_BUDGET, n_arms=N_ARMS_ADS)
# For every candidate price, rebuild the campaign values (clicks * profit per
# click at that price), optimize the budget allocation, and keep the best
# price/allocation pair.  Leftover `if i == 2:` debug prints removed.
for price_idx, price in enumerate(prices_arr):
    for i in range(campaign.get_n_sub_campaigns()):
        n_clicks = np.array([click_function_list[i].draw_sample(b)
                             for b in np.linspace(0, DAILY_BUDGET, N_ARMS_ADS)])
        # Expected profit per click at this price (CRP * margin)
        click_value = crp_function_list[i].draw_sample(price) * (price - FIXED_COST)
        values: np.array = n_clicks * click_value
        campaign.set_sub_campaign_values(i, values)
    curr_value, curr_budgets = CampaignOptimizer.find_best_budgets(campaign)

    if curr_value > max_value:
        best_profit_idx = price_idx
        max_value = curr_value
        best_budget_value = curr_budgets

print("Best budget allocation is reached for price = {}\n".format(prices_arr[best_profit_idx]))
print("Best budget allocation is {}\n".format(best_budget_value))
print("The expected best daily profit is given by {}\n".format(max_value))

# Instantaneous rewards computation
if CSV_REWARD:
    # NOTE(review): this block appears truncated at the end of the excerpt —
    # confirm its continuation in the full file
    n_days = len(total_reward_list[0][0])
    # Matrices sized [bandit + day-index row] x days for CSV export
    mean_reward = np.zeros(shape=(n_bandit+1, n_days))
    std_reward = np.zeros(shape=(n_bandit+1, n_days))