def __init__(self, campaign: Campaign,
                 model_list: List[DiscreteRegressor]):
        """
        Initialise the learner bookkeeping and seed every sub-campaign of the campaign with values sampled
        from its regressor.

        :param campaign: campaign that will be optimized
        :param model_list: regressors used to estimate the quantity of each sub-campaign; there must be exactly
        one regressor per sub-campaign (checked with an assertion)
        """
        n_sub_campaigns = campaign.get_n_sub_campaigns()
        assert len(model_list) == n_sub_campaigns

        # Overall state
        self.campaign: Campaign = campaign
        self.model_list: List[DiscreteRegressor] = model_list
        self.t: int = 0
        self.collected_rewards: List[float] = []
        self.pulled_superarm_list: List[List] = []

        # Per sub-campaign history: one independent, initially empty list each
        self.collected_rewards_sub_campaign: List[List] = [[] for _ in range(n_sub_campaigns)]
        self.pulled_arm_sub_campaign: List[List] = [[] for _ in range(n_sub_campaigns)]

        # Store in the campaign one sample drawn from each regressor
        for sub_index, regressor in enumerate(self.model_list):
            self.campaign.set_sub_campaign_values(sub_index, regressor.sample_distribution())
    def _optimize(cls, campaign: Campaign) -> (np.ndarray, np.ndarray):
        """
        Optimize the combinatorial problem of the advertising campaign by using a dynamic programming algorithm

        Row 0 of both matrices is the base case (no sub-campaign considered): the optimization matrix stays at 0
        and the index matrix stays at -1. Row r (r >= 1) is obtained from row r - 1 by adding sub-campaign r - 1.

        :param campaign: the campaign to be optimized
        :return:
            - the optimization matrix (N+1) x M containing, for each pair (budget, set of sub-campaign), the maximum
              optimized value achievable (value might be, for instance, the number of clicks, or the profit)
            - the maximum indices (N+1) x M related to the optimization matrix containing, for each pair
              (budget, set of sub-campaign), the index of the best budget for the new added sub-campaign w.r.t. previous
              set of sub-campaign (i.e. row)
            where N is the number of sub-campaign and M is the number of discrete budgets
        """
        n_rows = campaign.get_n_sub_campaigns() + 1
        n_budgets = len(campaign.get_budgets())

        optimization_matrix = np.zeros(shape=(n_rows, n_budgets))
        # The builtin ``int`` replaces the ``np.int`` alias, which no longer exists in recent NumPy releases
        max_idx_matrix = np.full_like(optimization_matrix,
                                      fill_value=-1,
                                      dtype=int)
        clicks_matrix = campaign.get_sub_campaigns()

        for row in range(1, n_rows):
            # Values of the sub-campaign added at this row, reversed so that a tail slice pairs
            # "budget index col - k for the new sub-campaign" with "budget index k for the previous ones"
            reversed_clicks = clicks_matrix[row - 1][::-1]

            for col in range(n_budgets):
                # Element k = value of the new sub-campaign at budget index (col - k)
                #           + best value of the previous sub-campaigns at budget index k
                candidates = reversed_clicks[n_budgets - col - 1:] + optimization_matrix[row - 1, :col + 1]
                idx_max = np.argmax(candidates)

                optimization_matrix[row, col] = candidates[idx_max]
                # Budget index given to the newly added sub-campaign
                max_idx_matrix[row, col] = col - idx_max
        return optimization_matrix, max_idx_matrix
    def __init__(self, campaign: Campaign, arm_values: np.ndarray,
                 price_learner_class: DiscreteBandit.__class__,
                 price_learner_kwargs,
                 number_of_visit_model_list: List[DiscreteRegressor],
                 ad_value_strategy: AdValueStrategy):
        """
        Build a joint bandit that uses a single price learner and one number-of-visit regressor
        per sub-campaign.

        :param campaign: campaign forwarded to the parent constructor
        :param arm_values: array of arm values; its length defines the number of arms
        :param price_learner_class: class instantiated to create the unique price learner
        :param price_learner_kwargs: keyword arguments passed to ``price_learner_class``
        :param number_of_visit_model_list: one regressor per sub-campaign (checked with an assertion)
        :param ad_value_strategy: strategy used to compute the initial value per click of every arm
        """
        super().__init__(campaign)
        n_sub_campaigns = campaign.get_n_sub_campaigns()
        assert len(number_of_visit_model_list) == n_sub_campaigns

        # General problem data
        self.arm_values: np.ndarray = arm_values
        n_arms = len(self.arm_values)

        # Per sub-campaign history: one independent, initially empty list each
        self.collected_rewards_sub_campaign: List[List] = [[] for _ in range(n_sub_campaigns)]
        self.pulled_arm_sub_campaign: List[List] = [[] for _ in range(n_sub_campaigns)]

        # Initial value per click of every arm, computed by the strategy on empty reward lists.
        # NOTE(review): self.day_t is not assigned in this method; presumably set by the parent constructor
        self.value_per_clicks_per_price_idx: Dict[int, List[float]] = {}
        for price_idx in range(n_arms):
            no_rewards_yet = [[] for _ in range(self.campaign.get_n_sub_campaigns())]
            estimated_values = ad_value_strategy.get_estimated_value_per_clicks(no_rewards_yet, self.day_t)
            self.value_per_clicks_per_price_idx[price_idx] = estimated_values

        # For every sub-campaign index, one empty reward list per arm
        self.rewards_per_arm_per_user_class: Dict[int, List[List[float]]] = defaultdict(list)
        for sub_idx in range(self.campaign.get_n_sub_campaigns()):
            for _ in range(n_arms):
                self.rewards_per_arm_per_user_class[sub_idx].append([])

        # Learners
        self.price_bandit_class = price_learner_class
        self.price_bandit_kwargs = price_learner_kwargs
        self.unique_price_learner: DiscreteBandit = self.price_bandit_class(**self.price_bandit_kwargs)

        self.number_of_visit_model_list: List[DiscreteRegressor] = number_of_visit_model_list
        self.ad_value_strategy: AdValueStrategy = ad_value_strategy
    def __init__(self, campaign: Campaign, ads_learner: CombinatorialBandit,
                 price_learner: List[DiscreteBandit],
                 ad_value_strategy: AdValueStrategy):
        """
        Build a joint bandit from an advertising learner and one price learner per sub-campaign.

        :param campaign: campaign forwarded to the parent constructor
        :param ads_learner: combinatorial bandit stored as the advertising learner
        :param price_learner: list of price learners, one per sub-campaign (checked with an assertion);
        the number of arms is read from the first one
        :param ad_value_strategy: strategy object stored for later use
        """
        n_price_learners = len(price_learner)
        assert n_price_learners == campaign.get_n_sub_campaigns()

        super().__init__(campaign=campaign)

        # Learners
        self.ads_learner: CombinatorialBandit = ads_learner
        self.price_learner: List[DiscreteBandit] = price_learner
        self.n_arms = price_learner[0].n_arms

        # Ad value estimation: nothing has been estimated yet
        self.ad_value_strategy = ad_value_strategy
        self.estimated_ad_value = None
    def __init__(self, campaign: Campaign, number_of_visit_model_list: List[DiscreteRegressor],
                 n_arms_profit: int, arm_profit: np.array, min_std: float):
        """
        Build the bandit state, draw a random pricing arm and compute an initial budget allocation from
        values sampled from the number-of-visit regressors.

        :param campaign: campaign forwarded to the parent constructor
        :param number_of_visit_model_list: one regressor per sub-campaign (checked with an assertion)
        :param n_arms_profit: number of pricing arms
        :param arm_profit: stored unchanged as ``self.arm_profit``
        :param min_std: stored unchanged as ``self.min_std``
        """
        n_sub_campaigns = campaign.get_n_sub_campaigns()
        assert len(number_of_visit_model_list) == n_sub_campaigns

        super().__init__(campaign=campaign)

        # Number of visit data structure
        self.number_of_visit_model_list: List[DiscreteRegressor] = number_of_visit_model_list
        self.collected_rewards_sub_campaign: List[List] = [[] for _ in range(n_sub_campaigns)]
        self.pulled_arm_sub_campaign: List[List] = [[] for _ in range(n_sub_campaigns)]

        # Pricing data structure: one empty reward list per (sub-campaign, pricing arm) pair
        self.n_arms_price: int = n_arms_profit
        self.arm_profit = arm_profit
        self.profit_arm_reward_list = [
            [[] for _ in range(n_arms_profit)]
            for _ in range(n_sub_campaigns)
        ]
        self.min_std = min_std

        # Current data structure (size=1 makes the drawn pricing index a one-element array)
        self.current_pricing_arm_idx = np.random.randint(low=0, high=n_arms_profit, size=1)
        self.curr_best_budget_idx: List[int] = [0] * n_sub_campaigns
        self.current_budget_allocation = [0] * n_sub_campaigns

        # Initializing randomly budget values: one sample per regressor is stored in the campaign
        for sub_index, regressor in enumerate(self.number_of_visit_model_list):
            self.campaign.set_sub_campaign_values(sub_index, regressor.sample_distribution())

        # Translate each best budget into the index of its first occurrence in the campaign budgets
        _, best_budgets = CampaignOptimizer.find_best_budgets(self.campaign)
        available_budgets = self.campaign.get_budgets()
        self.curr_best_budget_idx = [
            np.where(available_budgets == budget)[0][0]
            for budget in best_budgets
        ]
# Example no. 6
0
# Join the two frames on their "day" column and save the result as
# "<folder_path_with_date>instant_reward.csv" without the index column.
# NOTE(review): mean_df / std_df presumably hold per-day mean and standard deviation - confirm upstream.
total_df = mean_df.merge(std_df, left_on="day", right_on="day")
total_df.to_csv("{}instant_reward.csv".format(folder_path_with_date),
                index=False)

# Load the scenario with get_mean_function=True and take the click functions of its first phase
mean_scenario: Scenario = EnvironmentManager.load_scenario(
    SCENARIO_NAME, get_mean_function=True)
click_function_list: List[IStochasticFunction] = mean_scenario.get_phases(
)[0].get_n_clicks_function()

# Optimal point computation: evaluate every click function on N_ARMS_ADV evenly spaced budgets
# in [0, BUDGET], store the values in a fresh campaign and run the campaign optimizer on it
campaign = Campaign(n_sub_campaigns=mean_scenario.get_n_subcampaigns(),
                    cum_budget=BUDGET,
                    n_arms=N_ARMS_ADV)
for i in range(campaign.get_n_sub_campaigns()):
    sub_campaign_values = [
        click_function_list[i].draw_sample(b)
        for b in np.linspace(0, BUDGET, N_ARMS_ADV)
    ]
    campaign.set_sub_campaign_values(i, sub_campaign_values)
max_clicks, best_budgets = CampaignOptimizer.find_best_budgets(campaign)

# # Compute regret
# if CSV_CUM_REGRET:
#     mean_regret_data = np.zeros(shape=(n_bandit + 1, n_days))
#     std_regret_data = np.zeros(shape=(n_bandit + 1, n_days))
#     mean_regret_data[-1] = np.arange(n_days) + 1
#     std_regret_data[-1] = np.arange(n_days) + 1
#
#     for bandit_idx in range(len(BANDIT_NAME)):
def _build_visit_models(args, campaign: Campaign) -> List[DiscreteRegressor]:
    """Create one normalized GP regressor over the campaign budgets for every sub-campaign."""
    return [
        DiscreteGPRegressor(list(campaign.get_budgets()),
                            args.init_std,
                            args.alpha,
                            args.n_restart_opt,
                            normalized=True)
        for _ in range(campaign.get_n_sub_campaigns())
    ]


def get_bandit(args, arm_values: np.array, campaign: Campaign) -> IJointBandit:
    """
    Build the joint bandit selected by ``args.joint_bandit_name``.

    The advertising bandit, the price bandits (one per sub-campaign) and the ad value strategy are always
    created first; the requested joint bandit is then assembled from them.

    :param args: parsed arguments; this function reads joint_bandit_name, ads_bandit_name, pricing_bandit_name,
        init_std, min_std_q and, depending on the chosen bandit, alpha, n_restart_opt and daily_price
    :param arm_values: values of the pricing arms; their number and maximum are also used
    :param campaign: campaign the bandit works on
    :return: the joint bandit instance
    :raises ValueError: if the joint bandit name is not one of the supported ones
    :raises AssertionError: if "JBFQ" or "JBFTS" is requested while ``args.daily_price`` is falsy
    """
    bandit_name: str = args.joint_bandit_name
    ads_bandit = build_combinatorial_bandit(bandit_name=args.ads_bandit_name,
                                            campaign=campaign,
                                            init_std=args.init_std,
                                            args=args)
    price_bandit_class, price_bandit_kwargs = get_bandit_class_and_kwargs(
        bandit_name=args.pricing_bandit_name,
        n_arms=len(arm_values),
        arm_values=arm_values,
        args=args)
    price_bandit_list = [
        price_bandit_class(**price_bandit_kwargs)
        for _ in range(campaign.get_n_sub_campaigns())
    ]

    # Names containing "Exp" use the expectation strategy, all the others the quantile one
    if "Exp" in bandit_name:
        ad_value_strategy = ExpectationAdValueStrategy(np.max(arm_values))
    else:
        ad_value_strategy = QuantileAdValueStrategy(np.max(arm_values), args.min_std_q)
    # A trailing "V" enables visit learning; endswith() is also safe on an empty name
    is_learn_visits = bandit_name.endswith('V')

    if bandit_name in ["JBExp", "JBExpV", "JBQV", "JBQ"]:
        bandit = JointBanditDiscriminatory(ads_learner=ads_bandit,
                                           price_learner=price_bandit_list,
                                           campaign=campaign,
                                           ad_value_strategy=ad_value_strategy,
                                           is_learn_visits=is_learn_visits)
    elif bandit_name in ["JBIExpV", "JBIQV"]:
        bandit = JointBanditDiscriminatoryImproved(
            ads_learner=ads_bandit,
            price_learner=price_bandit_list,
            campaign=campaign,
            ad_value_strategy=ad_value_strategy)
    elif bandit_name in ["JBBQ", "JBBExp"]:
        bandit = JointBanditBalanced(campaign=campaign,
                                     arm_values=arm_values,
                                     price_learner_class=price_bandit_class,
                                     price_learner_kwargs=price_bandit_kwargs,
                                     number_of_visit_model_list=_build_visit_models(args, campaign),
                                     ad_value_strategy=ad_value_strategy)
    elif bandit_name == "JBFQ":
        assert args.daily_price, "This joint bandit requires to run in a daily manner"

        bandit = JointBanditFixedDailyPriceQuantile(
            campaign=campaign,
            number_of_visit_model_list=_build_visit_models(args, campaign),
            min_std=args.min_std_q,
            arm_profit=arm_values,
            n_arms_profit=len(arm_values))
    elif bandit_name == "JBFTS":
        assert args.daily_price, "This joint bandit requires to run in a daily manner"

        bandit = JointBanditFixedDailyPriceTS(
            campaign=campaign,
            number_of_visit_model_list=_build_visit_models(args, campaign),
            arm_profit=arm_values,
            n_arms_profit=len(arm_values))
    else:
        # argparse.ArgumentParser is not an exception class, so raising it only produced an unrelated TypeError
        raise ValueError(
            "The name of the bandit to be used is not in the available ones: {!r}".format(bandit_name))

    return bandit