def _optimize(cls, campaign: "Campaign") -> "tuple[np.ndarray, np.ndarray]":
        """
        Optimize the combinatorial problem of the advertising campaign by using a dynamic programming algorithm

        Row 0 of both returned matrices is the base case in which no sub-campaign has been considered yet: it is
        left at 0 in the optimization matrix and at -1 (no choice made) in the index matrix. Row ``r`` (r >= 1) is
        obtained by adding the sub-campaign ``r - 1`` to the set covered by row ``r - 1``.

        :param campaign: the campaign to be optimized; it is queried through ``get_n_sub_campaigns()``,
            ``get_budgets()`` and ``get_sub_campaigns()`` (the latter is indexed by sub-campaign and then by
            budget index)
        :return:
            - the optimization matrix (N+1) x M containing, for each pair (budget, set of sub-campaign), the maximum
              optimized value achievable (value might be, for instance, the number of clicks, or the profit)
            - the maximum indices (N+1) x M related to the optimization matrix containing, for each pair
              (budget, set of sub-campaign), the index of the best budget for the new added sub-campaign w.r.t. previous
              set of sub-campaign (i.e. row)
            where N is the number of sub-campaign and M is the number of discrete budgets
        """
        n_rows = campaign.get_n_sub_campaigns() + 1
        n_budgets = len(campaign.get_budgets())

        optimization_matrix = np.zeros(shape=(n_rows, n_budgets))
        # The builtin ``int`` is used because the ``np.int`` alias was deprecated in NumPy 1.20 and
        # removed in NumPy 1.24; the resulting dtype is the same one the alias produced.
        max_idx_matrix = np.full_like(optimization_matrix,
                                      fill_value=-1,
                                      dtype=int)
        clicks_matrix = campaign.get_sub_campaigns()

        for row in range(1, n_rows):
            # Values of the newly added sub-campaign, reversed so that a plain slice lines them up
            # with the budgets left to the previous set of sub-campaigns.
            temp_clicks = clicks_matrix[row - 1][::-1]

            for col in range(n_budgets):
                # Entry k is the value obtained by giving budget index k to the previous set (row - 1)
                # and budget index (col - k) to the new sub-campaign.
                cum_sum_clicks = (temp_clicks[n_budgets - col - 1:] +
                                  optimization_matrix[row - 1, :col + 1])
                # np.argmax returns the first maximum, i.e. ties favour the new sub-campaign.
                idx_max = np.argmax(cum_sum_clicks)

                optimization_matrix[row, col] = cum_sum_clicks[idx_max]
                max_idx_matrix[row, col] = col - idx_max
        return optimization_matrix, max_idx_matrix
def main(args):
    """
    Run a combinatorial bandit against the joint pricing/advertising environment for ``args.n_rounds`` rounds.

    At every round the bandit pulls an arm (an iterable of budget indexes), the indexes are translated into
    the corresponding budgets of the campaign, the environment is advanced by one day with that allocation and
    the bandit is updated with the daily visits per sub-campaign reported by the environment.

    :param args: parsed arguments; the attributes read here are ``scenario_name``, ``cum_budget``, ``n_arms``,
        ``bandit_name``, ``init_std`` and ``n_rounds`` (the whole object is also forwarded to the bandit builder)
    :return:
        - ``bandit.collected_rewards`` after the last round
        - the budget allocation used in the last round (the placeholder ``[0, 0, 0]`` if no round is run)
    """
    scenario = EnvironmentManager.load_scenario(args.scenario_name)
    env = PricingAdvertisingJointEnvironment(scenario)

    campaign = Campaign(scenario.get_n_subcampaigns(), args.cum_budget, args.n_arms)
    bandit = build_combinatorial_bandit(
        bandit_name=args.bandit_name,
        campaign=campaign,
        init_std=args.init_std,
        args=args,
    )

    # Returned untouched only when the loop below never executes.
    # NOTE(review): the length 3 is hard-coded -- confirm it matches the scenario's number of sub-campaigns.
    last_allocation = [0, 0, 0]

    for _ in range(args.n_rounds):
        # Choose arm: the bandit answers with budget indexes, which are mapped to actual budgets.
        pulled_indexes = bandit.pull_arm()
        round_allocation = []
        for budget_idx in pulled_indexes:
            round_allocation.append(int(campaign.get_budgets()[budget_idx]))
        last_allocation = round_allocation

        # Observe reward: play the allocation for one day and read the resulting visits.
        env.set_budget_allocation(budget_allocation=last_allocation)
        env.next_day()
        daily_visits = env.get_daily_visits_per_sub_campaign()

        # Update bandit with the indexes it pulled, not with the budget values.
        bandit.update(pulled_arm=pulled_indexes, observed_reward=daily_visits)

    return bandit.collected_rewards, last_allocation
def get_bandit(args, arm_values: np.array, campaign: Campaign) -> IJointBandit:
    """
    Build the joint bandit selected by ``args.joint_bandit_name``.

    Supported names and the class instantiated for each of them:
        - "JBExp", "JBExpV", "JBQV", "JBQ": JointBanditDiscriminatory
        - "JBIExpV", "JBIQV": JointBanditDiscriminatoryImproved
        - "JBBQ", "JBBExp": JointBanditBalanced
        - "JBFQ": JointBanditFixedDailyPriceQuantile (requires ``args.daily_price``)
        - "JBFTS": JointBanditFixedDailyPriceTS (requires ``args.daily_price``)

    A name containing "Exp" selects the ExpectationAdValueStrategy, any other name the
    QuantileAdValueStrategy; a name ending with "V" turns on ``is_learn_visits`` for the
    JointBanditDiscriminatory.

    :param args: parsed arguments; the attributes read here are ``joint_bandit_name``, ``ads_bandit_name``,
        ``pricing_bandit_name``, ``init_std``, ``min_std_q``, ``alpha``, ``n_restart_opt`` and ``daily_price``
        (the whole object is also forwarded to the builders of the ads and price learners)
    :param arm_values: values of the arms handed to the price learners; their maximum parametrizes the
        ad value strategy
    :param campaign: the campaign providing the budgets and the number of sub-campaigns
    :return: the joint bandit that has been built
    :raises argparse.ArgumentTypeError: if ``args.joint_bandit_name`` is not one of the supported names
    :raises AssertionError: if "JBFQ" or "JBFTS" is requested while ``args.daily_price`` is falsy
    """
    bandit_name: str = args.joint_bandit_name

    # The ads learner and the per-sub-campaign price learners are always built, in this order,
    # even though not every joint bandit below consumes them.
    ads_bandit = build_combinatorial_bandit(bandit_name=args.ads_bandit_name,
                                            campaign=campaign,
                                            init_std=args.init_std,
                                            args=args)
    price_bandit_class, price_bandit_kwargs = get_bandit_class_and_kwargs(
        bandit_name=args.pricing_bandit_name,
        n_arms=len(arm_values),
        arm_values=arm_values,
        args=args)
    price_bandit_list = [
        price_bandit_class(**price_bandit_kwargs)
        for _ in range(campaign.get_n_sub_campaigns())
    ]

    if "Exp" in bandit_name:
        ad_value_strategy = ExpectationAdValueStrategy(np.max(arm_values))
    else:
        ad_value_strategy = QuantileAdValueStrategy(np.max(arm_values),
                                                    args.min_std_q)
    # endswith() instead of indexing [-1] so that an empty name reaches the explicit
    # "unknown bandit" error below rather than failing with an IndexError here.
    is_learn_visits = bandit_name.endswith('V')

    def build_visit_models():
        # One independent regressor per sub-campaign, defined over the campaign budgets.
        models: List[DiscreteRegressor] = [
            DiscreteGPRegressor(list(campaign.get_budgets()),
                                args.init_std,
                                args.alpha,
                                args.n_restart_opt,
                                normalized=True)
            for _ in range(campaign.get_n_sub_campaigns())
        ]
        return models

    if bandit_name in ["JBExp", "JBExpV", "JBQV", "JBQ"]:
        bandit = JointBanditDiscriminatory(ads_learner=ads_bandit,
                                           price_learner=price_bandit_list,
                                           campaign=campaign,
                                           ad_value_strategy=ad_value_strategy,
                                           is_learn_visits=is_learn_visits)
    elif bandit_name in ["JBIExpV", "JBIQV"]:
        bandit = JointBanditDiscriminatoryImproved(
            ads_learner=ads_bandit,
            price_learner=price_bandit_list,
            campaign=campaign,
            ad_value_strategy=ad_value_strategy)
    elif bandit_name in ["JBBQ", "JBBExp"]:
        bandit = JointBanditBalanced(campaign=campaign,
                                     arm_values=arm_values,
                                     price_learner_class=price_bandit_class,
                                     price_learner_kwargs=price_bandit_kwargs,
                                     number_of_visit_model_list=build_visit_models(),
                                     ad_value_strategy=ad_value_strategy)
    elif bandit_name == "JBFQ":
        assert args.daily_price, "This joint bandit requires to run in a daily manner"

        bandit = JointBanditFixedDailyPriceQuantile(
            campaign=campaign,
            number_of_visit_model_list=build_visit_models(),
            min_std=args.min_std_q,
            arm_profit=arm_values,
            n_arms_profit=len(arm_values))
    elif bandit_name == "JBFTS":
        assert args.daily_price, "This joint bandit requires to run in a daily manner"

        bandit = JointBanditFixedDailyPriceTS(
            campaign=campaign,
            number_of_visit_model_list=build_visit_models(),
            arm_profit=arm_values,
            n_arms_profit=len(arm_values))
    else:
        # argparse.ArgumentParser is not an exception class: raising it fails with
        # "TypeError: exceptions must derive from BaseException" and hides this message.
        raise argparse.ArgumentTypeError(
            "The name of the bandit to be used is not in the available ones")

    return bandit