def _optimize(cls, campaign: "Campaign") -> (np.ndarray, np.ndarray):
    """
    Optimize the combinatorial problem of the advertising campaign by using a
    dynamic programming algorithm.

    Row 0 of both matrices is the base case (no sub-campaign considered): the
    value is 0 for every budget and the index is the placeholder -1. Row ``r``
    (r >= 1) is obtained by adding sub-campaign ``r - 1`` to the solution of
    row ``r - 1``.

    :param campaign: the campaign to be optimized. It must expose
        ``get_n_sub_campaigns()``, ``get_budgets()`` and ``get_sub_campaigns()``;
        the latter is indexed here as ``[sub_campaign][budget_index]`` and each
        row is expected to support reversed slicing and element-wise addition
        with a NumPy array
    :return:
        - the optimization matrix (N+1) x M containing, for each pair
          (budget, set of sub-campaigns), the maximum value achievable (the
          value might be, for instance, the number of clicks, or the profit)
        - the integer matrix (N+1) x M of maximum indices containing, for each
          pair (budget, set of sub-campaigns), the index of the best budget for
          the newly added sub-campaign w.r.t. the previous set of sub-campaigns
          (i.e. the previous row)

        where N is the number of sub-campaigns and M is the number of discrete
        budgets
    """
    n_rows = campaign.get_n_sub_campaigns() + 1
    n_budgets = len(campaign.get_budgets())

    optimization_matrix = np.zeros(shape=(n_rows, n_budgets))
    # The `np.int` alias was removed in NumPy 1.24; the builtin `int` selects
    # the very same default integer dtype on every NumPy version.
    max_idx_matrix = np.full_like(optimization_matrix, fill_value=-1, dtype=int)

    clicks_matrix = campaign.get_sub_campaigns()
    for row in range(1, n_rows):
        # Reversing the values of the new sub-campaign aligns "budget index
        # (col - k) for the new sub-campaign" with "budget index k for the
        # previous ones" in a single vectorised sum.
        reversed_values = clicks_matrix[row - 1][::-1]
        for col in range(n_budgets):
            # candidates[k] = value(new sub-campaign, col - k) + best(previous set, k)
            candidates = reversed_values[n_budgets - col - 1:] + optimization_matrix[row - 1, :col + 1]
            idx_max = np.argmax(candidates)  # first maximum wins on ties
            optimization_matrix[row, col] = candidates[idx_max]
            # Budget index assigned to the newly added sub-campaign
            max_idx_matrix[row, col] = col - idx_max
    return optimization_matrix, max_idx_matrix
def main(args):
    """
    Run a combinatorial advertising bandit on a scenario for ``args.n_rounds``
    rounds.

    At every round the bandit picks one budget index per sub-campaign, the
    corresponding budgets are applied to the environment, one day is simulated
    and the visits observed for each sub-campaign are fed back to the bandit.

    :param args: parsed options; the attributes read here are
        ``scenario_name``, ``cum_budget``, ``n_arms``, ``bandit_name``,
        ``init_std`` and ``n_rounds`` (the whole object is also forwarded to
        the bandit builder)
    :return:
        - the ``collected_rewards`` attribute of the bandit
        - the budget allocation used in the last round (``[0, 0, 0]`` if no
          round was played)
    """
    scenario = EnvironmentManager.load_scenario(args.scenario_name)
    env = PricingAdvertisingJointEnvironment(scenario)
    campaign = Campaign(
        scenario.get_n_subcampaigns(),
        args.cum_budget,
        args.n_arms,
    )
    bandit = build_combinatorial_bandit(
        bandit_name=args.bandit_name,
        campaign=campaign,
        init_std=args.init_std,
        args=args,
    )

    # Value returned when the loop below never runs
    budget_allocation = [0, 0, 0]

    for _ in range(args.n_rounds):
        # 1) Ask the bandit for one budget index per sub-campaign and turn
        #    the indices into actual budget values
        pulled_indexes = bandit.pull_arm()
        budget_allocation = [
            int(campaign.get_budgets()[budget_idx])
            for budget_idx in pulled_indexes
        ]

        # 2) Apply the allocation and simulate one day
        env.set_budget_allocation(budget_allocation=budget_allocation)
        env.next_day()
        daily_visits = env.get_daily_visits_per_sub_campaign()

        # 3) Feed the observation back to the learner
        bandit.update(pulled_arm=pulled_indexes, observed_reward=daily_visits)

    return bandit.collected_rewards, budget_allocation
def get_bandit(args, arm_values: np.array, campaign: Campaign) -> IJointBandit:
    """
    Build the joint (advertising + pricing) bandit selected by
    ``args.joint_bandit_name``.

    Recognised names:
        - "JBExp", "JBExpV", "JBQ", "JBQV": JointBanditDiscriminatory
        - "JBIExpV", "JBIQV": JointBanditDiscriminatoryImproved
        - "JBBQ", "JBBExp": JointBanditBalanced
        - "JBFQ": JointBanditFixedDailyPriceQuantile (needs ``args.daily_price``)
        - "JBFTS": JointBanditFixedDailyPriceTS (needs ``args.daily_price``)

    A name containing "Exp" selects the ExpectationAdValueStrategy, any other
    name the QuantileAdValueStrategy; a name ending with "V" enables
    ``is_learn_visits`` for the discriminatory bandit.

    :param args: parsed options; the attributes read here are
        ``joint_bandit_name``, ``ads_bandit_name``, ``pricing_bandit_name``,
        ``init_std``, ``min_std_q`` and, depending on the bandit, ``alpha``,
        ``n_restart_opt`` and ``daily_price``
    :param arm_values: values of the pricing arms
    :param campaign: the advertising campaign shared by the learners
    :return: the requested joint bandit
    :raises ValueError: if the joint bandit name is not one of the available ones
    :raises AssertionError: if "JBFQ"/"JBFTS" is requested while
        ``args.daily_price`` is falsy
    """
    bandit_name: str = args.joint_bandit_name

    # The sub-learners and the ad value strategy are always built up front,
    # even for the bandits that end up not using them, exactly as before.
    ads_bandit = build_combinatorial_bandit(bandit_name=args.ads_bandit_name, campaign=campaign,
                                            init_std=args.init_std, args=args)
    price_bandit_class, price_bandit_kwargs = get_bandit_class_and_kwargs(
        bandit_name=args.pricing_bandit_name, n_arms=len(arm_values), arm_values=arm_values, args=args)
    price_bandit_list = [price_bandit_class(**price_bandit_kwargs)
                         for _ in range(campaign.get_n_sub_campaigns())]

    if "Exp" in bandit_name:
        ad_value_strategy = ExpectationAdValueStrategy(np.max(arm_values))
    else:
        ad_value_strategy = QuantileAdValueStrategy(np.max(arm_values), args.min_std_q)

    # `endswith` (unlike `bandit_name[-1]`) copes with an empty name, which is
    # then rejected below with a meaningful error instead of an IndexError.
    is_learn_visits = bandit_name.endswith('V')

    def build_visit_models():
        # One GP regressor per sub-campaign, modelling the number of visits
        return [DiscreteGPRegressor(list(campaign.get_budgets()), args.init_std, args.alpha,
                                    args.n_restart_opt, normalized=True)
                for _ in range(campaign.get_n_sub_campaigns())]

    if bandit_name in ["JBExp", "JBExpV", "JBQV", "JBQ"]:
        bandit = JointBanditDiscriminatory(ads_learner=ads_bandit, price_learner=price_bandit_list,
                                           campaign=campaign, ad_value_strategy=ad_value_strategy,
                                           is_learn_visits=is_learn_visits)
    elif bandit_name in ["JBIExpV", "JBIQV"]:
        bandit = JointBanditDiscriminatoryImproved(ads_learner=ads_bandit, price_learner=price_bandit_list,
                                                   campaign=campaign, ad_value_strategy=ad_value_strategy)
    elif bandit_name in ["JBBQ", "JBBExp"]:
        model_list: List[DiscreteRegressor] = build_visit_models()
        bandit = JointBanditBalanced(campaign=campaign, arm_values=arm_values,
                                     price_learner_class=price_bandit_class,
                                     price_learner_kwargs=price_bandit_kwargs,
                                     number_of_visit_model_list=model_list,
                                     ad_value_strategy=ad_value_strategy)
    elif bandit_name == "JBFQ":
        assert args.daily_price, "This joint bandit requires to run in a daily manner"
        model_list: List[DiscreteRegressor] = build_visit_models()
        bandit = JointBanditFixedDailyPriceQuantile(campaign=campaign, number_of_visit_model_list=model_list,
                                                    min_std=args.min_std_q, arm_profit=arm_values,
                                                    n_arms_profit=len(arm_values))
    elif bandit_name == "JBFTS":
        assert args.daily_price, "This joint bandit requires to run in a daily manner"
        model_list: List[DiscreteRegressor] = build_visit_models()
        bandit = JointBanditFixedDailyPriceTS(campaign=campaign, number_of_visit_model_list=model_list,
                                              arm_profit=arm_values, n_arms_profit=len(arm_values))
    else:
        # The previous code raised an `argparse.ArgumentParser` instance, which
        # is not an exception: Python turned it into an unrelated TypeError and
        # the message below never reached the user.
        raise ValueError(
            f"The name of the bandit to be used is not in the available ones: {bandit_name!r}")
    return bandit