def __init__(self, config=Configuration(bandit_count_args)):
    super(BanditCount, self).__init__(config)

    self.pulls_a = np.zeros((self.config.num_products, self.config.num_products))
    self.clicks_a = np.zeros((self.config.num_products, self.config.num_products))
    self.last_product_viewed = None
    self.ctr = (self.clicks_a + 1) / (self.pulls_a + 2)
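# Sketch (an assumption, not part of the source): how the count matrices above might be
# updated after each bandit event and turned into a greedy recommendation. The indexing
# convention (row = last organically viewed product, column = recommended product) and the
# helper names are illustrative; `np` is assumed to be NumPy, as in the constructor above.
def _update_counts(agent, last_viewed, recommended, clicked):
    agent.pulls_a[last_viewed, recommended] += 1
    agent.clicks_a[last_viewed, recommended] += clicked
    # Laplace-smoothed click-through rate: (clicks + 1) / (pulls + 2) gives unseen
    # (context, action) pairs a prior estimate of 0.5 instead of a 0/0 division.
    agent.ctr = (agent.clicks_a + 1) / (agent.pulls_a + 2)

def _greedy_action(agent, last_viewed):
    # Recommend the product with the highest smoothed CTR for the current context.
    return int(np.argmax(agent.ctr[last_viewed, :]))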
def build_agents(agents_init_data, new_env_args):
    agents = dict()
    for agent_key in agents_init_data:
        agent_init_data = agents_init_data[agent_key]
        ctor = agent_init_data[AgentInit.CTOR]
        def_args = agent_init_data[AgentInit.DEF_ARGS]
        agents[agent_key] = ctor(Configuration({
            **def_args,
            **new_env_args,
        }))
    return agents
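# Usage sketch (an assumption, not from the source): `agents_init_data` maps an agent name
# to its constructor and default arguments; `build_agents` merges those defaults with the
# environment arguments. The agent names and argument values below are illustrative only.
agents_init_data = {
    'Organic User Count': {
        AgentInit.CTOR: OrganicUserEventCounterAgent,
        AgentInit.DEF_ARGS: organic_user_count_args,
    },
    'Bandit Count': {
        AgentInit.CTOR: BanditCount,
        AgentInit.DEF_ARGS: bandit_count_args,
    },
}
agents = build_agents(agents_init_data, {'num_products': 10, 'random_seed': 42})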
def __init__(self, config=Configuration(bandit_mf_square_args)):
    nn.Module.__init__(self)
    Agent.__init__(self, config)

    self.product_embedding = nn.Embedding(self.config.num_products, self.config.embed_dim)
    # The "user" state is the last product viewed organically, so the user embedding
    # also has one row per product.
    self.user_embedding = nn.Embedding(self.config.num_products, self.config.embed_dim)

    # Initializing optimizer type.
    self.optimizer = self.config.optim_function(
        self.parameters(), lr=self.config.learning_rate)

    self.last_product_viewed = None
    self.curr_step = 0
    self.train_data = []
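# Sketch (an assumption, not part of the source): a plausible forward pass for this
# matrix-factorisation bandit. The score for recommending product j in context i (the last
# organically viewed product) is the dot product of the two embeddings; `torch` is assumed
# to be imported alongside `torch.nn` as in the constructor above.
def forward(self, products, users=None):
    if users is None:
        users = torch.full_like(products, self.last_product_viewed)
    a = self.product_embedding(products)  # (batch, embed_dim)
    b = self.user_embedding(users)        # (batch, embed_dim)
    return (a * b).sum(dim=1)             # one score per (context, product) pair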
def __init__(self, config=Configuration(organic_mf_square_args)):
    nn.Module.__init__(self)
    Agent.__init__(self, config)

    self.product_embedding = nn.Embedding(self.config.num_products, self.config.embed_dim)
    self.output_layer = nn.Linear(self.config.embed_dim, self.config.num_products)

    # Initializing optimizer type.
    self.optimizer = self.config.optim_function(
        self.parameters(), lr=self.config.learning_rate)

    self.last_product_viewed = None
    self.curr_step = 0
    self.train_data = []
    self.action = None
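# Sketch (an assumption, not part of the source): a plausible forward pass for the organic
# model above. Each observed product id is embedded and mapped by the linear output layer to
# one logit per product, i.e. a next-organic-view predictor.
def forward(self, product):
    embedded = self.product_embedding(product)  # (batch, embed_dim)
    return self.output_layer(embedded)          # (batch, num_products) logits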
def __init__(self, config=Configuration(random_args)):
    super(RandomAgent, self).__init__(config)
    self.rng = RandomState(config.random_seed)
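# Sketch (an assumption, not part of the source): the uniform-random policy simply draws one
# of the `num_products` actions and reports its propensity. The real agent may return extra
# bookkeeping fields; 'a' (action) and 'ps' (propensity) are the ones the IPS-style agents
# in this section reweight against.
def act(self, observation, reward, done):
    action = self.rng.choice(self.config.num_products)
    return {
        'a': action,                           # recommended product id
        'ps': 1.0 / self.config.num_products,  # logging propensity of that action
    }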
    )
    print('#' * 48)

    # Environment parameters
    std_env_args = {
        **env_1_args,
        'random_seed': RandomSeed,
        'num_products': num_products,
    }

    env = gym.make('reco-gym-v1')

    # Logging policy
    logger = OrganicUserEventCounterAgent(Configuration({
        **organic_user_count_args,
        **std_env_args,
        'select_randomly': True,
        'epsilon': 0
    }))

    std_extra_env_args = {
        'num_products': num_products,
        'number_of_flips': num_products // 2,
        'agent': logger,
    }

    ###################################################
    # Grid searches for POEM and Dual Bandit variants #
    ###################################################

    # Original POEM
    lambdas_poem_no_log = [.0, .05, .1, .25, .5, 1.0, 2.0]
def __init__(self, config=Configuration(logreg_poly_args)):
    super(LogregPolyAgent, self).__init__(config, LogregPolyModelBuilder(config))
for num_products in [10, 25, 100]:
    print('#' * 48)
    print(f'{datetime.now()}\tLogging Uniform with {num_products} products...')
    print('#' * 48)

    # Environment parameters
    std_env_args = {
        **env_1_args,
        'random_seed': RandomSeed,
        'num_products': num_products,
    }

    env = gym.make('reco-gym-v1')

    # Logging policy
    logger = RandomAgent(Configuration({**random_args, **std_env_args}))

    std_extra_env_args = {
        'num_products': num_products,
        'number_of_flips': num_products // 2,
        'agent': logger,
    }

    ###################################################
    # Grid searches for POEM and Dual Bandit variants #
    ###################################################

    # Original POEM
    lambdas_poem_no_log = [.0, .05, .1, .25, .5, 1.0, 2.0]
    poem_no_log_mlr_args = {
        **pytorch_mlr_args,
def __init__(self, config=Configuration(nn_ips_args)):
    super(NnIpsAgent, self).__init__(config, NnIpsModelBuilder(config))
def __init__(self, config=Configuration(logreg_multiclass_ips_args)):
    super(LogregMulticlassIpsAgent, self).__init__(
        config, LogregMulticlassIpsModelBuilder(config)
    )
def __init__(self, config=Configuration(skyline_args)):
    super(SkylineAgent, self).__init__(
        config, SkylineModelBuilder(config)
    )
    self.skyline = True
def __init__(self, config=Configuration(organic_user_count_args)):
    super(OrganicUserEventCounterAgent, self).__init__(
        config, OrganicUserEventCounterModelBuilder(config)
    )
from agents import NnIpsAgent, nn_ips_args
from agents import OrganicCount, organic_count_args
from agents import OrganicUserEventCounterAgent, organic_user_count_args
from agents import LogregPolyAgent, logreg_poly_args

# Add a new environment here.
env_test = {
    "reco-gym-v1": env_1_args,
    "reco-gym-v0": env_0_args,
}

RandomSeed = 42

# Add a new agent here.
agent_test = {
    'prod2vec': BanditMFSquare(Configuration(bandit_mf_square_args)),
    'logistic': BanditCount(Configuration(bandit_count_args)),
    'randomagent': RandomAgent(Configuration({
        **random_args,
        'random_seed': RandomSeed,
    })),
    'logreg_multiclass_ips': LogregMulticlassIpsAgent(Configuration({
        **logreg_multiclass_ips_args,
        'select_randomly': False,
    })),
    'logreg_multiclass_ips R': LogregMulticlassIpsAgent(Configuration({
        **logreg_multiclass_ips_args,
        'select_randomly': True,
        'random_seed': RandomSeed,
    })),
    'nn_ips': NnIpsAgent(Configuration({
def gather_exploration_stats(
        env,
        env_args,
        extra_env_args,
        agents_init_data,
        training_approach,
        num_initial_train_users=1000,
        num_step_users=1000,
        epsilons=EvolutionEpsilons,
        num_evolution_steps=6
):
    """
    A helper function that collects data on Agent evolution under different values of epsilon
    for the Epsilon-Greedy Selection Policy.

    :param env: the Environment where evolution should be applied; every time a new step of the
        evolution is applied, the Environment is deeply copied, so the Environment does not
        interfere with evolution steps.
    :param env_args: Environment arguments (default ones).
    :param extra_env_args: extra Environment conditions that alter the default values.
    :param agents_init_data: Agent initialisation data. This is a dictionary with the following structure:
        {
            '<Agent Name>': {
                AgentInit.CTOR: <Constructor>,
                AgentInit.DEF_ARGS: <Default Arguments>,
            }
        }
    :param training_approach: the training approach applied during verification; for more details
        see the `TrainingApproach` enum.
    :param num_initial_train_users: how many users' data should be used to train an initial model
        BEFORE evolution steps.
    :param num_step_users: how many users' data should be used at each evolution step.
    :param epsilons: a list of epsilon values.
    :param num_evolution_steps: how many evolution steps should be applied for an Agent with the
        Epsilon-Greedy Selection Policy.

    :return: a dictionary of Agent evolution statistics in the form:
        {
            '<Agent Name>': {
                '<Epsilon Value>': {
                    EvolutionCase.SUCCESS: [an array of clicks (one entry per evolution step)],
                    EvolutionCase.FAILURE: [an array of failures to draw a click (one entry per evolution step)],
                }
            }
        }
    """
    # A dictionary that stores all data of Agent evolution statistics.
    # Key is Agent Name, value is statistics.
    agent_evolution_stats = dict()

    new_env_args = {
        **env_args,
        **extra_env_args,
    }

    new_env = deepcopy(env)
    new_env.init_gym(new_env_args)

    agents = build_agents(agents_init_data, new_env_args)

    for agent_key in agents:
        print(f"Agent: {agent_key}")
        agent_stats = dict()

        with Pool(processes=multiprocessing.cpu_count()) as pool:
            for result in pool.map(
                    _collect_evolution_stats,
                    [
                        {
                            'epsilon': epsilon,
                            'env': new_env,
                            'agent': EpsilonGreedy(
                                Configuration({
                                    **epsilon_greedy_args,
                                    **new_env_args,
                                    'epsilon': epsilon,
                                }),
                                deepcopy(agents[agent_key])
                            ),
                            'num_initial_train_users': num_initial_train_users,
                            'num_step_users': num_step_users,
                            'num_evolution_steps': num_evolution_steps,
                            'training_approach': training_approach,
                        }
                        for epsilon in epsilons
                    ]
            ):
                agent_stats = {
                    **agent_stats,
                    **result,
                }

        agent_evolution_stats[agent_key] = agent_stats

    return agent_evolution_stats
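# Usage sketch (an assumption, not from the source): how `gather_exploration_stats` might be
# called. Only the keyword names come from the signature above; the environment name and the
# argument values are illustrative, and `TrainingApproach.ALL_DATA` is assumed to be a member
# of the `TrainingApproach` enum referenced in the docstring.
env = gym.make('reco-gym-v1')
stats = gather_exploration_stats(
    env,
    env_args={**env_1_args, 'random_seed': 42},
    extra_env_args={'num_products': 10},
    agents_init_data={
        'Bandit Count': {
            AgentInit.CTOR: BanditCount,
            AgentInit.DEF_ARGS: bandit_count_args,
        },
    },
    training_approach=TrainingApproach.ALL_DATA,
    epsilons=(0.0, 0.1, 0.3),
)
# Per the docstring, stats['Bandit Count'][0.1][EvolutionCase.SUCCESS] then holds the click
# counts observed at each evolution step for epsilon = 0.1.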