def get_recogym_configuration(num_products, random_seed=42):
    # Note: the original dict literal repeated the 'num_products' key; the duplicate is removed here.
    return Configuration({
        **garden_env_1_args,
        'num_products': num_products,
        'random_seed': random_seed,
    })
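# Usage sketch (illustrative): the returned Configuration exposes its settings via
# __dict__, which is how the garden notebook cell further below merges it into
# other argument dicts:
config = get_recogym_configuration(num_products=10)
agent_args = {
    **organic_user_count_args,
    **config.__dict__,
    'select_randomly': False,
}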
def create_agent_and_env_sess_pop(num_products: int,
                                  num_organic_users_to_train: int,
                                  num_users_to_train: int,
                                  num_users_to_score: int,
                                  random_seed: int,
                                  latent_factor: int,
                                  num_flips: int,
                                  log_epsilon: float,
                                  sigma_omega: float,
                                  agent_class,
                                  agent_configs,
                                  agent_name: str,
                                  with_cache: bool,
                                  reverse_pop=False):
    std_env_args = {
        **env_1_args,
        'random_seed': random_seed,
        'num_products': num_products,
        'K': latent_factor,
        'sigma_omega': sigma_omega,
        'number_of_flips': num_flips,
    }
    env = gym.make('reco-gym-v1')

    sess_pop_agent = OrganicUserEventCounterAgent(
        Configuration({
            **organic_user_count_args,
            **std_env_args,
            'select_randomly': True,
            'epsilon': log_epsilon,
            'num_products': num_products,
            'reverse_pop': reverse_pop,
        }))

    return env, std_env_args, sess_pop_agent
def __init__(self, config=Configuration(mnb_args)):
    self.previous_features = None
    self.previous_action = None
    self.model = None
    super(MNBAgent, self).__init__(config, MNBBuilder(config))
def __init__(self, config=Configuration(keras_nn_args)):
    self.previous_features = None
    self.previous_action = None
    self.model = None
    super(KerasNNAgent, self).__init__(config, KerasNNModelBuilder(config))
def __init__(self, config=Configuration(single_action_args)):
    super(SingleActionAgent, self).__init__(config)
    print(f"SingleActionAgent %%%% num_products: {config.num_products}")
    self.organic_views = np.zeros(self.config.num_products)
    self.act_counter = 0
    self.train_counter = 0
def __init__(self, config=Configuration(bandit_count_args)):
    super(BanditCount, self).__init__(config)
    # pulls_a[i, j]: how many times product j was recommended after product i was viewed;
    # clicks_a[i, j]: how many of those recommendations were clicked.
    self.pulls_a = np.zeros(
        (self.config.num_products, self.config.num_products))
    self.clicks_a = np.zeros(
        (self.config.num_products, self.config.num_products))
    self.last_product_viewed = None
    # Smoothed click-through-rate estimate (Laplace smoothing, i.e. the posterior
    # mean of a Beta(1, 1) prior on the click probability).
    self.ctr = (self.clicks_a + 1) / (self.pulls_a + 2)
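# Worked example of the smoothing above (illustration only, not part of the agent):
# with 3 clicks out of 10 pulls the raw CTR is 0.30, but the smoothed estimate is
# (3 + 1) / (10 + 2) = 4 / 12 ~= 0.333; with no data at all it falls back to
# 1 / 2 = 0.5 instead of dividing by zero.
clicks, pulls = 3, 10
assert abs((clicks + 1) / (pulls + 2) - 1 / 3) < 1e-12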
def __init__(self, config=Configuration(test_agent_args)):
    nn.Module.__init__(self)
    Agent.__init__(self, config)

    self.product_embedding = nn.Embedding(self.config.num_products, self.config.embed_dim)
    self.output_layer = nn.Linear(self.config.embed_dim, self.config.num_products)

    # Initialise the optimizer.
    self.optimizer = self.config.optim_function(
        self.parameters(), lr=self.config.learning_rate)

    # User 'u' indices do not all arrive, so we use an alternative indexing that
    # runs from 0 to nr_users without skipping a number.
    self.id_to_indx = np.empty((1, 0), dtype=int)  # id_to_indx[session['u']] gives the arrival position of user 'u'.
    self.indx_to_id = np.empty((1, 0), dtype=int)  # indx_to_id[b] gives the 'u' number of the b-th arrival.
    self.did_click = np.empty((1, 0), dtype=bool)  # True for users who have clicked on any product.
    self.last_arrival_id = -1  # 'u' index of the last arrival.
    self.nr_arrivals = 0  # Total number of arrivals so far.

    # Matrices needed to perform kNN. In each matrix the row corresponds to the
    # index of the arrival and the column corresponds to the item.
    self.M_organic = np.empty((0, self.config.num_products))  # Total number of organic views.
    self.M_organic_scaled = None  # A scaled version of M_organic.
    self.M_bandit_clicks = np.empty((0, self.config.num_products))  # Total number of clicks.
    self.M_bandit_attempts = np.empty((0, self.config.num_products))  # Total number of times we suggested an item.

    # We can use both the organic and the bandit data of our neighbours; each gets a weight.
    self.weight_organic = 0
    self.weight_bandit = 1

    # Information about the user we are currently serving.
    self.user_organic = np.zeros(self.config.num_products)
    self.user_attempts = np.zeros_like(self.user_organic)
    self.user_clicks = np.zeros_like(self.user_organic)

    # Number of neighbours to use. It can be supplied via the config, but we overwrite it later on.
    self.k = self.config.k

    # The kNN model. Currently it is fitted once after training; online batch training is still possible.
    self.knn_model = None

    # Number of arrivals since the last update (for online learning).
    self.num_arrivals_since_update = 0
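# A minimal sketch of how knn_model above could be fitted and queried, assuming
# scikit-learn's NearestNeighbors (the snippet never names a kNN library, so treat
# this as one plausible choice rather than the repo's actual code). The data here
# is a random placeholder standing in for M_organic_scaled and user_organic.
import numpy as np
from sklearn.neighbors import NearestNeighbors

M_organic_scaled = np.random.rand(100, 10)  # placeholder: 100 arrivals x 10 products
user_organic = np.random.rand(10)           # placeholder: current user's organic view counts

knn_model = NearestNeighbors(n_neighbors=5)
knn_model.fit(M_organic_scaled)
_, neighbour_rows = knn_model.kneighbors(user_organic.reshape(1, -1))
# neighbour_rows[0] now holds the arrival indices of the 5 most similar users.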
def build_agents(agents_init_data, new_env_args):
    agents = dict()
    for agent_key in agents_init_data:
        agent_init_data = agents_init_data[agent_key]
        ctor = agent_init_data[AgentInit.CTOR]
        def_args = agent_init_data[AgentInit.DEF_ARGS]
        agents[agent_key] = ctor(Configuration({
            **def_args,
            **new_env_args,
        }))
    return agents
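# Usage sketch, following the agents_init_data structure documented in the
# gather_exploration_stats docstring below (the agent choice is illustrative):
agents = build_agents(
    {
        'Bandit Count': {
            AgentInit.CTOR: BanditCount,
            AgentInit.DEF_ARGS: bandit_count_args,
        },
    },
    {'num_products': 10})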
def __init__(self, config=Configuration(dump_agent_args)):
    super(DumpAgent, self).__init__(config)
    self.previous_action = None
    self.data = {
        'case': [],
        't': [],
        'u': [],
        'z': [],
        'v': [],
        'a': [],
        'c': [],
        'ps': [],
        'ps-a': [],
        'done': [],
    }
def __init__(self, config=Configuration(bandit_mf_square_args)):
    nn.Module.__init__(self)
    Agent.__init__(self, config)

    self.product_embedding = nn.Embedding(self.config.num_products, self.config.embed_dim)
    self.user_embedding = nn.Embedding(self.config.num_products, self.config.embed_dim)

    # Initialise the optimizer.
    self.optimizer = self.config.optim_function(
        self.parameters(), lr=self.config.learning_rate)

    self.last_product_viewed = None
    self.curr_step = 0
    self.train_data = []
def __init__(self, config=Configuration(test_agent_args)):
    nn.Module.__init__(self)
    Agent.__init__(self, config)

    self.product_embedding = nn.Embedding(self.config.num_products, self.config.embed_dim)
    self.output_layer = nn.Linear(self.config.embed_dim, self.config.num_products)

    # Initialise the optimizer.
    self.optimizer = self.config.optim_function(
        self.parameters(), lr=self.config.learning_rate)

    self.last_product_viewed = None
    self.curr_step = 0
    self.train_data = []
    self.action = None
def __init__(self, config=Configuration(test_agent_args)):
    nn.Module.__init__(self)
    Agent.__init__(self, config)

    self.product_embedding = nn.Embedding(self.config.num_products, self.config.embed_dim)
    self.output_layer = nn.Linear(self.config.embed_dim, self.config.num_products)

    # Initialise the optimizer.
    self.optimizer = self.config.optim_function(
        self.parameters(), lr=self.config.learning_rate)

    # User 'u' indices do not all arrive, so we use an alternative indexing that
    # runs from 0 to nr_users without skipping a number.
    self.id_to_indx = np.empty((1, 0), dtype=int)  # id_to_indx[session['u']] gives the arrival position of user 'u'.
    self.indx_to_id = np.empty((1, 0), dtype=int)  # indx_to_id[b] gives the 'u' number of the b-th arrival.
    self.did_click = np.empty((1, 0), dtype=bool)
    self.last_arrival_id = -1  # 'u' index of the last arrival.
    self.nr_arrivals = 0  # Total number of arrivals so far.

    # Matrices needed to perform kNN (sparse.csr_matrix could be used for sparse data).
    self.M_organic = np.empty((0, self.config.num_products))
    self.M_bandit_clicks = np.empty((0, self.config.num_products))
    self.M_bandit_attempts = np.empty((0, self.config.num_products))

    self.user_organic = np.zeros(self.config.num_products)
    self.weight_organic = 0
    self.weight_bandit = 1
    self.k = 5
    self.last_product_viewed = None
    self.curr_step = 0
    self.train_data = []
    self.action = None
    self.knn_model = None
    self.M_organic_scaled = None
def __init__(self, config=Configuration(test_agent_args)):
    self.previous_features = None
    self.previous_action = None
    super().__init__(config, CrossConfirmationMNBAgentModelBuilder(config))
def __init__(self, config=Configuration(decision_tree_args)):
    super(DecisionTreeAgent, self).__init__(config, DecisionTreeModelBuilder(config))
def competition_score(
        num_products: int,
        num_organic_users_to_train: int,
        num_users_to_train: int,
        num_users_to_score: int,
        random_seed: int,
        latent_factor: int,
        num_flips: int,
        log_epsilon: float,
        sigma_omega: float,
        agent_class,
        agent_configs,
        agent_name: str,
        with_cache: bool,
):
    training_data_samples = tuple([num_users_to_train])
    testing_data_samples = num_users_to_score
    stat_epochs = 1
    stat_epochs_new_random_seed = True

    std_env_args = {
        **env_1_args,
        'random_seed': random_seed,
        'num_products': num_products,
        'K': latent_factor,
        'sigma_omega': sigma_omega,
        'number_of_flips': num_flips,
    }
    env = gym.make('reco-gym-v1')

    agent_stats = gather_agent_stats(
        env,
        std_env_args,
        {
            'agent': OrganicUserEventCounterAgent(
                Configuration({
                    **organic_user_count_args,
                    **std_env_args,
                    'select_randomly': True,
                    'epsilon': log_epsilon,
                    'num_products': num_products,
                })),
        },
        {
            **build_agent_init(agent_name, agent_class, {
                **agent_configs,
                'num_products': num_products,
            }),
        },
        training_data_samples,
        testing_data_samples,
        stat_epochs,
        stat_epochs_new_random_seed,
        num_organic_users_to_train,
        with_cache)

    time_start = datetime.datetime.now()
    q0_025 = []
    q0_500 = []
    q0_975 = []
    for agent_name in agent_stats[AgentStats.AGENTS]:
        agent_values = agent_stats[AgentStats.AGENTS][agent_name]
        q0_025.append(agent_values[AgentStats.Q0_025][0])
        q0_500.append(agent_values[AgentStats.Q0_500][0])
        q0_975.append(agent_values[AgentStats.Q0_975][0])
    time_end = datetime.datetime.now()
    seconds = (time_end - time_start).total_seconds()

    return pd.DataFrame({
        'q0.025': q0_025,
        'q0.500': q0_500,
        'q0.975': q0_975,
        'time': [seconds],
    })
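# Example invocation (illustrative values; agent_class/agent_configs must come from
# one of the agents defined in this repo, here the organic event counter):
stats = competition_score(
    num_products=10,
    num_organic_users_to_train=100,
    num_users_to_train=1000,
    num_users_to_score=1000,
    random_seed=42,
    latent_factor=5,
    num_flips=0,
    log_epsilon=0.01,
    sigma_omega=0.1,
    agent_class=OrganicUserEventCounterAgent,
    agent_configs=organic_user_count_args,
    agent_name='Organic Count',
    with_cache=False,
)
print(stats)  # One row of 2.5% / 50% / 97.5% CTR quantiles plus elapsed seconds.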
def __init__(self, config=Configuration(pytorch_blr_args)):
    super(PyTorchBLRAgent, self).__init__(config, PyTorchBLRModelBuilder(config))
def gather_exploration_stats(env, env_args, extra_env_args, agents_init_data,
                             training_approach, num_initial_train_users=1000,
                             num_step_users=1000, epsilons=EvolutionEpsilons,
                             num_evolution_steps=6):
    """
    A helper function that collects data regarding Agent evolution under different
    values of epsilon for the Epsilon-Greedy Selection Policy.

    :param env: the Environment where evolution should be applied; every time a new
        step of the evolution is applied, the Environment is deeply copied so that
        the Environment does not interfere with evolution steps.
    :param env_args: Environment arguments (default ones).
    :param extra_env_args: extra Environment conditions that alter the default values.
    :param agents_init_data: Agent initialisation data. This is a dictionary with the
        following structure:
        {
            '<Agent Name>': {
                AgentInit.CTOR: <Constructor>,
                AgentInit.DEF_ARGS: <Default Arguments>,
            }
        }
    :param training_approach: the training approach applied in verification; for more
        details see the `TrainingApproach` enum.
    :param num_initial_train_users: how many users' data should be used to train an
        initial model BEFORE evolution steps.
    :param num_step_users: how many users' data should be used at each evolution step.
    :param epsilons: a list of epsilon values.
    :param num_evolution_steps: how many evolution steps should be applied for an
        Agent with the Epsilon-Greedy Selection Policy.

    :return: a dictionary of Agent evolution statistics in the form:
        {
            'Agent Name': {
                'Epsilon Values': {
                    EvolutionCase.SUCCESS: [an array of clicks, one per evolution step],
                    EvolutionCase.FAILURE: [an array of failures to draw a click, one per evolution step],
                }
            }
        }
    """
    # A dictionary that stores all Agent evolution statistics.
    # The key is the Agent Name, the value is its statistics.
    agent_evolution_stats = dict()

    new_env_args = {
        **env_args,
        **extra_env_args,
    }
    new_env = deepcopy(env)
    new_env.init_gym(new_env_args)

    agents = build_agents(agents_init_data, new_env_args)

    for agent_key in agents:
        print(f"Agent: {agent_key}")
        agent_stats = dict()

        with Pool(processes=multiprocessing.cpu_count()) as pool:
            for result in pool.map(
                    _collect_evolution_stats,
                    [
                        {
                            'epsilon': epsilon,
                            'env': new_env,
                            'agent': EpsilonGreedy(
                                Configuration({
                                    **epsilon_greedy_args,
                                    **new_env_args,
                                    'epsilon': epsilon,
                                }),
                                deepcopy(agents[agent_key])),
                            'num_initial_train_users': num_initial_train_users,
                            'num_step_users': num_step_users,
                            'num_evolution_steps': num_evolution_steps,
                            'training_approach': training_approach,
                        }
                        for epsilon in epsilons
                    ]):
                agent_stats = {
                    **agent_stats,
                    **result,
                }

        agent_evolution_stats[agent_key] = agent_stats

    return agent_evolution_stats
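# Usage sketch (illustrative values; the TrainingApproach member name below is an
# assumption, so check the enum referenced in the docstring for the actual values):
env = gym.make('reco-gym-v1')
env.init_gym(env_1_args)
evolution_stats = gather_exploration_stats(
    env,
    env_1_args,
    {'number_of_flips': 5},
    {
        'Random Agent': {
            AgentInit.CTOR: RandomAgent,
            AgentInit.DEF_ARGS: random_args,
        },
    },
    TrainingApproach.ALL_DATA)  # Assumed enum member name.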
def __init__(self, config=Configuration(logreg_poly_args)):
    super(LogregPolyAgent, self).__init__(config, LogregPolyModelBuilder(config))
from recogym.agents import OrganicUserEventCounterAgent, organic_user_count_args
from recogym.agents import LogregPolyAgent, logreg_poly_args
import numpy as np

# Add a new environment here.
env_test = {
    "reco-gym-v1": env_1_args,
    "reco-gym-v0": env_0_args,
}

RandomSeed = 42

# Add a new agent here.
agent_test = {
    'prod2vec': BanditMFSquare(Configuration(bandit_mf_square_args)),
    'logistic': BanditCount(Configuration(bandit_count_args)),
    'randomagent': RandomAgent(Configuration({
        **random_args,
        'random_seed': RandomSeed,
    })),
    'logreg_multiclass_ips': LogregMulticlassIpsAgent(
        Configuration({
            **logreg_multiclass_ips_args,
            'select_randomly': False,
        })),
    'logreg_multiclass_ips R': LogregMulticlassIpsAgent(
def __init__(self, config=Configuration(bayesian_poly_args)):
    super(BayesianAgent, self).__init__(config, BayesianModelBuilder(config))
weather = np.random.triangular(0., mode, 1., size=garden_env_1_args['harvest_period'])

# # Step 2. Generate the training data and derive the raw features

# In[3]:

NUM_PLANTS = 1000
NUM_PRODUCTS = garden_env_1_args['num_products']

organic_counter_agent = SimpleFarmerAgent(
    Configuration({
        **organic_user_count_args,
        **get_recogym_configuration(NUM_PRODUCTS).__dict__,
        'select_randomly': False,
    }))

popularity_policy_logs = get_environement(NUM_PRODUCTS).generate_logs(
    NUM_PLANTS, organic_counter_agent)

# In[4]:

class ProductCountFeatureProvider(FeatureProvider):
    """This feature provider creates a user state based on viewed product count.
    Namely, the feature vector of shape (n_products,) contains, for each product,
    how many times the user has viewed it organically.

    In this setting it effectively just gives us the features of the plant that day.
    """
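# A minimal sketch of how such a feature provider could be implemented, assuming the
# FeatureProvider interface exposes the config and receives observations whose organic
# sessions carry a 'v' (viewed product) field, as elsewhere in this repo. Treat this as
# an illustration of the docstring above, not the repo's actual implementation.
class ProductCountFeatureProviderSketch(FeatureProvider):
    def __init__(self, config):
        super().__init__(config)
        self.feature_data = np.zeros(self.config.num_products)

    def observe(self, observation):
        # Increment the view count of each organically viewed product.
        for session in observation.sessions():
            self.feature_data[int(session['v'])] += 1

    def features(self, observation):
        return self.feature_data.copy()

    def reset(self):
        self.feature_data[:] = 0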
def __init__(self, config=Configuration(test_agent_args)):
    super(TestAgent, self).__init__(config, PersonalOrganicModelBuilder(config))
if __name__ == "__main__": import gym from recogym import env_1_args from recogym.bench_agents import test_agent num_products = 10 num_offline_users = 20 num_online_users = 200 agent = TestAgent( Configuration({ "random_seed": np.random.randint(2**31 - 1), "num_products": num_products, "fallback_threshold": 0.00, "online_training": True, "online_training_batch": 100, "epsilon": 0.01, "latent_factors": 2 })) env_1_args["random_seed"] = 71 env_1_args["num_products"] = num_products env = gym.make("reco-gym-v1") env.init_gym(env_1_args) print( test_agent(env, agent, num_offline_users=num_offline_users, num_online_users=num_online_users))
def __init__(self, config=Configuration(test_agent_args)):
    super(TestAgent, self).__init__(config)
    self.co_counts = np.zeros(
        (self.config.num_products, self.config.num_products))
    self.corr = None
def __init__(self, config=Configuration(organic_user_count_args)):
    super(OrganicUserEventCounterAgent, self).__init__(
        config, OrganicUserEventCounterModelBuilder(config))
def __init__(self, config=Configuration(test_agent_args)):
    super(TestAgent, self).__init__(config, MLPModelBuilder(config))
def __init__(self, config=Configuration(logistic_regression_sklearn_args)):
    super(LogisticRegression_SKLearnAgent, self).__init__(
        config, LogisticRegression_SKLearnModelBuilder(config))
def __init__(self, config=Configuration(pytorch_banditnet_args)):
    super(PyTorchBanditNetAgent, self).__init__(
        config, PyTorchBanditNetModelBuilder(config))
def __init__(self, config=Configuration(test_agent_args)):
    super(TestAgent, self).__init__(config, MLPModelBuilder(config))
    self.previous_features = None
    self.previous_action = None
    self.feature_count = 0
def __init__(self, config=Configuration(bayesian_poly_args)):
    # A leftover debug print ("ffq") was removed here.
    super(BayesianAgentVB, self).__init__(config, BayesianModelBuilderVB(config))