Example #1
    def __init__(self, config=Configuration(bandit_count_args)):
        super(BanditCount, self).__init__(config)

        # Pull and click counters, conditioned on the last product viewed.
        self.pulls_a = np.zeros((self.config.num_products, self.config.num_products))
        self.clicks_a = np.zeros((self.config.num_products, self.config.num_products))
        self.last_product_viewed = None
        # Smoothed click-through-rate estimate (starts at 0.5 with no data).
        self.ctr = (self.clicks_a + 1) / (self.pulls_a + 2)
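The ctr estimate above applies add-one smoothing to the raw counts: with no observations it starts at 0.5 and approaches clicks / pulls as data accumulates (the posterior mean under a uniform Beta(1, 1) prior). A minimal, self-contained sketch of that behaviour, using NumPy only (the arrays and values here are illustrative, not from the source):

import numpy as np

pulls = np.array([0, 10, 1000])
clicks = np.array([0, 4, 400])

# Same smoothing rule as above: (clicks + 1) / (pulls + 2).
ctr = (clicks + 1) / (pulls + 2)
print(ctr)  # [0.5, 0.4167, 0.4002] -- starts at the 0.5 prior, converges to clicks / pulls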
Example #2
def build_agents(agents_init_data, new_env_args):
    # Instantiate each Agent with its default arguments merged with
    # (and overridden by) the new environment arguments.
    agents = dict()
    for agent_key in agents_init_data:
        agent_init_data = agents_init_data[agent_key]
        ctor = agent_init_data[AgentInit.CTOR]
        def_args = agent_init_data[AgentInit.DEF_ARGS]
        agents[agent_key] = ctor(Configuration({
            **def_args,
            **new_env_args,
        }))
    return agents
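As a usage sketch (not taken from the source), build_agents can be fed the same agents_init_data layout documented in Example #14; AgentInit, RandomAgent, and random_args are assumed to be importable as in the surrounding examples:

agents_init_data = {
    'Random Agent': {
        AgentInit.CTOR: RandomAgent,
        AgentInit.DEF_ARGS: random_args,
    },
}
agents = build_agents(agents_init_data, {'num_products': 10, 'random_seed': 42})
# agents['Random Agent'] is a RandomAgent whose random_args defaults are
# overridden by num_products=10 and random_seed=42.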
Example #3
    def __init__(self, config=Configuration(bandit_mf_square_args)):
        nn.Module.__init__(self)
        Agent.__init__(self, config)

        self.product_embedding = nn.Embedding(self.config.num_products,
                                              self.config.embed_dim)
        self.user_embedding = nn.Embedding(self.config.num_products,
                                           self.config.embed_dim)

        # Initialize the optimizer.
        self.optimizer = self.config.optim_function(
            self.parameters(), lr=self.config.learning_rate)

        self.last_product_viewed = None
        self.curr_step = 0
        self.train_data = []
Example #4
    def __init__(self, config=Configuration(organic_mf_square_args)):
        nn.Module.__init__(self)
        Agent.__init__(self, config)

        self.product_embedding = nn.Embedding(self.config.num_products,
                                              self.config.embed_dim)

        self.output_layer = nn.Linear(self.config.embed_dim, self.config.num_products)

        # Initialize the optimizer.
        self.optimizer = self.config.optim_function(
            self.parameters(), lr=self.config.learning_rate)

        self.last_product_viewed = None
        self.curr_step = 0
        self.train_data = []
        self.action = None
Example #5
    def __init__(self, config=Configuration(random_args)):
        super(RandomAgent, self).__init__(config)
        self.rng = RandomState(config.random_seed)
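Seeding the RandomState above makes the agent's draws reproducible: two agents created with the same random_seed produce identical action sequences. A quick standalone check with NumPy (the agent's own act method is not shown in this example):

from numpy.random import RandomState

rng_a = RandomState(42)
rng_b = RandomState(42)

# Identical seeds yield identical draws, so identically seeded agents act identically.
print(rng_a.choice(10, size=5))
print(rng_b.choice(10, size=5))  # prints the same five product indices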
Example #6
for num_products in [10, 25, 100]:
    print('#' * 48)

    # Environment parameters
    std_env_args = {
        **env_1_args,
        'random_seed': RandomSeed,
        'num_products': num_products,
    }
    env = gym.make('reco-gym-v1')

    # Logging policy
    logger = OrganicUserEventCounterAgent(Configuration({
        **organic_user_count_args,
        **std_env_args,
        'select_randomly': True,
        'epsilon': 0,
    }))

    std_extra_env_args = {
        'num_products': num_products,
        'number_of_flips': num_products // 2,
        'agent': logger,
    }

    ###################################################
    # Grid searches for POEM and Dual Bandit variants #
    ###################################################

    # Original POEM
    lambdas_poem_no_log = [.0, .05, .1, .25, .5, 1.0, 2.0]
Example #7
    def __init__(self, config=Configuration(logreg_poly_args)):
        super(LogregPolyAgent, self).__init__(
            config, LogregPolyModelBuilder(config))
Example #8
for num_products in [10, 25, 100]:
    print('#' * 48)
    print(f'{datetime.now()}\tLogging Uniform with {num_products} products...')
    print('#' * 48)

    # Environment parameters
    std_env_args = {
        **env_1_args,
        'random_seed': RandomSeed,
        'num_products': num_products,
    }
    env = gym.make('reco-gym-v1')

    # Logging policy
    logger = RandomAgent(Configuration({**random_args, **std_env_args}))

    std_extra_env_args = {
        'num_products': num_products,
        'number_of_flips': num_products // 2,
        'agent': logger,
    }

    ###################################################
    # Grid searches for POEM and Dual Bandit variants #
    ###################################################

    # Original POEM
    lambdas_poem_no_log = [.0, .05, .1, .25, .5, 1.0, 2.0]
    poem_no_log_mlr_args = {
        **pytorch_mlr_args,
Example #9
    def __init__(self, config=Configuration(nn_ips_args)):
        super(NnIpsAgent, self).__init__(config, NnIpsModelBuilder(config))
Example #10
    def __init__(self, config=Configuration(logreg_multiclass_ips_args)):
        super(LogregMulticlassIpsAgent, self).__init__(
            config, LogregMulticlassIpsModelBuilder(config))
Example #11
    def __init__(self, config=Configuration(skyline_args)):
        super(SkylineAgent, self).__init__(
            config,
            SkylineModelBuilder(config)
        )
        self.skyline = True
Example #12
    def __init__(self, config=Configuration(organic_user_count_args)):
        super(OrganicUserEventCounterAgent, self).__init__(
            config,
            OrganicUserEventCounterModelBuilder(config)
        )
Example #13
from agents import NnIpsAgent, nn_ips_args
from agents import OrganicCount, organic_count_args
from agents import OrganicUserEventCounterAgent, organic_user_count_args
from agents import LogregPolyAgent, logreg_poly_args

# Add a new environment here.
env_test = {
    "reco-gym-v1": env_1_args,
    "reco-gym-v0": env_0_args,
}

RandomSeed = 42

# Add a new agent here.
agent_test = {
    'prod2vec': BanditMFSquare(Configuration(bandit_mf_square_args)),
    'logistic': BanditCount(Configuration(bandit_count_args)),
    'randomagent': RandomAgent(Configuration({
        **random_args,
        'random_seed': RandomSeed,
    })),
    'logreg_multiclass_ips': LogregMulticlassIpsAgent(Configuration({
        **logreg_multiclass_ips_args,
        'select_randomly': False,
    })),
    'logreg_multiclass_ips R': LogregMulticlassIpsAgent(Configuration({
        **logreg_multiclass_ips_args,
        'select_randomly': True,
        'random_seed': RandomSeed,
    })),
    'nn_ips': NnIpsAgent(Configuration({
Example #14
def gather_exploration_stats(env,
                             env_args,
                             extra_env_args,
                             agents_init_data,
                             training_approach,
                             num_initial_train_users=1000,
                             num_step_users=1000,
                             epsilons=EvolutionEpsilons,
                             num_evolution_steps=6):
    """
    A helper function that collects statistics on how Agents evolve
    under different values of epsilon for the Epsilon-Greedy selection policy.

    :param env: the Environment where the evolution should be applied;
         every time a new evolution step is applied, the Environment is deep-copied,
         so the Environment does not interfere with the evolution steps.

    :param env_args: Environment arguments (default ones).
    :param extra_env_args: extra Environment arguments that override the default values.
    :param agents_init_data: Agent initialisation data.
        This is a dictionary that has the following structure:
        {
            '<Agent Name>': {
                AgentInit.CTOR: <Constructor>,
                AgentInit.DEF_ARGS: <Default Arguments>,
            }
        }


    :param training_approach: the training approach applied in verification;
     for more details, see the `TrainingApproach` enum.

    :param num_initial_train_users: how many users' data should be used
     to train an initial model BEFORE evolution steps.

    :param num_step_users: how many users' data should be used
     at each evolution step.

    :param epsilons: a list of epsilon values.

    :param num_evolution_steps: how many evolution steps should be applied
     for an Agent with Epsilon-Greedy Selection Policy.

    :return: a dictionary of Agent evolution statistics in the form:
        {
            'Agent Name': {
                'Epsilon Values': {
                    EvolutionCase.SUCCESS: [number of clicks at each evolution step],
                    EvolutionCase.FAILURE: [number of non-clicks at each evolution step],
                }
            }
        }
    """
    # A dictionary that stores all data of Agent evolution statistics.
    # Key is Agent Name, value is statistics.
    agent_evolution_stats = dict()

    new_env_args = {
        **env_args,
        **extra_env_args,
    }

    new_env = deepcopy(env)
    new_env.init_gym(new_env_args)

    agents = build_agents(agents_init_data, new_env_args)

    for agent_key in agents:
        print(f"Agent: {agent_key}")
        agent_stats = dict()

        with Pool(processes=multiprocessing.cpu_count()) as pool:
            for result in pool.map(
                    _collect_evolution_stats,
                    [
                        {
                            'epsilon': epsilon,
                            'env': new_env,
                            'agent': EpsilonGreedy(
                                Configuration({
                                    **epsilon_greedy_args,
                                    **new_env_args,
                                    'epsilon': epsilon,
                                }),
                                deepcopy(agents[agent_key])
                            ),
                            'num_initial_train_users': num_initial_train_users,
                            'num_step_users': num_step_users,
                            'num_evolution_steps': num_evolution_steps,
                            'training_approach': training_approach,
                        }
                        for epsilon in epsilons
                    ]):
                agent_stats = {
                    **agent_stats,
                    **result,
                }

        agent_evolution_stats[agent_key] = agent_stats

    return agent_evolution_stats
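A hedged usage sketch of gather_exploration_stats, assuming the imports used in the other examples; TrainingApproach.ALL_DATA is an assumed member name for the TrainingApproach enum mentioned in the docstring, not taken from the source:

env = gym.make('reco-gym-v1')

agents_init_data = {
    'Organic User-Event Counter': {
        AgentInit.CTOR: OrganicUserEventCounterAgent,
        AgentInit.DEF_ARGS: organic_user_count_args,
    },
}

stats = gather_exploration_stats(
    env,
    env_1_args,                                  # default environment arguments
    {'num_products': 10, 'random_seed': 42},     # extra arguments overriding the defaults
    agents_init_data,
    TrainingApproach.ALL_DATA,                   # assumed enum member -- substitute an actual TrainingApproach value
)
# stats['Organic User-Event Counter'][epsilon][EvolutionCase.SUCCESS] holds the
# click counts recorded at each evolution step for that epsilon.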