def _multiple_env(n_arms, n_step, n_test, prob_of_change, type_change):
    np.random.seed()

    # Build Agents
    ts = mab.BernoulliThompsonSampling(n_arms)
    discounted_ts = mab.DiscountedBernoulliTS(n_arms, gamma=0.98)
    sw_ts = mab.BernoulliSlidingWindowTS(n_arms, n=100)
    max_dsw_ts = mab.MaxDSWTS(n_arms, gamma=0.99, n=25)
    min_dsw_ts = mab.MinDSWTS(n_arms, gamma=0.95, n=100)
    mean_dsw_ts = mab.MeanDSWTS(n_arms, gamma=0.95, n=25)
    agents = [ts, discounted_ts, sw_ts, max_dsw_ts, min_dsw_ts, mean_dsw_ts]

    # Build Env with replay
    replay_env = mab.BernoulliReplayBandit(n_step=n_step,
                                           n_arms=n_arms,
                                           prob_of_change=prob_of_change,
                                           fixed_action_prob=0.0,
                                           type_change=type_change)

    # Build session
    replay_session = mab.Session(replay_env, agents)

    # Run session
    replay_session.run(n_step=n_step, n_test=n_test, use_replay=True)
    results = {
        agent: replay_session.get_reward_sum(agent) / n_step
        for agent in agents
    }
    results.update(
        {"Oracle": replay_session.get_reward_sum("Oracle") / n_step})
    return results
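A minimal driver for `_multiple_env` might look like the sketch below; the parameter values and the "abrupt" change-type label are illustrative assumptions, not settings taken from the original experiments.

# Hypothetical invocation of _multiple_env (all parameter values are assumptions)
if __name__ == "__main__":
    results = _multiple_env(n_arms=4,
                            n_step=1000,
                            n_test=10,
                            prob_of_change=0.001,
                            type_change="abrupt")
    for agent, mean_reward in results.items():
        print(agent, round(mean_reward, 4))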
Example #2
    def _find_params(self, f_gamma, f_n, sw_n, d_ts_gamma):
        n_arms = self._train_env._n_arms
        ########## BUILD AGENTS ###########
        agent_list = [
            mab.MaxDSWTS(n_arms=n_arms,
                         gamma=f_gamma,
                         n=f_n,
                         store_estimates=False),
            mab.MinDSWTS(n_arms=n_arms,
                         gamma=f_gamma,
                         n=f_n,
                         store_estimates=False),
            mab.MeanDSWTS(n_arms=n_arms,
                          gamma=f_gamma,
                          n=f_n,
                          store_estimates=False),
            mab.BernoulliSlidingWindowTS(n_arms=n_arms,
                                         n=sw_n,
                                         store_estimates=False),
            mab.DiscountedBernoulliTS(n_arms=n_arms,
                                      gamma=d_ts_gamma,
                                      store_estimates=False)
        ]
        np.random.seed()
        session = mab.Session(env=self._train_env, agent=agent_list)
        session.run(n_step=self._n_step_train, n_test=1, use_replay=True)
        return {
            str(agent): session.get_reward_sum(agent)
            for agent in agent_list
        }
Example #3
    def _run(self, fake) -> Dict:
        ########## BUILD AGENTS ###########
        max_dsw_ts = mab.MaxDSWTS(n_arms=self._n_arms,
                                  gamma=0.99999,
                                  n=2000,
                                  store_estimates=False)
        min_dsw_ts = mab.MinDSWTS(n_arms=self._n_arms,
                                  gamma=0.99999,
                                  n=2000,
                                  store_estimates=False)
        mean_dsw_ts = mab.MeanDSWTS(n_arms=self._n_arms,
                                    gamma=0.99999,
                                    n=2000,
                                    store_estimates=False)
        ts = mab.BernoulliThompsonSampling(n_arms=self._n_arms,
                                           store_estimates=False)
        sw_ts = mab.BernoulliSlidingWindowTS(n_arms=self._n_arms,
                                             n=240000,
                                             store_estimates=False)
        d_ts = mab.DiscountedBernoulliTS(n_arms=self._n_arms,
                                         gamma=0.99999,
                                         store_estimates=False)
        agent_list = [
            max_dsw_ts, min_dsw_ts, mean_dsw_ts, ts, sw_ts, d_ts, "random"
        ]

        np.random.seed()
        c = self._compression
        reward_trace = {agent: [0] for agent in agent_list}
        reward_sum = {agent: 0 for agent in agent_list}

        for step in trange(self._n_step):
            for agent in agent_list:
                if agent == "random": action = random.randint(6)
                else: action = agent.select_action()

                cluster, click = self.select_cluster(step)

                if (cluster == action) and (click == 1): reward = 1
                else: reward = 0

                # Update statistics: the reward trace is compressed by a factor
                # c, so a new point is appended every c steps and the rewards of
                # the current block are accumulated into it as reward / c.
                reward_sum[agent] += reward
                if step % c == 0:
                    reward_trace[agent].append(reward_trace[agent][-1] +
                                               reward / c)
                else:
                    reward_trace[agent][-1] += reward / c

                # Update agent estimates
                if agent != "random":
                    agent.update_estimates(action, reward)

        for agent in agent_list:
            reward_sum[agent] /= self._n_step
        return (reward_trace, reward_sum)
def test_bernoulli_algorithms():
    n_arms = 4
    env = mab.BernoulliBandit(n_arms)

    greedy_agent = mab.BernoulliGreedy(n_arms)
    ts_agent = mab.BernoulliThompsonSampling(n_arms)
    ucb_agent = mab.BernoulliUCB(n_arms, c=1)
    discounted_ts_agent = mab.DiscountedBernoulliTS(n_arms, gamma=0.99)

    session = mab.Session(
        env, [greedy_agent, ts_agent, ucb_agent, discounted_ts_agent])
    session.run(3000)
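As a hedged extension of the smoke test above, one can also check the per-agent reward sums after the run; the test name and the non-negativity assertion are illustrative, while get_reward_sum is the same accessor used by the other snippets in this file.

def test_bernoulli_reward_sums():
    # Illustrative sanity check (assumed test): every agent should end the
    # session with a non-negative accumulated reward.
    n_arms = 4
    env = mab.BernoulliBandit(n_arms)
    agents = [mab.BernoulliGreedy(n_arms),
              mab.BernoulliThompsonSampling(n_arms),
              mab.BernoulliUCB(n_arms, c=1)]
    session = mab.Session(env, agents)
    session.run(3000)
    for agent in agents:
        assert session.get_reward_sum(agent) >= 0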
def custom_environments(n_arms, n_test, test_number: int) -> Tuple:
    # Build environment
    replay = {}
    if test_number == 1:
        # TEST 1
        replay = {
            'probabilities': [0.9, 0.7, 0.1, 0.3],
            250: [(0, 0.0)],
            500: [(1, 0.0)]
        }

    elif test_number == 2:
        # TEST 2
        replay = {
            'probabilities': [0.0, 0.0, 0.1, 0.3],
            250: [(0, 0.7)],
            500: [(1, 0.9)]
        }

    elif test_number == 3:
        # TEST 3
        replay = {'probabilities': [0.2, 0.3, 0.4, 0.5]}

    replay_env = mab.BernoulliReplayBandit(replay=replay)

    # Build Agents
    ts = mab.BernoulliThompsonSampling(n_arms)
    discounted_ts = mab.DiscountedBernoulliTS(n_arms, gamma=0.99)
    sw_ts = mab.BernoulliSlidingWindowTS(n_arms, n=100)
    max_dsw_ts = mab.MaxDSWTS(n_arms, gamma=0.99, n=50)
    min_dsw_ts = mab.MinDSWTS(n_arms, gamma=0.95, n=75)
    mean_dsw_ts = mab.MeanDSWTS(n_arms, gamma=0.99, n=50)
    agents = [ts, discounted_ts, sw_ts, max_dsw_ts, min_dsw_ts, mean_dsw_ts]

    # Build session
    replay_session = mab.Session(replay_env, agents)

    # Run session
    replay_session.run(n_step=1000, n_test=n_test, use_replay=True)

    return pd.DataFrame.from_dict(
        replay_session._regrets), pd.DataFrame.from_dict(
            replay_session._real_reward_trace)
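The two DataFrames returned by custom_environments can be summarised or plotted directly; the caller below is an illustrative sketch (the test number, the use of matplotlib, and the column handling are assumptions).

# Illustrative use of custom_environments (parameter values are assumptions)
import matplotlib.pyplot as plt

regrets_df, reward_trace_df = custom_environments(n_arms=4, n_test=10, test_number=1)
print(regrets_df.mean())          # mean regret per agent over the run
reward_trace_df.plot(title="Real reward trace (test 1)")
plt.show()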
def _find_params(n_arms, n_step, n_test, prob_of_change, type_change, f_gamma,
                 f_n, sw_n, d_ts_gamma):
    ########## BUILD AGENTS ###########
    max_dsw_ts = mab.MaxDSWTS(n_arms=n_arms,
                              gamma=f_gamma,
                              n=f_n,
                              store_estimates=False)
    min_dsw_ts = mab.MinDSWTS(n_arms=n_arms,
                              gamma=f_gamma,
                              n=f_n,
                              store_estimates=False)
    mean_dsw_ts = mab.MeanDSWTS(n_arms=n_arms,
                                gamma=f_gamma,
                                n=f_n,
                                store_estimates=False)
    sw_ts = mab.BernoulliSlidingWindowTS(n_arms=n_arms,
                                         n=sw_n,
                                         store_estimates=False)
    d_ts = mab.DiscountedBernoulliTS(n_arms=n_arms,
                                     gamma=d_ts_gamma,
                                     store_estimates=False)
    agents = [max_dsw_ts, min_dsw_ts, mean_dsw_ts, sw_ts, d_ts]

    np.random.seed()

    # Build Env with replay
    replay_env = mab.BernoulliReplayBandit(n_step=n_step,
                                           n_arms=n_arms,
                                           prob_of_change=prob_of_change,
                                           fixed_action_prob=0.0,
                                           type_change=type_change)

    # Build session
    replay_session = mab.Session(replay_env, agents)

    # Run session
    replay_session.run(n_step=n_step, n_test=n_test, use_replay=True)
    return {
        str(agent): replay_session.get_reward_sum(agent) /
        replay_session.get_reward_sum("Oracle")
        for agent in agents
    }
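A small hyper-parameter sweep can be built on top of `_find_params`; the sketch below is an assumed helper (its name, the candidate grids, and the "higher reward/Oracle ratio is better" scoring are not taken from the original code).

import itertools

def sweep_params(n_arms, n_step, n_test, prob_of_change, type_change):
    # Hypothetical grid search over the filtering/discount hyper-parameters
    gammas = [0.9, 0.95, 0.99]
    windows = [25, 50, 100]
    sw_windows = [100, 200]
    d_gammas = [0.98, 0.99]
    best = {}
    for f_gamma, f_n, sw_n, d_ts_gamma in itertools.product(
            gammas, windows, sw_windows, d_gammas):
        scores = _find_params(n_arms, n_step, n_test, prob_of_change,
                              type_change, f_gamma, f_n, sw_n, d_ts_gamma)
        for agent, score in scores.items():
            # Keep the best (reward / Oracle reward) ratio seen for each agent
            if agent not in best or score > best[agent][0]:
                best[agent] = (score, (f_gamma, f_n, sw_n, d_ts_gamma))
    return best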
Example #7
    def _best_agents(self, n_arms) -> List:
        return [
            mab.MaxDSWTS(n_arms=n_arms,
                         gamma=0.9999,
                         n=800,
                         store_estimates=False),
            mab.MinDSWTS(n_arms=n_arms,
                         gamma=0.99,
                         n=800,
                         store_estimates=False),
            mab.MeanDSWTS(n_arms=n_arms,
                          gamma=0.9999,
                          n=800,
                          store_estimates=False),
            mab.BernoulliThompsonSampling(n_arms=n_arms,
                                          store_estimates=False),
            mab.BernoulliSlidingWindowTS(n_arms=n_arms,
                                         n=12800,
                                         store_estimates=False),
            mab.DiscountedBernoulliTS(n_arms=n_arms,
                                      gamma=0.9999,
                                      store_estimates=False),
            mab.RandomAlgo(n_arms=n_arms)
        ]
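The list returned by `_best_agents` can be dropped straight into a `mab.Session`, mirroring the other snippets; the method below is an assumed sketch that reuses the `self._train_env` attribute seen in the earlier `_find_params` method.

    def _evaluate_best_agents(self, n_step, n_test):
        # Illustrative sketch (method name and replay settings are assumptions)
        agents = self._best_agents(self._train_env._n_arms)
        session = mab.Session(env=self._train_env, agent=agents)
        session.run(n_step=n_step, n_test=n_test, use_replay=True)
        return {str(agent): session.get_reward_sum(agent) for agent in agents}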
Example #8
    def _run_mod(self, fake) -> Dict:
        ########## BUILD AGENTS ###########
        max_dsw_ts = mab.MaxDSWTS(n_arms=self._n_arms,
                                  gamma=0.99999,
                                  n=2000,
                                  store_estimates=False)
        min_dsw_ts = mab.MinDSWTS(n_arms=self._n_arms,
                                  gamma=0.99999,
                                  n=2000,
                                  store_estimates=False)
        mean_dsw_ts = mab.MeanDSWTS(n_arms=self._n_arms,
                                    gamma=0.99999,
                                    n=2000,
                                    store_estimates=False)
        ts = mab.BernoulliThompsonSampling(n_arms=self._n_arms,
                                           store_estimates=False)
        sw_ts = mab.BernoulliSlidingWindowTS(n_arms=self._n_arms,
                                             n=240000,
                                             store_estimates=False)
        d_ts = mab.DiscountedBernoulliTS(n_arms=self._n_arms,
                                         gamma=0.99999,
                                         store_estimates=False)
        agent_list = [
            max_dsw_ts, min_dsw_ts, mean_dsw_ts, ts, sw_ts, d_ts, "random"
        ]

        np.random.seed()
        reward_trace = {agent: [0] for agent in agent_list}
        reward_sum = {agent: 0 for agent in agent_list}
        effective_steps = {agent: 0 for agent in agent_list}

        for step in trange(self._n_step):
            for agent in list(agent_list):  # iterate over a copy: finished agents are removed below
                # Skip agents that have already reached the termination step
                if effective_steps[agent] < self._termination_step:

                    if agent == "random": action = random.randint(6)
                    else: action = agent.select_action()

                    cluster, click = self.select_cluster(step)

                    reward = 0
                    if cluster == action:
                        effective_steps[agent] += 1
                        if click == 1: reward = 1
                        else: reward = 0

                        # Update statistics
                        reward_sum[agent] += reward
                        reward_trace[agent].append(reward_trace[agent][-1] +
                                                   reward)

                    # Update agent estimates
                    if agent != "random":
                        agent.update_estimates(action, reward)
                else:
                    # The agent reached the termination step: log it and stop scheduling it
                    print(agent, step)
                    agent_list.remove(agent)

        for key in reward_sum:
            reward_sum[key] /= self._termination_step

        return (reward_trace, reward_sum)
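The traces returned by `_run` and `_run_mod` can be visualised with a few lines of matplotlib; the helper below is an illustrative sketch (its name and the output file are assumptions), using `str(agent)` as the label just as the other snippets key their result dictionaries.

    def _plot_traces(self, reward_trace, filename="reward_trace.png"):
        # Illustrative helper (assumed): one cumulative-reward curve per agent
        import matplotlib.pyplot as plt
        for agent, trace in reward_trace.items():
            plt.plot(trace, label=str(agent))
        plt.xlabel("step (compressed)")
        plt.ylabel("cumulative reward")
        plt.legend()
        plt.savefig(filename)
        plt.close()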