Example #1
0
 def test_header(self):
     """csv_header() starts with 'opponent' and lists agent names sorted."""
     pairings = [
         ("Random A", lambda seed: AgentRandom(seed)),
         ("Random C", lambda seed: AgentRandom(seed)),
         ("Random B", lambda seed: AgentRandom(seed)),
     ]
     arena = Arena(pairings, 5)
     header = arena.csv_header()
     self.assertListEqual(header,
                          ["opponent", "Random A", "Random B", "Random C"])
Example #2
0
 def test_init_multiple(self):
     """names() returns the registered agent names in sorted order."""
     entries = [
         ("Random A", lambda seed: AgentRandom(seed)),
         ("Random C", lambda seed: AgentRandom(seed)),
         ("Random B", lambda seed: AgentRandom(seed)),
     ]
     arena = Arena(entries, 5)
     expected = ["Random A", "Random B", "Random C"]
     self.assertListEqual(arena.names(), expected)
Example #3
0
    def test_random_move_02(self):
        """Successive moves from a seeded agent are deterministic."""
        game = Game()
        agent = AgentRandom(454)

        first = agent.move(game)
        self.assertEqual(first, 5)
        game.move(first)

        second = agent.move(game)
        self.assertEqual(second, 12)
Example #4
0
 def test_list(self):
     """csv_results_lists() yields one [name, rates...] row per agent."""
     arena = Arena(
         [("Random A", lambda seed: AgentRandom(seed)),
          ("Random C", lambda seed: AgentRandom(seed)),
          ("Random B", lambda seed: AgentRandom(seed))],
         5)
     self.assertEqual(len(arena.csv_results_lists()), 3)
     expected = [
         ['Random A', 0.4, 0.4, 0.4],
         ['Random C', 0.4, 0.4, 0.4],
         ['Random B', 0.4, 0.4, 0.4],
     ]
     self.assertListEqual(arena.csv_results_lists(), expected)
Example #5
0
 def test_init_multiple(self):
     """results() returns one row of win rates per registered agent."""
     arena = Arena([("Random A", lambda seed: AgentRandom(seed)),
                    ("Random C", lambda seed: AgentRandom(seed)),
                    ("Random B", lambda seed: AgentRandom(seed))], 5)
     outcome = arena.results()
     self.assertEqual(len(outcome), 3)
     expected = [
         ['Random A', 0.4, 0.4, 0.4],
         ['Random C', 0.4, 0.4, 0.4],
         ['Random B', 0.4, 0.4, 0.4],
     ]
     self.assertListEqual(outcome, expected)
Example #6
0
# Command-line entry point: run every registered agent against the others
# in the Arena and write the results header to a CSV file.
PARSER = argparse.ArgumentParser(
    description='Run the arena with available agents')  # fixed typo "availabe"

PARSER.add_argument('--output', type=str, default='arena.results.csv',
                    help='Path to write arena results')

ARGS = PARSER.parse_args()

print('Starting arena')

agents = [
    # Place agents in this list as created.
    # First in the tuple is the readable name; second is a lambda that ONLY
    # takes a random seed. The seed can be discarded if the Agent does not
    # require a seed.
    ("Random", lambda seed: AgentRandom(seed)),
    ('Max', lambda seed: AgentMax(seed)),
    ('Exact', lambda seed: AgentExact(seed)),
    ('MinMax', lambda seed: AgentMinMax(seed, depth=3))
]
# The A3C agent is optional (it needs torch/a model file); add it only if
# it loaded successfully elsewhere in this module.
if AGENT_A3C is not None:
    agents.append(AGENT_A3C)

# 500 games per pairing.
ARENA = Arena(agents, 500)


print('Run the arena for: ', ARENA.csv_header())

with open(ARGS.output, 'w') as f:
    WRITER = csv.writer(f)
    WRITER.writerow(ARENA.csv_header())
Example #7
0
    import torch
    from mancala.agents.a3c import AgentA3C
    dtype = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.FloatTensor
    AGENT_A3C = AgentA3C(os.path.join("models", "a3c.model"), dtype, 454)
except ImportError:
    AGENT_A3C = None
except:
    print("Unable to load A3C")
    AGENT_A3C = None

# Flask application serving agent moves; configuration is pulled from this
# module's upper-case attributes.
FLASKAPP = Flask(__name__)
FLASKAPP.config.from_object(__name__)

# Define agents
# All agents share the fixed seed 454 so responses are deterministic
# across restarts.
AGENT_RANDOM = AgentRandom(454)
AGENT_MAX = AgentMax(454)
AGENT_MINNY = AgentMinMax(454, 3)  # second argument is search depth (see AgentMinMax(seed, depth=3) usage)
AGENT_EXACT = AgentExact(454)


def board_str_to_game(board, player_turn):
    """Turns parameters into game or error tuple"""
    # The board arrives as one flat string; split it into 2-character cells.
    board_arr = split_string(board, 2)

    # A valid serialized board must have exactly 14 cells
    # (presumably 6 pits + 1 store per player — confirm against Game).
    if len(board_arr) != 14:
        return jsonify({"error": "Invalid Board"}), 400

    # Only players 1 and 2 exist.
    if player_turn != 1 and player_turn != 2:
        return jsonify({"error": "Invalid Player"}), 400
Example #8
0
 def test_random_no_moves_player2(self):
     """Player 2 on an all-empty board yields move 0."""
     empty_board = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
     agent = AgentRandom(454)
     picked = agent.move(Game(empty_board, 2))
     self.assertEqual(picked, 0)
Example #9
0
 def test_random_move_seed(self):
     """Seed 454 deterministically selects move 5 on a fresh game."""
     agent = AgentRandom(454)
     self.assertEqual(agent.move(Game()), 5)
Example #10
0
 def test_random_move(self):
     """A default-seeded random agent picks spot 2 on a fresh game."""
     chosen = AgentRandom().move(Game())
     self.assertEqual(chosen, 2)
Example #11
0
 def test_init_single(self):
     """A single-agent arena produces exactly one result row."""
     arena = Arena([("Random", lambda seed: AgentRandom(seed))], 5)
     rows = arena.results()
     self.assertEqual(len(rows), 1)
     self.assertListEqual(rows, [["Random", 0.4]])
Example #12
0
 def test_init_single(self):
     """names() for a single-agent arena holds just that agent's name."""
     arena = Arena([("Random", lambda seed: AgentRandom(seed))], 5)
     self.assertListEqual(arena.names(), ["Random"])
Example #13
0
def test(rank, args, shared_model, dtype):
    """Evaluation loop for A3C training.

    Repeatedly plays episodes with the current shared model, logs rewards,
    and snapshots + benchmarks the model whenever performance improves or
    enough wall-clock time has elapsed.

    Args:
        rank: worker index; offsets the RNG seeds so workers differ.
        args: parsed CLI namespace (reads seed, save_name,
            max_episode_length, evaluate).
        shared_model: globally shared ActorCritic whose weights are copied
            in at each episode start and saved to disk on milestones.
        dtype: torch tensor type (CPU or CUDA float) applied to all tensors.
    """
    test_ctr = 0
    torch.manual_seed(args.seed + rank)

    # set up logger
    timestring = str(date.today()) + '_' + \
        time.strftime("%Hh-%Mm-%Ss", time.localtime(time.time()))
    run_name = args.save_name + '_' + timestring
    configure("logs/run_" + run_name, flush_secs=5)

    # Independent environment + numpy RNG, both seeded with the same offset.
    env = MancalaEnv(args.seed + rank)
    env.seed(args.seed + rank)
    np_random, _ = seeding.np_random(args.seed + rank)
    state = env.reset()

    model = ActorCritic(state.shape[0], env.action_space).type(dtype)

    # Evaluation only: this process never computes gradients.
    model.eval()

    state = torch.from_numpy(state).type(dtype)
    reward_sum = 0
    max_reward = -99999999  # sentinel: any real episode reward beats this
    max_winrate = 0
    rewards_recent = deque([], 100)  # rolling window of last 100 episode rewards
    done = True

    start_time = time.time()
    last_test = time.time()

    episode_length = 0
    while True:
        episode_length += 1
        # Sync with the shared model
        if done:
            model.load_state_dict(shared_model.state_dict())
            # Fresh LSTM hidden/cell state at episode start; volatile=True is
            # the pre-0.4 torch idiom for inference-only graphs.
            cx = Variable(torch.zeros(1, 400).type(dtype), volatile=True)
            hx = Variable(torch.zeros(1, 400).type(dtype), volatile=True)
        else:
            # Carry the recurrent state across steps, detached from the graph.
            cx = Variable(cx.data.type(dtype), volatile=True)
            hx = Variable(hx.data.type(dtype), volatile=True)

        value, logit, (hx, cx) = model((Variable(state.unsqueeze(0),
                                                 volatile=True), (hx, cx)))
        prob = F.softmax(logit)
        # NOTE(review): this greedy action is computed but never used below —
        # the move is sampled from the full distribution instead.
        action = prob.max(1)[1].data.cpu().numpy()

        scores = [(action, score)
                  for action, score in enumerate(prob[0].data.tolist())]

        # Despite the names, no masking happens here: every action index and
        # its score are kept.
        valid_actions = [action for action, _ in scores]
        valid_scores = np.array([score for _, score in scores])

        # Sample a move proportionally to the renormalized policy scores.
        final_move = np_random.choice(valid_actions,
                                      1,
                                      p=valid_scores / valid_scores.sum())[0]

        state, reward, done, _ = env.step(final_move)
        done = done or episode_length >= args.max_episode_length
        reward_sum += reward

        if done:
            rewards_recent.append(reward_sum)
            rewards_recent_avg = sum(rewards_recent) / len(rewards_recent)
            print(
                "{} | {} | Episode Reward {: >4}, Length {: >2} | Avg Reward {:0.2f}"
                .format(
                    datetime.datetime.now().isoformat(),
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    round(reward_sum, 2), episode_length,
                    round(rewards_recent_avg, 2)))

            # if not stuck or args.evaluate:
            log_value('Reward', reward_sum, test_ctr)
            log_value('Reward Average', rewards_recent_avg, test_ctr)
            log_value('Episode length', episode_length, test_ctr)

            # Benchmark + snapshot when: new best episode reward, OR more than
            # 8 minutes since the last test, OR (with >12 episodes of history)
            # more than 2 minutes elapsed and the last 5 episodes outscored
            # the 5 before them.
            if reward_sum >= max_reward or \
                time.time() - last_test > 60 * 8 or \
                (len(rewards_recent) > 12 and
                    time.time() - last_test > 60 * 2 and
                    sum(list(rewards_recent)[-5:]) > sum(list(rewards_recent)[-10:-5])):

                # if the reward is better or every 15 minutes
                last_test = time.time()
                max_reward = reward_sum if reward_sum > max_reward else max_reward

                # Save twice: a rolling "<save_name>_max" checkpoint plus a
                # timestamped copy.
                path_output = args.save_name + '_max'
                torch.save(shared_model.state_dict(), path_output)
                path_now = "{}_{}".format(args.save_name,
                                          datetime.datetime.now().isoformat())
                torch.save(shared_model.state_dict(), path_now)

                # Win rates with A3C as player 1 vs each benchmark agent, and
                # 1-minus variants with A3C as player 2.
                # NOTE(review): performance_games is not defined in this
                # function — presumably a module-level constant; confirm.
                win_rate_v_random = Arena.compare_agents_float(
                    lambda seed: AgentA3C(path_output, dtype, seed),
                    lambda seed: AgentRandom(seed), performance_games)
                win_rate_v_exact = Arena.compare_agents_float(
                    lambda seed: AgentA3C(path_output, dtype, seed),
                    lambda seed: AgentExact(seed), performance_games)
                win_rate_v_minmax = Arena.compare_agents_float(
                    lambda seed: AgentA3C(path_output, dtype, seed),
                    lambda seed: AgentMinMax(seed, 3), performance_games)
                win_rate_exact_v = 1 - Arena.compare_agents_float(
                    lambda seed: AgentExact(seed),
                    lambda seed: AgentA3C(path_output, dtype, seed),
                    performance_games)
                win_rate_minmax_v = 1 - Arena.compare_agents_float(
                    lambda seed: AgentMinMax(seed, 3),
                    lambda seed: AgentA3C(path_output, dtype, seed),
                    performance_games)
                msg = " {} | Random: {: >5}% | Exact: {: >5}%/{: >5}% | MinMax: {: >5}%/{: >5}%".format(
                    datetime.datetime.now().strftime("%c"),
                    round(win_rate_v_random * 100, 2),
                    round(win_rate_v_exact * 100, 2),
                    round(win_rate_exact_v * 100, 2),
                    round(win_rate_v_minmax * 100, 2),
                    round(win_rate_minmax_v * 100, 2))
                print(msg)
                log_value('WinRate_Random', win_rate_v_random, test_ctr)
                log_value('WinRate_Exact', win_rate_v_exact, test_ctr)
                log_value('WinRate_MinMax', win_rate_v_minmax, test_ctr)
                log_value('WinRate_ExactP2', win_rate_exact_v, test_ctr)
                log_value('WinRate_MinMaxP2', win_rate_minmax_v, test_ctr)
                # Model quality metric: mean of the four non-random win rates.
                avg_win_rate = (win_rate_v_exact + win_rate_v_minmax +
                                win_rate_exact_v + win_rate_minmax_v) / 4
                if avg_win_rate > max_winrate:
                    print("Found superior model at {}".format(
                        datetime.datetime.now().isoformat()))
                    torch.save(
                        shared_model.state_dict(), "{}_{}_best_{}".format(
                            args.save_name,
                            datetime.datetime.now().isoformat(), avg_win_rate))
                    max_winrate = avg_win_rate

            # Reset per-episode bookkeeping.
            reward_sum = 0
            episode_length = 0
            state = env.reset()
            test_ctr += 1

            if test_ctr % 10 == 0 and not args.evaluate:
                # pickle.dump(shared_model.state_dict(), open(args.save_name + '.p', 'wb'))
                torch.save(shared_model.state_dict(), args.save_name)
            if not args.evaluate:
                # Throttle evaluation so training workers get the CPU.
                time.sleep(60)
            elif test_ctr == evaluation_episodes:
                # NOTE(review): evaluation_episodes is not defined here —
                # presumably a module-level constant; confirm.
                # Ensure the environment is closed so we can complete the
                # submission
                env.close()
                # gym.upload('monitor/' + run_name, api_key=api_key)

        state = torch.from_numpy(state).type(dtype)