def __init__(self, score_target=80, out_of=100, debug=False, step_r=0.5,
             lose_r=0., draw_r=0.5, win_r=1.):
    # Set up the env; score_target = games won out of the last `out_of` games.
    self.env = ke.make("connectx", debug=debug)
    self.configuration = self.env.configuration
    # (lose, draw, win) tuple
    self.standard_rewards = self.env.specification.reward.enum
    self.step_r = step_r
    self.lose_r = lose_r
    self.draw_r = draw_r
    self.win_r = win_r
    self.score_target = score_target
    self.out_of = out_of
    # Training config
    self.pair = [None, "random"]
    self.agent_position = 0
    self.trainer = self.env.train(self.pair)
    # Define required gym fields (examples):
    self.action_space = gym.spaces.Discrete(self.configuration.columns)
    self.observation_space = gym.spaces.Discrete(
        self.configuration.columns * self.configuration.rows)

def test_convert_inputs(self):
    env = make("halite", debug=True)
    agent_count = 4
    env.reset(agent_count)
    obs = env.state[0].observation
    converted_tensor = convert_inputs(obs)
    # Each player starts with a single ship; look up its flat board index.
    ship1_index = obs['players'][0][2]['0-1'][0]
    ship2_index = obs['players'][1][2]['0-2'][0]
    ship3_index = obs['players'][2][2]['0-3'][0]
    ship4_index = obs['players'][3][2]['0-4'][0]
    col1, row1 = get_col_row(21, ship1_index)
    col2, row2 = get_col_row(21, ship2_index)
    col3, row3 = get_col_row(21, ship3_index)
    col4, row4 = get_col_row(21, ship4_index)
    self.assertEqual(converted_tensor[0, 2, row1, col1].item(), 1)
    self.assertEqual(converted_tensor[0, 2, row2, col2].item(), 2)
    self.assertEqual(converted_tensor[0, 2, row3, col3].item(), 3)
    self.assertEqual(converted_tensor[0, 2, row4, col4].item(), 4)
    # No shipyards should exist, and no ship should be holding any halite.
    shipyard_equal_to_zero = torch.eq(converted_tensor[0, 1, :, :],
                                      torch.zeros(1, 1, 21, 21))
    ship_halite_equal_to_zero = torch.eq(converted_tensor[0, 3, :, :],
                                         torch.zeros(1, 1, 21, 21))
    self.assertTrue(torch.all(shipyard_equal_to_zero))
    self.assertTrue(torch.all(ship_halite_equal_to_zero))

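# The test above assumes the usual flat-index convention for positions on a
# size x size halite board; a sketch of get_col_row under that assumption
# (the project's real helper may differ):
def get_col_row(size, pos):
    return pos % size, pos // size
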
def run_main(agent_file1, agent_file2, num_episodes):
    """Load one or two agents from filenames and run them against each other.

    Agent file 2 can also name a Kaggle-provided agent (e.g. "random").
    """
    agent1 = load_agent(agent_file1)
    if os.path.exists(agent_file2):
        agent2 = load_agent(agent_file2)
    else:
        # Assume it's a kaggle-compatible agent name.
        agent2 = agent_file2

    print("Make env")
    env = ke.make("connectx", debug=True)
    env.reset()

    print("Run a single game against random (test, not recorded)")
    run_agent_game(env, agent1, "random", render=True)
    print("\nNote: if there was an error above, this script probably won't\n"
          "fail, but the agent is likely kaput.\n")

    print("Do the full evaluation vs agent2")
    env.reset()
    mean_scores = compare_agents(env, agent1, agent2,
                                 num_episodes=num_episodes)

def replay_match(path, step=0):
    """Replay game to a specific step - necessary for recreating stateful values."""
    with open(path, encoding='ascii') as f:
        match = json.load(f)
    env = make("halite", configuration=match['configuration'],
               steps=match['steps'])
    myid = [pid for pid, name in enumerate(match['info']['TeamNames'])
            if name == "Ready Salted"][0]
    config = env.configuration

    # The env is already done, so the full replay can be written out now.
    t = env.render(mode='html', return_obj=True, width=800, height=800,
                   header=False, controls=True)
    write_html(t, 'replay.html')

    # If the agent carries state across turns, we must run through all steps;
    # otherwise we could index directly into the step we care about.
    print(f'Running for:\n\t{path}\n\t{agent.__module__}\n\tID = {myid}\n')
    actTime = 0
    for step in range(400):
        state = match['steps'][step][0]  # list of length 1 for each step
        obs = state['observation']  # observations at this step
        obs['player'] = myid  # switch to the player we want to inspect
        obs = structify(obs)  # turn the dicts into structs with attributes
        icon = r'\|/-'[(obs.step + 1) % 4]
        t0 = time.time()
        ret = agent(obs, config)
        actTime = time.time() - t0
        print(f'{icon} step+1: {obs.step + 1} StepTime: {round(actTime, 2)}')

def run_episode(AI, board_size: int = 21, max_turns: int = 400,
                starting_halite: int = 5000, agent_count: int = 4):
    """
    Runs a complete episode with a given AI and game settings.

    :param AI: The AI function to call
    :param board_size: The size of the board to use
    :param max_turns: The turn at which the scores are totaled and the winner decided
    :param starting_halite: The amount of halite each agent starts with
    :param agent_count: How many agents to simulate
    :return: The final scores of each player
    """
    environment = make("halite", configuration={
        "size": board_size,
        "startingHalite": starting_halite,
        "episodeSteps": max_turns
    })
    environment.reset(num_agents=agent_count)
    # Put the given AI in seat 0 and fill the rest with the built-in random agent.
    environment.run([AI] + ["random"] * (agent_count - 1))

    # Write out an HTML replay of the episode.
    out = environment.render(mode="html", width=500, height=450)
    with open("halite.html", "w") as f:
        f.write(out)

    # Final reward (score) for each player.
    return [state.reward for state in environment.state]

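# A minimal usage sketch for run_episode, letting the built-in "random"
# halite agent fill every seat; `scores` holds the per-player final rewards.
scores = run_episode("random", board_size=21, max_turns=400,
                     starting_halite=5000, agent_count=4)
print(scores)
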
def test_threshold_tie():
    env = make("rps", configuration={"episodeSteps": 3, "tieRewardThreshold": 4})
    env.run([rock, paper])
    assert env.render(mode='ansi') == (
        "Round 1: Rock vs Paper, Score: -1.0 to 1.0\n"
        "Round 2: Rock vs Paper, Score: 0 to 0\n"
        "Game ended on round 2, final score: 0 to 0\n"
    )
    json = env.toJSON()
    assert json["rewards"] == [0, 0]
    assert json["statuses"] == ["DONE", "DONE"]

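# The RPS tests here assume constant-move agents like these (a sketch; the
# rps environment passes (observation, configuration) and expects an integer
# action: 0 = Rock, 1 = Paper, 2 = Scissors).
def rock(observation, configuration):
    return 0


def paper(observation, configuration):
    return 1
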
def selfplay(agent, against, num):
    training_data = []
    for game in range(num):
        # Randomly pick which side the learner plays.
        turn = np.random.randint(2)
        env = make("connectx")
        if turn == 0:
            trainer = env.train([None, against])
        else:
            trainer = env.train([against, None])
        done = False
        states = []
        observation = trainer.reset()
        while not done:
            policy = mcts(observation, env.state, agent, against)
            action = int(np.random.choice(range(7), p=policy))
            states.append([processObservation(observation), policy])
            observation, reward, done, info = trainer.step(action)
        # A None reward means the agent made an invalid move; treat it as a loss.
        if reward is None:
            reward = -1
        print("|", end='')
        training_data.append({'states': states, 'result': reward})
    return training_data

def main(runs: int):
    env = make(
        "football",
        configuration={
            "save_video": True,
            "scenario_name": "11_vs_11_hard_stochastic",
            # "scenario_name": "11_vs_11_easy_stochastic",
            # "scenario_name": "11_vs_11_stochastic",
            "running_in_notebook": False,
            "dump_full_episodes": True,
            "logdir": "../cache/runs/",
            "render": True
        },
        debug=True)

    rewards = []
    for _ in range(runs):
        # Alternative opponents:
        # output = env.run(["main.py", "do_nothing"])[-1]
        # output = env.run(["main.py", "run_right"])[-1]
        # output = env.run(["main.py", "main.py"])[-1]
        output = env.run(["main.py", "builtin_ai"])[-1]
        print('Left player: reward = %s, status = %s, info = %s' %
              (output[0]['reward'], output[0]['status'], output[0]['info']))
        rewards.append(output[0]['reward'])
    print(rewards)
    print(np.mean(rewards))

def _reset(self):
    self.last_reward = 0
    self.turns_counter = 0
    self.previous_ship_count = 0
    self.episode_ended = False
    self.total_reward = 0
    self.turns_not_moved = 0
    self.action_history = []
    # Initialize the game.
    self.environment = make("halite", configuration={
        "size": self._board_size,
        "startingHalite": 1000,
        "episodeSteps": self._max_turns
    })
    self.environment.reset(self._agent_count)
    # Get the board.
    self.board = self.get_board()
    self.state = np.zeros(
        [self._channels, self._board_size, self._board_size])
    self.state_history = [self.state] * self._frames
    self.prime_board()
    # np.float was removed in NumPy 1.24; the builtin float is equivalent.
    return ts.restart(np.array(self.state_history, dtype=float))

def __init__(self, board_size=5, startingHalite=1000):
    self.agent_count = 1
    self.board_size = board_size
    self.max_nb_ships = 1
    self.environment = make("halite", configuration={
        "size": board_size,
        "startingHalite": startingHalite
    })
    self.environment.reset(self.agent_count)
    state = self.environment.state[0]
    self.board = Board(state.observation, self.environment.configuration)
    # Ship actions in order:
    # [Hold, North, East, South, West, Convert]
    self.ship_action_conversion_dict = {
        0: None,
        1: ShipAction.NORTH,
        2: ShipAction.EAST,
        3: ShipAction.SOUTH,
        4: ShipAction.WEST,
        5: ShipAction.CONVERT
    }

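# A minimal sketch of driving the wrapper above for one turn. The class name
# HaliteShipEnv is hypothetical - substitute whatever class this __init__
# belongs to; Board/ShipAction come from kaggle_environments.envs.halite.helpers.
env = HaliteShipEnv(board_size=5)
ship = env.board.current_player.ships[0]
ship.next_action = env.ship_action_conversion_dict[1]  # 1 -> ShipAction.NORTH
env.board = env.board.next()  # advance the board by one step
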
def simulate_one_game(agent_names):
    agent_0 = get_agent_class(agent_names[0])()
    agent_1 = get_agent_class(agent_names[1])()

    def action_0(obs):
        return agent_0.action_wrapper(obs)

    def action_1(obs):
        return agent_1.action_wrapper(obs)

    env = make(
        environment="football",
        configuration={
            "save_video": False,
            "scenario_name": "11_vs_11_kaggle",
            # "episodeSteps": 10,
        },
    )
    env.reset()
    env.run([action_0, action_1])
    score = env.state[0]["observation"]["players_raw"][0]["score"]
    if agent_names[0] in GREENLIST_TO_SAVE:
        save_score_and_log(agent_0, score)
    if agent_names[1] in GREENLIST_TO_SAVE:
        # The score is from the left player's perspective, so flip it.
        save_score_and_log(agent_1, score[::-1])
    return score

def __init__(self, hidden_dim, buffer_size, gamma, batch_size, device, writer):
    self.env = make("connectx", debug=True)
    self.device = device
    n_cells = self.env.configuration.columns * self.env.configuration.rows
    n_columns = self.env.configuration.columns
    self.policy = Net(n_cells, hidden_dim, n_columns).to(device)
    self.target = Net(n_cells, hidden_dim, n_columns).to(device)
    self.enemyNet = Net(n_cells, hidden_dim, n_columns).to(device)
    self.target.load_state_dict(self.policy.state_dict())
    self.target.eval()
    self.buffer = ExperienceReplay(buffer_size)
    self.enemy = "random"
    self.trainingPair = self.env.train([None, self.enemy])
    self.loss_function = nn.MSELoss()
    self.optimizer = optim.Adam(params=self.policy.parameters(), lr=0.001)
    self.gamma = gamma
    self.batch_size = batch_size
    self.first = True
    self.player = 1
    self.writer = writer

def create_board(size=3, starting_halite=0, agent_count=2):
    env = make("halite", configuration={
        "size": size,
        "startingHalite": starting_halite
    })
    return Board(env.reset(agent_count)[0].observation, env.configuration)

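# A quick usage sketch: the halite helpers Board renders as an ASCII grid,
# so a freshly created board can simply be printed for inspection.
board = create_board(size=5, starting_halite=1000, agent_count=2)
print(board)
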
def play(left_player, right_player, print_details=True, save_video=False,
         debug=True) -> List[Tuple[Dict[str, Any], Dict[str, Any]]]:
    env = make("football", debug=debug, configuration={
        "save_video": save_video,
        "scenario_name": "11_vs_11_kaggle"
    })
    output = env.run([left_player, right_player])
    if print_details:
        for s, (left, right) in enumerate(output):
            print(f"\nStep {s}")
            print(f"Left player ({left_player}): \n"
                  f"actions taken: {left['action']}, "
                  f"reward: {left['reward']}, "
                  f"status: {left['status']}, "
                  f"info: {left['info']}")
            print(f"Right player ({right_player}): \n"
                  f"actions taken: {right['action']}, "
                  f"reward: {right['reward']}, "
                  f"status: {right['status']}, "
                  f"info: {right['info']}\n")
    print(f"Final score: {output[-1][0]['reward']} : {output[-1][1]['reward']}")
    return output

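# A minimal usage sketch, pairing two of the built-in football agents
# ("run_right" and "do_nothing") instead of submission files.
play("run_right", "do_nothing", print_details=False, save_video=False)
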
def __init__(self):
    halite_env = make('halite', configuration=GAME_CONFIG, debug=True)
    self.env = halite_env.train(GAME_AGENTS)
    self.config = halite_env.configuration

    # One discrete action per ship plus one per shipyard.
    action_space = spaces.MultiDiscrete([N_SHIP_ACTIONS] * MAX_SHIPS +
                                        [N_YARD_ACTIONS] * MAX_YARDS)
    self.action_space = gym_wrapper.spec_from_gym_space(space=action_space,
                                                        name='action')
    self.observation_space = array_spec.BoundedArraySpec(
        shape=(self.config.size, self.config.size, N_FEATURES),
        dtype=np.int32,
        minimum=0,
        maximum=1,
        name='observation')
    self.reward_range = (REWARD_LOST, REWARD_WON)
    self.obs = None
    self.last_obs = None
    self.spec = None
    self.metadata = None

def _run_inner(lh_source: str, rh_source: str) -> SimulateResult:
    env = make("mab", debug=True)
    start_time = datetime.datetime.now()
    result = env.run([lh_source, rh_source])
    duration = datetime.datetime.now() - start_time
    return SimulateResult(duration, result, env)

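# A sketch of an agent source string this harness could run; the mab
# environment calls agent(observation, configuration) and expects a bandit
# index in [0, configuration.banditCount).
RANDOM_MAB_AGENT = """
import random

def agent(observation, configuration):
    return random.randrange(configuration.banditCount)
"""

result = _run_inner(RANDOM_MAB_AGENT, RANDOM_MAB_AGENT)
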
def __init__(self, switch_prob=0.5, use_random_training=True,
             random_agent=False, test_mode=False):
    self.env = make('connectx', debug=True)
    if use_random_training:
        # Train against negamax 60% of the time, otherwise against random.
        if random.uniform(0, 1) < 0.6:
            self.pair = [None, 'negamax']
            print('create negamax agent')
        else:
            self.pair = [None, 'random']
            print('create random agent')
    else:
        self.pair = [None, 'negamax']

    # Test setup
    if random_agent and test_mode:
        self.pair = [None, 'random']
    elif test_mode:
        self.pair = [None, 'negamax']

    self.trainer = self.env.train(self.pair)
    self.switch_prob = switch_prob

    # Define required gym fields (examples):
    config = self.env.configuration
    self.action_space = gym.spaces.Discrete(config.columns)
    self.observation_space = gym.spaces.Discrete(config.columns * config.rows)

def __init__(self, switch_prob=0.5, opponent='random', invalid_action=-100):
    self.env = make('connectx')
    if opponent not in self.env.agents:
        raise InvalidArgument(f"Agent must be in {self.env.agents}")
    self.pair = [None, opponent]
    self.trainer = self.env.train(self.pair)
    self.switch_prob = switch_prob

    config = self.env.configuration
    self.action_space = spaces.Discrete(config.columns)
    self.observation_space = spaces.Box(low=0, high=2, dtype=np.uint8,
                                        shape=(config.columns * config.rows,))
    # Widen the reward range to cover the invalid-action penalty if needed.
    self.reward_range = [-1, 1]
    if invalid_action < self.reward_range[0]:
        self.reward_range[0] = invalid_action
    elif invalid_action > self.reward_range[1]:
        self.reward_range[1] = invalid_action
    self.invalid_action = invalid_action

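# A minimal sketch of the kaggle-environments train() loop these gym wrappers
# are built on; the column-picking policy here is a stand-in, not part of any
# class above.
import random
from kaggle_environments import make

env = make('connectx')
trainer = env.train([None, 'random'])
obs = trainer.reset()
done = False
while not done:
    valid = [c for c in range(env.configuration.columns) if obs['board'][c] == 0]
    obs, reward, done, info = trainer.step(random.choice(valid))
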
def run_game(weights):
    critic = Critic([64, 64, 64, 64, 32, 32, 32, 32, 16, 16], NUM_ACT, STOCK_X)
    critic(critic.stock)  # build the network before loading weights
    critic.set_weights(pickle.loads(weights))
    geese = [Goose(critic) for _ in range(NUM_GEESE)]
    env = make('hungry_geese')
    steps = env.run(geese)

    for i, step in enumerate(steps):
        if i <= 0:
            continue
        obs = step[0]['observation']
        for ii, goose in enumerate(obs['geese']):
            if goose:
                # Alive: reward survival plus current length.
                geese[ii].cs[i - 1].r += 1 + len(goose)
            elif steps[i - 1][0]['observation']['geese'][ii]:
                # Died on this step: penalize.
                geese[ii].cs[i - 1].r -= 10

    dat = list()
    for goose in geese:
        for c in goose.cs[:-1]:
            c.ss = True
        if goose.cs[-1].r >= 0.:
            goose.cs[-1].r /= (1. - NUM_LAMBDA) * 2.
        dat.extend(goose.cs)
    return dat

def playGame(pnet, nnet, args, player):
    """Executes one episode of a game."""
    env = make("hungry_geese",
               configuration={
                   "rows": args.boardSize[0],
                   "columns": args.boardSize[1]
               },
               debug=False)
    env.reset(args.numAgents)
    prev_actions = [None] * args.numAgents

    while env.state[player]['status'] == 'ACTIVE' and not env.done:
        board = get_board(env.state[0].observation, prev_actions, args)
        # Predict every player's action; `player` uses nnet, the rest use pnet.
        actions = []
        pis, _ = pnet.predicts(board, player % args.n_gpus)
        for i, pi in enumerate(pis):
            if i == player:
                pi, _ = nnet.predict(board, player, player % args.n_gpus)
            action = select_action(pi, prev_actions[i])
            actions.append(action)
        env.step(actions)
        prev_actions = actions

    reward = get_reward(env.state[0].observation, player, args.numAgents)
    length = env.state[0].observation.step
    return reward, length

def train_agent(episodes=20, steps_per_episode=50):
    env = make("halite", debug=True)
    print('configuration')
    print(env.configuration)

    ship_state_wrapper = ShipStateWrapper(
        radius=4, max_frames=1, map_size=int(env.configuration['size']))
    shipyard_state_wrapper = ShipYardStateWrapper(
        radius=4, max_frames=1, map_size=int(env.configuration['size']))
    print("Initialized state wrappers")

    ship_agent = Agent(alpha=0.99, gamma=0.75, n_actions=6, batch_size=64,
                       epsilon=.9, input_dims=ship_state_wrapper.state_size)
    shipyard_agent = Agent(alpha=0.99, gamma=0.75, n_actions=2, batch_size=64,
                           epsilon=.9, input_dims=shipyard_state_wrapper.state_size)
    print("Initialized agents")

    trainer = env.train([None, "random", "random", "random"])
    observation = trainer.reset()
    print("Initialized trainer")

    halite_env = HaliteEnv(opponents=3,
                           ship_state_wrapper=ship_state_wrapper,
                           shipyard_state_wrapper=shipyard_state_wrapper,
                           radius=4,
                           trainer=trainer)

    all_rewards = []
    for i in range(episodes):
        episode_rewards = play_episode(ship_agent=ship_agent,
                                       shipyard_agent=shipyard_agent,
                                       env=halite_env,
                                       configuration=env.configuration,
                                       episode_number=i,
                                       training=True,
                                       n_steps=steps_per_episode)
        all_rewards.append(episode_rewards)
    return all_rewards

def before_each(state=None, configuration=None):
    global env
    steps = [] if state is None else [state]
    env = make("football", steps=steps, configuration=configuration, debug=True)

def mcts(observation, state, agent, against, root=None):
    probs, value = agent(observation)
    # Reuse an existing search tree when one is passed in.
    if root is None:
        root = Node(value, probs)
    current_n = root.number
    for i in range(MCTS_WAVES):
        current_n += 1
        env = make("connectx", debug=False)
        if observation['mark'] == 1:
            trainer = env.train([None, against])
        else:
            trainer = env.train([against, None])
        env.state = state.copy()
        wave(root, agent, trainer, current_n)

    # Turn visit counts into a policy, sharpened by the temperature.
    policy = []
    for child in root.links:
        if child is None:
            policy.append(0)
        else:
            policy.append(child.number)
    policy = np.array(policy)
    policy = policy ** (1 / TEMPERATURE)
    policy = policy / policy.sum()
    return policy

def test_run_timeout():
    env = make("tictactoe", debug=True, configuration={
        "agentTimeout": 10,
        "actTimeout": 10,
        "runTimeout": 6
    })
    state = env.run([custom1, custom3])[-1]
    assert state == [
        {
            "action": 0,
            "reward": 0,
            "info": {},
            "observation": {
                "board": [1, 2, 1, 2, 1, 2, 0, 0, 0],
                "mark": 1
            },
            "status": "ACTIVE",
        },
        {
            "action": 5,
            "reward": 0,
            "info": {},
            "observation": {
                "mark": 2
            },
            "status": "INACTIVE",
        },
    ]

def test_agents_can_timeout_on_act():
    env = make("tictactoe", debug=True, configuration={
        "agentTimeout": 5,
        "actTimeout": 1
    })
    state = env.run([custom1, custom3])[-1]
    assert state == [
        {
            "action": 0,
            "reward": 0,
            "info": {},
            "observation": {
                "board": [1, 2, 1, 0, 0, 0, 0, 0, 0],
                "mark": 1
            },
            "status": "DONE",
        },
        {
            "action": None,
            "reward": None,
            "info": {},
            "observation": {
                "mark": 2
            },
            "status": "TIMEOUT",
        },
    ]

def test_win():
    env = make("rps", configuration={"episodeSteps": 2})
    env.run([paper, rock])
    json = env.toJSON()
    print(json)
    assert json["rewards"] == [1, -1]
    assert json["statuses"] == ["DONE", "DONE"]

def test_halite_exception_action_has_error_status():
    env = make("halite", debug=True)

    def error_agent(obs, config):
        raise Exception("An exception occurred!")

    env.run([error_agent, random_agent])
    json = env.toJSON()
    assert json["name"] == "halite"
    assert json["statuses"] == ["ERROR", "DONE"]

def __init__(self): self.env = make("connectx", debug=True) self.trainer = self.env.train([None, "random"]) # Define required gym fields (examples): config = self.env.configuration self.action_space = gym.spaces.Discrete(config.columns) self.observation_space = gym.spaces.Discrete(config.columns * config.rows)
def get_sample_board(board_size, agent_count):
    environment = make(
        "halite",
        configuration={"size": board_size, "startingHalite": 1000}
    )
    environment.reset(agent_count)
    state = environment.state[0]
    return Board(state.observation, environment.configuration)

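# Usage sketch: inspect the sample board's cells (in the halite helpers,
# Board.cells maps Point -> Cell and each Cell exposes a halite amount).
sample = get_sample_board(board_size=5, agent_count=2)
total_halite = sum(cell.halite for cell in sample.cells.values())
print(f"total halite on board: {total_halite}")
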
def eval_model(model, board_size=20):
    environment = make("halite", configuration={
        "size": board_size,
        "startingHalite": 1000
    })
    environment.run([agent(model), extra_agent, extra_agent, extra_agent])
    return eval_env(environment)