def test_execute_action_on_copies_doesnt_conflict():
    players = [
        SimplePlayer(Color.RED),
        SimplePlayer(Color.BLUE),
        SimplePlayer(Color.WHITE),
        SimplePlayer(Color.ORANGE),
    ]
    game = Game(players)

    p0_color = game.state.colors[0]
    game.execute(Action(p0_color, ActionType.BUILD_SETTLEMENT, 0))

    action = Action(p0_color, ActionType.BUILD_ROAD, (0, 1))

    game_copy = game.copy()
    game_copy.execute(action)
    game_copy = game.copy()
    game_copy.execute(action)
    game.execute(action)
def decide(self, game: Game, playable_actions):
    """
    For each move, run N playouts, get statistics, and save them into the
    replay buffer. Every M decisions:

    - flush the replay buffer to disk (for offline experiments)
    - report progress on games thus far to TensorBoard (tf.summary module)
    - update the model by choosing L random samples from the replay buffer
      and training on them. (Do we need a stability check? I think not.
      Also: override the model path.)

    Decision V1: predict and choose the action that creates the biggest
    'distance' against enemies. This is effectively the same as maximizing wins.
    Decision V2: same as V1, but minimaxed some turns into the future.
    """
    if len(playable_actions) == 1:
        # this avoids imbalance (if policy-learning)
        return playable_actions[0]

    start = time.time()

    # Run MCTS playouts for each possible action, save results for training.
    samples = []
    scores = []
    print(playable_actions)
    for action in playable_actions:
        print("Considering", action)
        action_applied_game_copy = game.copy()
        action_applied_game_copy.execute(action)

        sample = create_sample_vector(action_applied_game_copy, self.color)
        samples.append(sample)

        if TRAIN:
            # Save snapshots from the perspective of each player (more training!)
            counter = run_playouts(action_applied_game_copy, NUM_PLAYOUTS)
            mcts_labels = {k: v / NUM_PLAYOUTS for k, v in counter.items()}
            DATA_LOGGER.consume(action_applied_game_copy, mcts_labels)
            scores.append(mcts_labels.get(self.color, 0))

    # TODO: if M step, do all 4 things.
    if TRAIN and self.step % FLUSH_EVERY == 0:
        self.update_model_and_flush_samples()

    # NOTE: scores is only populated when TRAIN is set; otherwise it would
    # have to come from the model prediction below (currently commented out).
    # scores = get_model().call(tf.convert_to_tensor(samples))
    best_idx = np.argmax(scores)
    best_action = playable_actions[best_idx]
    if TRAIN:
        print("Decision took:", time.time() - start)

    self.step += 1

    return best_action
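# The docstring above describes the "every M decisions" bookkeeping that
# update_model_and_flush_samples() is expected to perform, but its body is not
# part of this excerpt. The method below is only a hypothetical sketch of that
# step: the replay-buffer attribute, the tf.summary writer, the Keras model,
# and BATCH_SIZE are all assumptions, not the author's actual implementation.

import random
import tensorflow as tf

BATCH_SIZE = 256  # assumed; the "L random samples" from the docstring

def update_model_and_flush_samples(self):
    # 1) Flush the replay buffer to disk (for offline experiments).
    DATA_LOGGER.flush()  # assumed API on the logger used in decide()

    # 2) Report progress on games thus far to TensorBoard.
    with self.summary_writer.as_default():  # assumed tf.summary file writer
        tf.summary.scalar("decide/steps", self.step, step=self.step)

    # 3) Train the model on a random mini-batch from the replay buffer.
    if not self.replay_buffer:  # assumed in-memory list of (sample, label) pairs
        return
    batch = random.sample(self.replay_buffer, min(BATCH_SIZE, len(self.replay_buffer)))
    xs = tf.convert_to_tensor([x for x, _ in batch])
    ys = tf.convert_to_tensor([y for _, y in batch])
    self.model.fit(xs, ys, verbose=0)  # assumed Keras model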
def test_copy():
    """Play 30 moves, copy game, ensure they look the same but not the same."""
    players = [
        SimplePlayer(Color.RED),
        SimplePlayer(Color.BLUE),
        SimplePlayer(Color.WHITE),
        SimplePlayer(Color.ORANGE),
    ]
    game = Game(players)
    for _ in range(30):
        game.play_tick()

    game_copy = game.copy()

    assert json.dumps(game, cls=GameEncoder) == json.dumps(game_copy, cls=GameEncoder)
    assert game_copy != game
def decide(self, game: Game, playable_actions: Iterable[Action]):
    """Should return one of the playable_actions.

    Args:
        game (Game): complete game state. read-only.
        playable_actions (Iterable[Action]): options to choose from

    Return:
        action (Action): Chosen element of playable_actions
    """
    decided_action = super().decide(game, playable_actions)

    # Log simple dataset of simple features and MCTS Score
    results = run_playouts(game.copy(), NUM_SIMULATIONS)
    vector = simple_feature_vector(game, self.color)
    vector["LABEL"] = results[self.color] / float(NUM_SIMULATIONS)
    RECORDS.append(vector)

    return decided_action
def decide(self, game: Game, playable_actions):
    # if len(game.state.actions) > 10:
    #     import sys
    #     sys.exit(1)
    actions = list_prunned_actions(game) if self.prunning else playable_actions
    if len(actions) == 1:
        return actions[0]

    start = time.time()
    root = StateNode(self.color, game.copy(), None, self.prunning)
    for _ in range(self.num_simulations):
        root.run_simulation()

    print(
        f"{str(self)} took {time.time() - start} secs to decide {len(playable_actions)}"
    )

    return root.choose_best_action()
def decide(self, game: Game, playable_actions):
    # Greedily pick the action that maximizes our actual victory points
    # after executing it, breaking ties at random.
    if len(playable_actions) == 1:
        return playable_actions[0]

    best_value = float("-inf")
    best_actions = []
    for action in playable_actions:
        game_copy = game.copy()
        game_copy.execute(action)

        key = player_key(game_copy.state, self.color)
        value = game_copy.state.player_state[f"{key}_ACTUAL_VICTORY_POINTS"]
        if value == best_value:
            best_actions.append(action)
        if value > best_value:
            best_value = value
            best_actions = [action]

    return random.choice(best_actions)
def decide(self, game: Game, playable_actions):
    actions = self.get_actions(game)
    if len(actions) == 1:
        return actions[0]

    # Epsilon-greedy exploration: with probability epsilon, play a random action.
    if self.epsilon is not None and random.random() < self.epsilon:
        return random.choice(playable_actions)

    start = time.time()
    state_id = str(len(game.state.actions))
    node = DebugStateNode(state_id, self.color)  # i think it comes from outside

    deadline = start + MAX_SEARCH_TIME_SECS
    result = self.alphabeta(
        game.copy(), self.depth, float("-inf"), float("inf"), deadline, node
    )
    # print("Decision Results:", self.depth, len(actions), time.time() - start)
    # if game.state.num_turns > 10:
    #     render_debug_tree(node)
    #     breakpoint()
    return result[0]
def decide(self, game: Game, playable_actions):
    # Greedy Monte Carlo playouts: pick the action whose resulting position
    # wins the most random playouts for us.
    if len(playable_actions) == 1:
        return playable_actions[0]

    start = time.time()
    # num_playouts = PLAYOUTS_BUDGET // len(playable_actions)
    num_playouts = self.num_playouts

    best_action = None
    max_wins = None
    for action in playable_actions:
        action_applied_game_copy = game.copy()
        action_applied_game_copy.execute(action)

        counter = run_playouts(action_applied_game_copy, num_playouts)

        wins = counter[self.color]
        if max_wins is None or wins > max_wins:
            best_action = action
            max_wins = wins

    print(
        f"Greedy took {time.time() - start} secs to decide "
        + f"{len(playable_actions)} at {num_playouts} per action"
    )
    return best_action
    RandomPlayer(Color.BLUE),
    RandomPlayer(Color.WHITE),
    RandomPlayer(Color.ORANGE),
])
game.play()

print(sys.getsizeof(game))
print(getsize(game))
print(game)

start = time.time()
copy.deepcopy(game)
end = time.time()
print("copy.deepcopy(game) took", end - start, "seconds")

start = time.time()
game.copy()
end = time.time()
print("game.copy() took", end - start, "seconds")

start = time.time()
state = np.random.random((1500,))
end = time.time()
print("Create Numpy Vector", end - start, "seconds")
print(sys.getsizeof(state), getsize(state))

start = time.time()
np.copy(state)
end = time.time()
print("np.copy(state) took", end - start, "seconds")

a = np.random.randint(0, 2, size=(500, 500))
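# getsize() is called in the benchmark above but is not defined in this excerpt.
# A common recipe (assumed here, not necessarily the author's helper) is a
# recursive sys.getsizeof that follows references to approximate an object's
# deep memory footprint:

import sys
from numbers import Number
from collections import deque
from collections.abc import Mapping, Set

ZERO_DEPTH_BASES = (str, bytes, Number, range, bytearray)

def getsize(obj_0):
    """Recursively sum sys.getsizeof over an object and everything it references."""
    seen_ids = set()

    def inner(obj):
        obj_id = id(obj)
        if obj_id in seen_ids:  # avoid double-counting shared/cyclic references
            return 0
        seen_ids.add(obj_id)
        size = sys.getsizeof(obj)
        if isinstance(obj, ZERO_DEPTH_BASES):
            pass  # strings/bytes/numbers carry no references worth following
        elif isinstance(obj, (tuple, list, Set, deque)):
            size += sum(inner(item) for item in obj)
        elif isinstance(obj, Mapping) or hasattr(obj, "items"):
            size += sum(inner(k) + inner(v) for k, v in getattr(obj, "items")())
        # Also account for attributes stored in __dict__ / __slots__.
        if hasattr(obj, "__dict__"):
            size += inner(vars(obj))
        if hasattr(obj, "__slots__"):
            size += sum(
                inner(getattr(obj, s)) for s in obj.__slots__ if hasattr(obj, s)
            )
        return size

    return inner(obj_0)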