def train(self, n: int, output_iters: int, save_model=False, model_dir=model_dir, model_name='mcts', save_data=False, data_dir=data_dir, data_name='data'):
    avg = 0
    for i in tqdm(range(n)):
        # initialize new game
        self.reset(i)
        self.run()

        self.data.update(self.game, self.player, i)
        avg = sum(self.data.scores) / (i + 1)

        if i > 0 and i % output_iters == 0:
            print(f'Last {output_iters} avg: {sum(self.data.scores[i-output_iters:i]) / output_iters}')
            print(f'Total {i} avg: {avg}')

    # Persist the search tree and rollout model once training finishes
    if save_model:
        save(os.path.join(model_dir, model_name), self.player.root)
        save(os.path.join(model_dir, f'{model_name}_rollout'), self.rollout)

    if save_data:
        self.data.update_dataframes()
        self.data.augment_avg_scores(100)
        save(os.path.join(data_dir, data_name), self.data)
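# Usage sketch for train() above. `Simulator` is a hypothetical stand-in for
# the class this method belongs to (its real name is not shown in this file);
# the keyword arguments mirror the parameters train() actually accepts.
#
#   sim = Simulator(...)
#   sim.train(n=10_000, output_iters=100,
#             save_model=True, model_name='mcts',
#             save_data=True, data_name='data')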
def main(args: Namespace):
    if args.debug:
        logging.basicConfig(level=logging.INFO)

    config = GameConfig(prosperity=args.prosperity,
                        num_players=len(args.players),
                        sandbox=args.sandbox,
                        feature_type=args.ftype,
                        device=args.device)

    if args.tree_path:
        tree = GameTree.load(args.tree_path, False)
    else:
        tree = None

    players = load_players(args.players, args.models, tree=tree, train=False, rollout_type=args.rollout_type)

    logger = logging.getLogger()
    if args.log_buys:
        logger.setLevel(BUY)

    env = DefaultEnvironment(config, players, logger=logger)
    sim_data = simulate(env, args.n, tree)

    if args.save_data:
        save(args.data_path, sim_data)
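# Sketch of the CLI wiring main() above expects. The flag names and defaults
# below are assumptions inferred only from the attributes main() reads off
# `args`; the project's real parser may differ. The import is normally at the
# top of the module and is repeated here so the sketch is self-contained.
from argparse import ArgumentParser, Namespace

def build_parser() -> ArgumentParser:
    parser = ArgumentParser(description='Simulate games between configured players.')
    parser.add_argument('--players', nargs='+', required=True, help='player types, one per seat')
    parser.add_argument('--models', nargs='+', default=None, help='paths to model files for loaded players')
    parser.add_argument('--n', type=int, default=100, help='number of games to simulate')
    parser.add_argument('--prosperity', action='store_true')
    parser.add_argument('--sandbox', action='store_true')
    parser.add_argument('--ftype', default=None, help='feature type passed to GameConfig')
    parser.add_argument('--device', default='cpu')
    parser.add_argument('--tree-path', default=None, help='path to a saved GameTree to load')
    parser.add_argument('--rollout-type', default=None)
    parser.add_argument('--log-buys', action='store_true')
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--save-data', action='store_true')
    parser.add_argument('--data-path', default=None)
    return parser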
def train_mcts(env: Environment, tree: GameTree, path: str, rollout_path: str, epochs: int, train_epochs_interval: int = 1000, train_epochs_cap=10000, save_epochs=1000, scoring='win_loss'):
    for epoch in tqdm(range(epochs)):
        state: State = env.reset()
        tree.reset(state)

        done = False
        expanded = False
        flip = False
        data = {'features': [], 'rewards': [], 'cards': [], 'idxs': state.feature.idxs}
        data['model_name'] = os.path.split(path)[-1]

        while not done:
            action = DecisionResponse([])
            d: DecisionState = state.decision
            player: Player = env.players[d.controlling_player]

            # Add any states now visible due to randomness
            if tree.in_tree:
                cards = d.card_choices + [None]
                tree.node.add_unique_children(cards)

            player.makeDecision(state, action)

            if isinstance(player, MCTSPlayer):
                x = state.feature.to_numpy()
                data['features'].append(x)
                data['cards'].append(action.single_card)

            # Advance to the next node within the tree, implicitly adding a node the first time we exit the tree
            if tree.in_tree:
                tree.advance(action.single_card)

            # First time we go out of tree, enter rollout phase
            if not expanded and not tree.in_tree:
                # Previous node is the starting player's action, so the current node is the opponent's action.
                flip = (state.player == 1)
                expanded = True

            obs, reward, done, _ = env.step(action)

        # Pad rewards so every recorded feature is labeled with the terminal reward
        data['rewards'].extend([reward] * (len(data['features']) - len(data['rewards'])))

        start_idx = 1 if flip else 0
        p0_score, p1_score = state.get_player_score(0), state.get_player_score(1)

        if scoring == 'score':
            p0_reward, p1_reward = p0_score, p1_score
        elif scoring == 'win_loss':
            if reward == 0:
                p0_reward, p1_reward = 1 / 2, 1 / 2
            elif reward == 1:
                p0_reward, p1_reward = 1, 0
            else:
                p0_reward, p1_reward = 0, 1
        elif scoring == 'score_ratio':
            # Shift scores to be nonnegative before normalizing to a ratio
            min_score = min(p0_score, p1_score)
            if min_score < 0:
                p0_score_nonneg, p1_score_nonneg = p0_score + abs(min_score), p1_score + abs(min_score)
            else:
                p0_score_nonneg, p1_score_nonneg = p0_score, p1_score
            if p0_score_nonneg == 0 and p1_score_nonneg == 0:
                p0_reward, p1_reward = 0, 0
            else:
                # Normalize the shifted scores so both rewards lie in [0, 1] and sum to 1
                total_score = p0_score_nonneg + p1_score_nonneg
                p0_reward, p1_reward = p0_score_nonneg / total_score, p1_score_nonneg / total_score

        tree.node.backpropagate((p0_reward, p1_reward), start_idx=start_idx)

        if save_epochs > 0 and epoch % save_epochs == 0:
            save(path, tree._root)
            for player in env.players:
                if isinstance(player, MCTSPlayer):
                    player.rollout.save(rollout_path)
                    break  # mcts players share the tree, so only update once

        for player in env.players:
            if isinstance(player, MCTSPlayer):
                player.rollout.update(**data)
                if (epoch + 1) % train_epochs_interval == 0 and (epoch + 1) < train_epochs_cap:
                    player.rollout.learn()

    # Final save after training completes
    for player in env.players:
        if isinstance(player, MCTSPlayer):
            player.rollout.save(rollout_path)
            break

    save(path, tree._root)
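# Usage sketch for train_mcts() above (hypothetical wiring; assumes the same
# GameConfig/DefaultEnvironment construction as main() and that the players
# include at least one MCTSPlayer sharing `tree`). The second argument to
# GameTree.load mirrors the call in main(); its exact meaning is assumed.
#
#   tree = GameTree.load('models/mcts', True)
#   train_mcts(env, tree, path='models/mcts',
#              rollout_path='models/mcts_rollout',
#              epochs=50_000, scoring='win_loss')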
def save(self, path: str):
    state_dict = {'models': self.models}
    save(path, state_dict)
def save(self, path: str):
    state_dict = {'mast': self.mast, 'tau': self.tau}
    save(path, state_dict)
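# Both save() methods above delegate to the module-level save(path, obj)
# helper. A minimal loader sketch, assuming that helper pickles `obj` to
# `path` (an assumption; the real serialization format is not shown here):
def _load_state_dict(path: str) -> dict:
    import pickle
    with open(path, 'rb') as f:
        return pickle.load(f)  # e.g. {'mast': ..., 'tau': ...} for the rollout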