def init_botvbot(bot1idx, bot2idx, num_games, args, ai1_option, ai2_option,
                 game_option, *, act_name="act", viz=False):
    """Create ``num_games`` games between two ``minirts.MediumAI`` bots.

    Args:
        bot1idx: Index into the unit-type table below; selects the unit type
            handed to bot 1.
        bot2idx: Same as ``bot1idx``, for bot 2.
        num_games: Number of games to create and push onto the context.
        args: Not used by this function; kept so existing callers still work.
        ai1_option: Option object passed to bot 1's constructor.
        ai2_option: Option object passed to bot 2's constructor.
        game_option: Template copied into a fresh ``minirts.RTSGameOption``
            for each game. A ``seed`` of 777 is treated as "draw a random
            seed for every game".
        act_name: Prefix for the two data-channel names (``<name>1`` and
            ``<name>2``).
        viz: When true, one randomly chosen game gets a default spectator.

    Returns:
        ``(context, act1_dc, act2_dc)``. The two channels are created and
        returned but are not passed to either bot in this function.
    """
    batchsize = min(32, max(num_games // 2, 1))
    act1_dc = tube.DataChannel(act_name + "1", batchsize, 1)
    act2_dc = tube.DataChannel(act_name + "2", batchsize, 1)
    context = tube.Context()

    idx2utype = [
        minirts.UnitType.SPEARMAN,
        minirts.UnitType.SWORDMAN,
        minirts.UnitType.CAVALRY,
        minirts.UnitType.DRAGON,
        minirts.UnitType.ARCHER,
    ]

    use_random_seed = game_option.seed == 777
    if use_random_seed:
        print("Using random seeds...")

    # Pick the game that may receive a spectator. The range is 0-based so a
    # single game is a valid choice; randint(1, num_games - 1) used to raise
    # ValueError for num_games <= 1. -1 never matches a loop index.
    spectated_game = random.randint(0, num_games - 1) if num_games > 0 else -1

    for i in range(num_games):
        if use_random_seed:
            seed = random.randint(1, 123456)
        else:
            seed = game_option.seed

        g_option = minirts.RTSGameOption(game_option)
        g_option.seed = seed + i
        g_option.game_id = str(i)
        g = minirts.RTSGame(g_option)

        bot1 = minirts.MediumAI(ai1_option, 0, None, idx2utype[bot1idx], 1)
        bot2 = minirts.MediumAI(ai2_option, 0, None, idx2utype[bot2idx], 1)  # Utype + tower
        g.add_bot(bot1)
        g.add_bot(bot2)

        if viz and i == spectated_game:
            g.add_default_spectator()

        context.push_env_thread(g)

    return context, act1_dc, act2_dc
def create_drift_games(num_sp, num_rb, args, ai1_option, ai2_option,
                       game_option, *, act_name="act", viz=False):
    """Create ``num_rb`` games of an externally built bot against a ``MediumAI``.

    Args:
        num_sp: Only contributes to the data-channel batch size here
            (``num_sp + num_rb``); no games are created for it in this function.
        num_rb: Number of games to create against a ``minirts.MediumAI``
            opponent whose unit type is drawn at random for each game.
        args: Not used by this function; kept so existing callers still work.
        ai1_option: Option object forwarded to ``create_game`` for bot 1.
        ai2_option: Option object passed to the ``MediumAI`` opponent.
        game_option: Forwarded to ``create_game``. A ``seed`` of 777 is
            treated as "draw a random seed for every game".
        act_name: Prefix for the two data-channel names (``<name>1`` and
            ``<name>2``).
        viz: When true, one randomly chosen game gets a default spectator.

    Returns:
        ``(context, act1_dc, act2_dc)``. ``act2_dc`` is created and returned
        but is not passed to any bot in this function.

    Note:
        ``create_game`` is called as
        ``create_game(act1_dc, ai1_option, game_option, game_id, seed)`` and
        must return ``(bot, game)``. NOTE(review): confirm that the
        ``create_game`` in scope has that signature.
    """
    use_random_seed = game_option.seed == 777
    if use_random_seed:
        print("Using random seeds...")

    total_games = num_sp + num_rb
    batchsize = min(32, max(total_games // 2, 1))
    act1_dc = tube.DataChannel(act_name + "1", batchsize, 1)
    act2_dc = tube.DataChannel(act_name + "2", batchsize, 1)
    context = tube.Context()

    idx2utype = [
        minirts.UnitType.SPEARMAN,
        minirts.UnitType.SWORDMAN,
        minirts.UnitType.CAVALRY,
        minirts.UnitType.DRAGON,
        minirts.UnitType.ARCHER,
    ]

    # Pick the game that may receive a spectator. Guarded because
    # randint(0, -1) raises ValueError when num_rb == 0, which is reachable
    # with num_sp > 0. -1 never matches a loop index.
    spectated_game = random.randint(0, num_rb - 1) if num_rb > 0 else -1

    for game_id in range(num_rb):
        if use_random_seed:
            seed = random.randint(1, 123456)
        else:
            seed = game_option.seed

        bot1, g = create_game(act1_dc, ai1_option, game_option, game_id, seed)

        utype = idx2utype[random.randint(0, len(idx2utype) - 1)]
        bot2 = minirts.MediumAI(ai2_option, 0, None, utype, 1)  # Utype + tower

        g.add_bot(bot1)
        g.add_bot(bot2)

        if viz and game_id == spectated_game:
            g.add_default_spectator()

        context.push_env_thread(g)

    return context, act1_dc, act2_dc
def init_games(num_games, ai1_option, ai2_option, game_option, *, act_name="act"):
    """Create ``num_games`` games between two ``minirts.CheatExecutorAI`` bots.

    Args:
        num_games: Number of games to create and push onto the context.
        ai1_option: Option object passed to bot 1's constructor.
        ai2_option: Option object passed to bot 2's constructor.
        game_option: Template copied into a fresh ``minirts.RTSGameOption``
            for each game. A ``seed`` of 777 is treated as "draw a random
            seed for every game". A truthy ``save_replay_prefix`` gets
            ``"_0_<game index>"`` appended for each game.
        act_name: Prefix for the two data-channel names (``<name>1`` for
            bot 1, ``<name>2`` for bot 2).

    Returns:
        ``(context, act1_dc, act2_dc)``.
    """
    batchsize = min(32, max(num_games // 2, 1))
    act1_dc = tube.DataChannel(act_name + "1", batchsize, 1)
    act2_dc = tube.DataChannel(act_name + "2", batchsize, 1)
    context = tube.Context()

    use_random_seed = game_option.seed == 777
    if use_random_seed:
        # Announced once here; it used to be printed again for every game.
        print("Using random seeds...")

    for i in range(num_games):
        g_option = minirts.RTSGameOption(game_option)

        if use_random_seed:
            seed = random.randint(1, 123456)
        else:
            seed = game_option.seed
        g_option.seed = seed + i
        g_option.game_id = str(i)

        if game_option.save_replay_prefix:
            g_option.save_replay_prefix = (
                game_option.save_replay_prefix + "_0_" + str(i)
            )

        g = minirts.RTSGame(g_option)
        bot1 = minirts.CheatExecutorAI(ai1_option, 0, None, act1_dc)
        bot2 = minirts.CheatExecutorAI(ai2_option, 0, None, act2_dc)
        g.add_bot(bot1)
        g.add_bot(bot2)
        context.push_env_thread(g)

    return context, act1_dc, act2_dc
def create_game(num_games, ai1_option, ai2_option, game_option, *, act_name='act'):
    """Print the three option objects, then build ``num_games`` executor-vs-executor games.

    Each game gets its own copy of ``game_option`` whose seed is the template
    seed plus the game index. When the template has a truthy
    ``save_replay_prefix``, the game index is appended to it.

    Returns:
        ``(context, act1_dc, act2_dc)``, where the two data channels are the
        ones passed to bot 1 and bot 2 respectively.
    """
    labelled_options = (
        ('ai1 option:', ai1_option),
        ('ai2 option:', ai2_option),
        ('game option:', game_option),
    )
    for label, option in labelled_options:
        print(label)
        print(option.info())

    channel_batch = max(1, min(32, num_games // 2))
    act1_dc = tube.DataChannel(act_name + '1', channel_batch, 1)
    act2_dc = tube.DataChannel(act_name + '2', channel_batch, 1)
    context = tube.Context()

    # Unit-type lookup table; nothing below reads it at the moment.
    idx2utype = [
        minirts.UnitType.SPEARMAN,
        minirts.UnitType.SWORDMAN,
        minirts.UnitType.CAVALRY,
        minirts.UnitType.DRAGON,
        minirts.UnitType.ARCHER,
    ]

    for game_index in range(num_games):
        per_game_option = minirts.RTSGameOption(game_option)
        per_game_option.seed = game_option.seed + game_index

        replay_prefix = game_option.save_replay_prefix
        if replay_prefix:
            per_game_option.save_replay_prefix = replay_prefix + str(game_index)

        game = minirts.RTSGame(per_game_option)
        first_bot = minirts.CheatExecutorAI(ai1_option, 0, None, act1_dc)
        second_bot = minirts.CheatExecutorAI(ai2_option, 0, None, act2_dc)
        game.add_bot(first_bot)
        game.add_bot(second_bot)
        context.push_env_thread(game)

    return context, act1_dc, act2_dc
def create_tp_environment(
    seed_generator: Iterator[int],
    game_params: GameParams,
    simulation_params: SimulationParams,
    execution_params: ExecutionParams,
    pure_mcts: bool,
) -> Tuple[tube.Context, Optional[tube.DataChannel], Callable[[], int]]:
    """Set up one evaluation-mode game between a ``polygames.TPPlayer`` and an engine player.

    Args:
        seed_generator: Supplies the game seed via ``next()`` and is also
            handed to ``create_player``.
        game_params: Forwarded to ``create_game``.
        simulation_params: Provides ``num_actor`` and ``num_rollouts``.
        execution_params: Provides ``human_first``, ``time_ratio`` and
            ``total_time``.
        pure_mcts: When true, no actor data channel is created.

    Returns:
        ``(context, actor_channel, get_result)``. ``actor_channel`` is
        ``None`` when ``pure_mcts`` is true. ``get_result()`` returns
        ``game.get_result()`` indexed by ``not human_first``.
    """
    human_first = execution_params.human_first

    context = tube.Context()
    if pure_mcts:
        actor_channel = None
    else:
        actor_channel = tube.DataChannel("act", simulation_params.num_actor, 1)

    game = create_game(
        game_params,
        num_episode=1,
        seed=next(seed_generator),
        eval_mode=True,
        per_thread_batchsize=0,
    )
    engine_player = create_player(
        seed_generator=seed_generator,
        game=game,
        num_actor=simulation_params.num_actor,
        num_rollouts=simulation_params.num_rollouts,
        pure_mcts=pure_mcts,
        actor_channel=actor_channel,
        assembler=None,
        human_mode=True,
        total_time=execution_params.total_time,
        time_ratio=execution_params.time_ratio,
    )
    tp_player = polygames.TPPlayer()

    # Players are added in move order; a one-player game only gets the TP player.
    if game.is_one_player_game():
        game.add_tp_player(tp_player)
    elif human_first:
        game.add_tp_player(tp_player)
        game.add_eval_player(engine_player)
    else:
        game.add_eval_player(engine_player)
        game.add_tp_player(tp_player)

    context.push_env_thread(game)

    def get_result_for_tp_player():
        # Reads the enclosing `game` and `human_first`.
        results = game.get_result()
        return results[not human_first]

    return context, actor_channel, get_result_for_tp_player
def create_game(ai1_option, ai2_option, game_option, *, act_name='act'):
    """Print the option objects and build a single spectated game.

    Bot 1 is a ``minirts.CheatExecutorAI`` wired to a new data channel named
    ``act_name``. Bot 2 is a ``minirts.MediumAI`` constructed with
    ``UnitType.INVALID_UNITTYPE``. The game always gets a default spectator.

    Returns:
        ``(context, act_dc)``.
    """
    for heading, option in (
        ('ai1 option:', ai1_option),
        ('ai2 option:', ai2_option),
        ('game option:', game_option),
    ):
        print(heading)
        print(option.info())

    act_dc = tube.DataChannel(act_name, 1, -1)
    context = tube.Context()

    game = minirts.RTSGame(game_option)
    executor_bot = minirts.CheatExecutorAI(ai1_option, 0, None, act_dc)
    scripted_bot = minirts.MediumAI(
        ai2_option, 0, None, minirts.UnitType.INVALID_UNITTYPE, False
    )
    for bot in (executor_bot, scripted_bot):
        game.add_bot(bot)
    game.add_default_spectator()

    context.push_env_thread(game)
    return context, act_dc
# Copyright (c) Facebook, Inc. and its affiliates.
def create_evaluation_environment(
    seed_generator: Iterator[int],
    game_params: GameParams,
    eval_params: EvalParams,
    current_batch_size: int = None,
    pure_mcts_eval: bool = False,
    pure_mcts_opponent: bool = True,
    num_evaluated_games: int = 0
) -> Tuple[
    tube.Context,
    Optional[tube.DataChannel],
    Optional[tube.DataChannel],
    Callable[[], List[int]],
]:
    """Build a batch of evaluation games and a callback that collects their results.

    Args:
        seed_generator: Supplies each game's seed via ``next()`` and is also
            handed to ``create_player``.
        game_params: Forwarded to ``create_game``.
        eval_params: Provides ``num_game_eval`` and the actor and rollout
            counts for the evaluated player and for the opponent.
        current_batch_size: Number of games to create; when falsy,
            ``eval_params.num_game_eval`` games are created instead.
        pure_mcts_eval: When true, the evaluated player gets no data channel.
        pure_mcts_opponent: When true, the opponent gets no data channel.
        num_evaluated_games: Offset added to the loop index to form the game
            id passed to ``player_moves_first``.

    Returns:
        ``(context, actor_channel_eval, actor_channel_opponent, get_eval_reward)``.
        ``get_eval_reward()`` returns entry 0 of ``get_result()`` for every
        game where the evaluated player was added first (one-player games
        included), followed by entry 1 for every game where it was added second.
    """
    num_game = eval_params.num_game_eval
    num_actor_eval = eval_params.num_actor_eval
    num_rollouts_eval = eval_params.num_rollouts_eval
    num_actor_opponent = eval_params.num_actor_opponent
    num_rollouts_opponent = eval_params.num_rollouts_opponent

    first_hand = []
    second_hand = []
    games = []  # filled below but not read anywhere in this function

    context = tube.Context()

    if pure_mcts_eval:
        actor_channel_eval = None
    else:
        actor_channel_eval = tube.DataChannel(
            "act_eval", num_game * num_actor_eval, 1
        )

    if pure_mcts_opponent:
        actor_channel_opponent = None
    else:
        actor_channel_opponent = tube.DataChannel(
            "act_opponent", num_game * num_actor_opponent, 1
        )

    # Keyword arguments that are identical for both players.
    shared_player_kwargs = dict(
        seed_generator=seed_generator,
        player="mcts",
        model_manager=None,
        human_mode=False,
        sample_before_step_idx=8,
        randomized_rollouts=False,
        sampling_mcts=False,
    )

    games_in_batch = current_batch_size or num_game
    for game_no in range(games_in_batch):
        game = create_game(
            game_params, num_episode=1, seed=next(seed_generator), eval_mode=True
        )
        player = create_player(
            game=game,
            num_actor=num_actor_eval,
            num_rollouts=num_rollouts_eval,
            pure_mcts=pure_mcts_eval,
            actor_channel=actor_channel_eval,
            **shared_player_kwargs,
        )

        if game.is_one_player_game():
            game.add_eval_player(player)
            first_hand.append(game)
        else:
            opponent = create_player(
                game=game,
                num_actor=num_actor_opponent,
                num_rollouts=num_rollouts_opponent,
                pure_mcts=pure_mcts_opponent,
                actor_channel=actor_channel_opponent,
                **shared_player_kwargs,
            )
            game_id = num_evaluated_games + game_no
            if player_moves_first(game_id, num_game):
                seating, bucket = (player, opponent), first_hand
            else:
                seating, bucket = (opponent, player), second_hand
            for seat in seating:
                game.add_eval_player(seat)
            bucket.append(game)

        context.push_env_thread(game)
        games.append(game)

    def get_eval_reward():
        # The evaluated player's result is entry 0 when it was added first
        # and entry 1 when it was added second.
        as_first = [g.get_result()[0] for g in first_hand]
        as_second = [g.get_result()[1] for g in second_hand]
        return as_first + as_second

    return context, actor_channel_eval, actor_channel_opponent, get_eval_reward
def create_human_environment(
    seed_generator: Iterator[int],
    game_params: GameParams,
    simulation_params: SimulationParams,
    execution_params: ExecutionParams,
    pure_mcts: bool,
    model
) -> Tuple[tube.Context, Optional[tube.DataChannel], Callable[[], int]]:
    """Set up one evaluation-mode game between a ``polygames.HumanPlayer`` and an MCTS player.

    Args:
        seed_generator: Supplies the game seed via ``next()`` and is also
            handed to ``create_player``.
        game_params: Forwarded to ``create_game``; also provides
            ``predict_end_state`` and ``predict_n_states``.
        simulation_params: Provides ``num_actor``, ``num_rollouts`` and
            ``rewind``.
        execution_params: Provides ``human_first``, ``time_ratio``,
            ``total_time`` and ``rnn_seqlen``.
        pure_mcts: When true, no actor data channel is created.
        model: May be ``None``. If it has a positive ``rnn_cells`` attribute,
            ``[rnn_cells, rnn_channels]`` is passed on as the RNN state
            shape; its ``logit_value`` attribute (default ``False``) is
            passed on as well.

    Returns:
        ``(context, actor_channel, get_result)``. ``actor_channel`` is
        ``None`` when ``pure_mcts`` is true. ``get_result()`` returns
        ``game.get_result()`` indexed by ``not human_first``.
    """
    human_first = execution_params.human_first

    context = tube.Context()
    if pure_mcts:
        actor_channel = None
    else:
        actor_channel = tube.DataChannel("act", simulation_params.num_actor, 1)

    model_is_recurrent = (
        model is not None and hasattr(model, "rnn_cells") and model.rnn_cells > 0
    )
    rnn_state_shape = (
        [model.rnn_cells, model.rnn_channels] if model_is_recurrent else []
    )
    # Flattened state size; computed here but not consumed further down.
    rnn_state_size = (
        rnn_state_shape[0] * rnn_state_shape[1] if len(rnn_state_shape) >= 2 else 0
    )
    logit_value = getattr(model, "logit_value", False)

    game = create_game(
        game_params,
        num_episode=1,
        seed=next(seed_generator),
        eval_mode=True,
        per_thread_batchsize=0,
        rewind=simulation_params.rewind,
        predict_end_state=game_params.predict_end_state,
        predict_n_states=game_params.predict_n_states,
    )
    engine_player = create_player(
        seed_generator=seed_generator,
        game=game,
        player="mcts",
        num_actor=simulation_params.num_actor,
        num_rollouts=simulation_params.num_rollouts,
        pure_mcts=pure_mcts,
        actor_channel=actor_channel,
        model_manager=None,
        human_mode=True,
        total_time=execution_params.total_time,
        time_ratio=execution_params.time_ratio,
        sample_before_step_idx=80,
        randomized_rollouts=False,
        sampling_mcts=False,
        rnn_state_shape=rnn_state_shape,
        rnn_seqlen=execution_params.rnn_seqlen,
        logit_value=logit_value,
    )
    human_player = polygames.HumanPlayer()

    # Players are added in move order; a one-player game only gets the human.
    if game.is_one_player_game():
        game.add_human_player(human_player)
    elif human_first:
        game.add_human_player(human_player)
        game.add_eval_player(engine_player)
    else:
        game.add_eval_player(engine_player)
        game.add_human_player(human_player)

    context.push_env_thread(game)

    def get_result_for_human_player():
        # Reads the enclosing `game` and `human_first`.
        results = game.get_result()
        return results[not human_first]

    return context, actor_channel, get_result_for_human_player