Code Example #1
from smac.env import StarCraft2Env
import numpy as np


def main():
    env = StarCraft2Env(map_name="8m")
    env_info = env.get_env_info()

    n_actions = env_info["n_actions"]
    n_agents = env_info["n_agents"]

    n_episodes = 10

    for e in range(n_episodes):
        env.reset()
        terminated = False
        episode_reward = 0

        while not terminated:
            obs = env.get_obs()
            state = env.get_state()

            actions = []
            for agent_id in range(n_agents):
                avail_actions = env.get_avail_agent_actions(agent_id)
                avail_actions_ind = np.nonzero(avail_actions)[0]
                action = np.random.choice(avail_actions_ind)
                actions.append(action)

            reward, terminated, _ = env.step(actions)
            episode_reward += reward

        print("Total reward in episode {} = {}".format(e, episode_reward))

    env.close()
Code Example #2
File: smac_env.py Project: dylanradovic/rl_games-1
    def __init__(self, name="3m", **kwargs):
        gym.Env.__init__(self)
        self.seed = kwargs.pop('seed', None)
        self.reward_sparse = kwargs.get('reward_sparse', False)
        self.use_central_value = kwargs.pop('central_value', False)
        self.random_invalid_step = kwargs.pop('random_invalid_step', False)
        self.replay_save_freq = kwargs.pop('replay_save_freq', 10000)
        self.apply_agent_ids = kwargs.pop('apply_agent_ids', False)
        self.env = StarCraft2Env(map_name=name, seed=self.seed, **kwargs)
        self.env_info = self.env.get_env_info()

        self._game_num = 0
        self.n_actions = self.env_info["n_actions"]
        self.n_agents = self.env_info["n_agents"]
        self.action_space = gym.spaces.Discrete(self.n_actions)
        one_hot_agents = 0

        if self.apply_agent_ids:
            one_hot_agents = self.n_agents
        self.observation_space = gym.spaces.Box(
            low=0,
            high=1,
            shape=(self.env_info['obs_shape'] + one_hot_agents, ),
            dtype=np.float32)
        self.state_space = gym.spaces.Box(
            low=0,
            high=1,
            shape=(self.env_info['state_shape'], ),
            dtype=np.float32)

        self.obs_dict = {}
Code Example #3
    def __init__(self, env, args):
        self.env = env

        # used to evaluate the algorithm in a sparse-reward environment: +1 for a win, -1 for a loss, 0 for every other step
        self.env_evaluate = StarCraft2Env(map_name=args.map,
                                          step_mul=args.step_mul,
                                          difficulty=args.difficulty,
                                          game_version=args.game_version,
                                          seed=args.seed,
                                          replay_dir=args.replay_dir,
                                          reward_sparse=True,
                                          reward_scale=False)

        if args.alg.find('commnet') > -1 or args.alg.find('g2anet') > -1:  # communication
            self.agents = CommAgents(args)
            self.rolloutWorker = CommRolloutWorker(env, self.agents, args)
            self.evaluateWorker = CommRolloutWorker(self.env_evaluate, self.agents, args)
        else:  # no communication
            self.agents = Agents(args)
            self.rolloutWorker = RolloutWorker(env, self.agents, args)
            self.evaluateWorker = RolloutWorker(self.env_evaluate, self.agents, args)
        if args.alg.find('coma') == -1 and args.alg.find('central_v') == -1 and args.alg.find('reinforce') == -1:  # these three algorithms are on-policy
            self.buffer = ReplayBuffer(args)
        self.args = args

        # used to save the plt and pkl files
        self.save_path = self.args.result_dir + '/' + args.alg + '/' + args.map
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
Code Example #4
def heuristic_run(n_episodes, map_name, env_args):
    env_args['map_name'] = map_name
    pprint.pprint(env_args)
    env = StarCraft2Env(**env_args)
    wins = 0
    with trange(n_episodes) as t:
        for i in t:
            env.reset()
            terminated = False
            episode_reward = 0

            while not terminated:
                actions = []
                for agent_id in range(env.n_agents):
                    avail_actions = env.get_avail_agent_actions(agent_id)
                    avail_actions_ind = np.nonzero(avail_actions)[0]
                    action = np.random.choice(avail_actions_ind)
                    # _, haction_num = env.get_agent_action_heuristic(agent_id, action)
                    actions.append(action)

                reward, terminated, info = env.step(actions)
            try:
                wins += info['battle_won']
            except KeyError:
                # 'battle_won' may be missing if the episode ended at the step limit
                continue
            t.set_postfix(win_rate=wins / (i + 1.))
        env.close()
    print("\n")
    print("In {} episodes games, heuristic ai wins {}; win rate is {}".format(
        n_episodes, wins,
        float(wins) / float(n_episodes)))
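A minimal, hypothetical invocation of the helper above, assuming the surrounding module already imports StarCraft2Env, numpy as np, pprint, and tqdm's trange; the map name and episode count here are placeholders:

if __name__ == "__main__":
    # hypothetical call: 10 episodes of random play on the 8m map, reporting the win rate
    heuristic_run(n_episodes=10, map_name="8m", env_args={})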
Code Example #5
    def __init__(self, **smac_args):
        """Create a new multi-agent StarCraft env compatible with RLlib.

        Arguments:
            smac_args (dict): Arguments to pass to the underlying
                smac.env.starcraft.StarCraft2Env instance.

        Examples:
            >>> from smac_rllib import RLlibStarCraft2Env
            >>> env = RLlibStarCraft2Env(map_name="8m")
            >>> print(env.reset())
        """

        self._env = StarCraft2Env(**smac_args)
        self.horizon = self._env.episode_limit
        self.nbr_agents = self._env.n_agents
        self._ready_agents = []
        self.observation_space = Dict({
            "obs": Box(-1, 1, shape=(self.nbr_agents, self._env.get_obs_size(),)),
            "avail_actions": Box(0, 1, shape=(self.nbr_agents, self._env.get_total_actions(),)),
            "state": Box(-float('inf'), float('inf'), shape=(self._env.get_state_size(),)),
            "battle_won": Box(0,1, shape=(1,), dtype=np.bool),
            "dead_allies": Box(0,self.nbr_agents, shape=(1,), dtype=np.int),
            "dead_enemies": Box(0, int(1e3), shape=(1,), dtype=np.int)
        })
        self.action_space = MultiDiscrete([self._env.get_total_actions()] * self.nbr_agents)
Code Example #6
    def __init__(self, env, args):
        self.env = env

        # used to evaluate the algorithm in a sparse-reward environment: +1 for a win, -1 for a loss, 0 for every other step
        self.env_evaluate = StarCraft2Env(map_name=args.map,
                                          step_mul=args.step_mul,
                                          difficulty=args.difficulty,
                                          game_version=args.game_version,
                                          seed=args.seed,
                                          replay_dir=args.replay_dir,
                                          reward_sparse=True,
                                          reward_scale=False)

        if args.alg == 'commnet_coma':
            self.agents = CommNetAgents(args)
            self.rolloutWorker = CommNetRolloutWorker(env, self.agents, args)
            self.evaluateWorker = CommNetRolloutWorker(self.env_evaluate,
                                                       self.agents, args)
        else:
            self.agents = Agents(args)
            self.rolloutWorker = RolloutWorker(env, self.agents, args)
            self.evaluateWorker = RolloutWorker(self.env_evaluate, self.agents,
                                                args)
        if args.alg != 'coma' and args.alg != 'commnet_coma':
            self.buffer = ReplayBuffer(args)
        self.args = args

        # used to save the plt and pkl files
        self.save_path = self.args.result_dir + '/' + args.alg + '/' + args.map
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)
Code Example #7
File: env.py Project: do-not-be-hasty/seed_rl
def create_environment(_):
    """Returns a starcraft environment."""
    task = FLAGS.task_name

    logging.info('Creating environment: %s', task)
    env = StarCraft2Env(map_name=task, replay_dir=FLAGS.replay_dir)
    return env_wrappers.SCWrapper(env)
Code Example #8
def prepare_env_and_agents():
    # base path to save results in a dedicated directory every launch
    launch_time = datetime.datetime.now()
    save_path_base = pathlib.Path(RESULT_PATH_BASE)
    save_path_base = save_path_base / f'{MODEL_NAME}' \
                                      f'_d{launch_time:%Y_%m_%d}' \
                                      f'_t{launch_time:%H_%M_%S}'

    tb_path = save_path_base / 'tensorboard'
    tb_writer = tensorboardX.SummaryWriter(str(tb_path.resolve()))

    # calculate eps decay and num exploration from N_EPISODE
    num_exploration_ep = int(N_EPISODE * EXPLORATION_FRAQ)
    save_freq = min(20, N_EPISODE // 15)
    eps_decay_eps = N_EPISODE * EPS_TIME_FRACTION

    # prepare env
    env = StarCraft2Env(map_name=MAP_NAME,
                        seed=SEED,
                        reward_only_positive=False,
                        obs_timestep_number=True,
                        reward_scale_rate=200)
    # prepare agents
    agents: List[Agent] = prepare_agents(
        env,
        eps_decay_eps,
    )  # tb_writer)
    return save_freq, agents, env, num_exploration_ep, save_path_base, tb_writer
Code Example #9
File: genetic_learn.py Project: DrMatters/sc2_agents
def main():
    tb_writer = tensorboardX.SummaryWriter(RESULT_PATH_BASE + 'tensorboard')
    env = StarCraft2Env(map_name=MAP_NAME, seed=SEED,
                        reward_only_positive=False, obs_timestep_number=True,
                        reward_scale_rate=200)

    if PRESET == 'q_table':
        # env = StarCraft2Env(map_name="2m2zFOX", difficulty="1", seed=SEED)
        evaluator = evaluate.SCAbsPosEvaluator(env)
        toolbox = prepare_env(individuals.AgentwiseQTable, evaluator)
    elif PRESET == 'dqn':
        evaluator = evaluate.SCNativeEvaluator(env, tb_writer)
        toolbox = prepare_env(individuals.AgentwiseFullyConnected, evaluator)
    else:
        raise NotImplementedError(f'Preset {PRESET} for genetic learn is not available')

    pop = toolbox.population(n=POPULATION)
    hof = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    algorithms.eaSimple(pop, toolbox, cxpb=0.7, mutpb=0.2, ngen=NUM_GENERATIONS,
                        stats=stats, halloffame=hof)
    save_top_individual(hof)
    print(f'Num of evaluations (episodes): {evaluator.evaluation_counter}')

    if EVALUATE_TOP:
        print("results of evaluation of top individual")
        evaluator.evaluate_single(hof.items[0], 0)

    return pop, stats, hof
Code Example #10
File: smac_env.py Project: schroederdewitt/rl_games
    def __init__(self, name="3m", replay_save_freq=4000, **kwargs):
        gym.Env.__init__(self)
        self.seed = kwargs.pop('seed', None)
        self.reward_sparse = kwargs.pop('reward_sparse', False)
        self.env = StarCraft2Env(map_name=name,
                                 seed=self.seed,
                                 reward_sparse=self.reward_sparse)
        self.env_info = self.env.get_env_info()
        self.replay_save_freq = replay_save_freq
        self._game_num = 0
        self.n_actions = self.env_info["n_actions"]
        self.n_agents = self.env_info["n_agents"]
        self.action_space = gym.spaces.Discrete(self.n_actions)
        self.observation_space = gym.spaces.Box(
            low=0,
            high=1,
            shape=(self.env_info['obs_shape'], ),
            dtype=np.float32)
        self.central_state_space = gym.spaces.Box(
            low=0,
            high=1,
            shape=(self.env_info['state_shape'], ),
            dtype=np.float32)

        self.random_invalid_step = kwargs.pop('random_invalid_step', False)
Code Example #11
    def __init__(self):
        self.rnnagent = RNNAgent()
        self.epsilon = config.epsilon_start
        self.epsilon_deg = (config.epsilon_start -
                            config.epsilon_finish) / config.epsilon_anneal_time

        self.env = StarCraft2Env(map_name=config.map_name)
Code Example #12
    def __init__(
        self,
        map_name="8m",
        step_mul=None,
        move_amount=2,
        difficulty="7",
        game_version=None,
        seed=None,
        continuing_episode=False,
        obs_all_health=True,
        obs_own_health=True,
        obs_last_action=False,
        obs_pathing_grid=False,
        obs_terrain_height=False,
        obs_instead_of_state=False,
        state_last_action=True,
        reward_sparse=False,
        reward_only_positive=True,
        reward_death_value=10,
        reward_win=200,
        reward_defeat=0,
        reward_negative_scale=0.5,
        reward_scale=True,
        reward_scale_rate=20,
        replay_dir="",
        replay_prefix="",
        window_size_x=1920,
        window_size_y=1200,
        debug=False,
    ):
        self.env = StarCraft2Env(
            map_name=map_name, step_mul=step_mul, move_amount=move_amount,
            difficulty=difficulty, game_version=game_version, seed=seed,
            continuing_episode=continuing_episode, obs_all_health=obs_all_health,
            obs_own_health=obs_own_health, obs_last_action=obs_last_action,
            obs_pathing_grid=obs_pathing_grid, obs_terrain_height=obs_terrain_height,
            obs_instead_of_state=obs_instead_of_state, state_last_action=state_last_action,
            reward_sparse=reward_sparse, reward_only_positive=reward_only_positive,
            reward_death_value=reward_death_value, reward_win=reward_win,
            reward_defeat=reward_defeat, reward_negative_scale=reward_negative_scale,
            reward_scale=reward_scale, reward_scale_rate=reward_scale_rate,
            replay_dir=replay_dir, replay_prefix=replay_prefix,
            window_size_x=window_size_x, window_size_y=window_size_y, debug=debug)
        env_info = self.env.get_env_info()

        num_actions = env_info['n_actions']
        self.n = env_info['n_agents']
        self.state_shape = env_info['state_shape']

        # Configure action space
        self.action_space = []
        self.observation_space = []

        for _ in range(self.n):
            self.action_space.append(spaces.Discrete(num_actions))
            self.observation_space.append(
                spaces.Box(low=-1.0,
                           high=1.0,
                           shape=(self.env.get_obs_size(), ),
                           dtype=np.float32))

        self.state = None
Code Example #13
def main(model_path):
    env = StarCraft2Env(map_name="8m",
                        window_size_x=1920 / 3,
                        window_size_y=1080 / 3)

    loaded_model = load_model(model_path)
    mean_wr = test_model(loaded_model, env, num_runs=50)
    return mean_wr
Code Example #14
File: env_starcraft.py Project: shishouyuan/xingtian
    def init_env(self, env_info):
        logging.debug("init env with: {}".format(env_info))
        print(env_info)
        sys.stdout.flush()
        _info = env_info.copy()
        if "agent_num" in _info.keys():
            _info.pop("agent_num")
        return StarCraft2Env(**_info)
Code Example #15
def load_smac_env(env_config: Dict[str, Any]) -> StarCraft2Env:
    """Loads a smac environment given a config dict. Also, the possible agents in the
    environment are set"""

    env = StarCraft2Env(**env_config)
    env.possible_agents = list(range(env.n_agents))

    return env
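A short, hypothetical usage sketch for load_smac_env above; it assumes StarCraft2Env and the typing imports are already in scope, and that the config keys are forwarded unchanged to StarCraft2Env:

if __name__ == "__main__":
    env = load_smac_env({"map_name": "3m", "seed": 42})
    print(env.possible_agents)  # [0, 1, 2] for the 3-agent "3m" map
    env.close()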
Code Example #16
    def run(self):
        self.log = logging.getLogger('StarCraftII')
        message = "start rl algorithm"
        self.log.info(message)
        Signal.get_signal().emit_signal_str(message)
        env = StarCraft2Env(
            map_name="3m",
            window_size_x=1418,
            window_size_y=890,
            window_loc=(5, 155),
        )
        env_info = env.get_env_info()

        n_actions = env_info["n_actions"]
        n_agents = env_info["n_agents"]
        message = "n_actions : {}".format(n_actions)
        self.log.info(message)
        Signal.get_signal().emit_signal_str(message)
        message = "n_agents : {}".format(n_agents)
        self.log.info(message)
        Signal.get_signal().emit_signal_str(message)

        n_episodes = 100

        for e in range(n_episodes):
            env.reset()
            terminated = False
            episode_reward = 0

            if self.stop:
                break

            while not terminated:
                obs = env.get_obs()
                state = env.get_state()

                if globalInformation.get_value(strings.IS_STOP):
                    self.stop = True
                    break

                actions = []
                for agent_id in range(n_agents):
                    avail_actions = env.get_avail_agent_actions(agent_id)
                    avail_actions_ind = np.nonzero(avail_actions)[0]
                    action = np.random.choice(avail_actions_ind)
                    actions.append(action)

                reward, terminated, _ = env.step(actions)
                episode_reward += reward

            message = "Total reward in episode {} = {}".format(
                e, episode_reward)
            self.log.info(message)
            Signal.get_signal().emit_signal_str(message)

        env.close()
        Signal.get_signal().emit_signal_gameover()
Code Example #17
def prepare_agents(env: StarCraft2Env, eps_decay_steps, tb_writer=None):
    env_info = env.get_env_info()
    n_agents = env_info['n_agents']
    agents: List[Agent] = []
    n_actions = env_info['n_actions']
    n_features = env.get_obs_size()
    for i in range(n_agents):
        agents.append(
            Agent(i,
                  n_features,
                  n_actions,
                  eps_decay_steps,
                  LR,
                  TARGET_UPDATE,
                  MEMORY_SIZE,
                  batch_size=BATCH_SIZE,
                  tb_writer=tb_writer,
                  discount=DISCOUNT))
    return agents
Code Example #18
def main():
    config = deepcopy(QMixConfig)
    env = StarCraft2Env(map_name=config['scenario'],
                        difficulty=config['difficulty'])
    env = SC2EnvWrapper(env)
    config['episode_limit'] = env.episode_limit
    config['obs_shape'] = env.obs_shape
    config['state_shape'] = env.state_shape
    config['n_agents'] = env.n_agents
    config['n_actions'] = env.n_actions

    rpm = EpisodeReplayBuffer(config['replay_buffer_size'])
    agent_model = RNNModel(config)
    qmixer_model = QMixerModel(config)
    algorithm = QMIX(agent_model, qmixer_model, config)
    qmix_agent = QMixAgent(algorithm, config)

    while rpm.count < config['memory_warmup_size']:
        train_reward, train_step, train_is_win, train_loss, train_td_error\
                = run_train_episode(env, qmix_agent, rpm, config)

    total_steps = 0
    last_test_step = -1e10
    while total_steps < config['training_steps']:
        train_reward, train_step, train_is_win, train_loss, train_td_error\
                = run_train_episode(env, qmix_agent, rpm, config)
        total_steps += train_step

        if total_steps - last_test_step >= config['test_steps']:
            last_test_step = total_steps
            eval_is_win_buffer = []
            eval_reward_buffer = []
            eval_steps_buffer = []
            for _ in range(3):
                eval_reward, eval_step, eval_is_win = run_evaluate_episode(
                    env, qmix_agent)
                eval_reward_buffer.append(eval_reward)
                eval_steps_buffer.append(eval_step)
                eval_is_win_buffer.append(eval_is_win)

            summary.add_scalar('train_loss', train_loss, total_steps)
            summary.add_scalar('eval_reward', np.mean(eval_reward_buffer),
                               total_steps)
            summary.add_scalar('eval_steps', np.mean(eval_steps_buffer),
                               total_steps)
            summary.add_scalar('eval_win_rate', np.mean(eval_is_win_buffer),
                               total_steps)
            summary.add_scalar('exploration', qmix_agent.exploration,
                               total_steps)
            summary.add_scalar('replay_buffer_size', rpm.count, total_steps)
            summary.add_scalar('target_update_count',
                               qmix_agent.target_update_count, total_steps)
            summary.add_scalar('train_td_error:', train_td_error, total_steps)
Code Example #19
def find_best_model(model_path, model_num):
    args = get_common_args()
    if args.alg == 'coma':
        args = get_coma_args(args)
        rnn_suffix = 'rnn_params.pkl'
        critic_fuffix = 'critic_params.pkl'
        policy = COMA
    elif args.alg == 'qmix':
        args = get_mixer_args(args)
        rnn_suffix = 'rnn_net_params.pkl'
        critic_fuffix = 'qmix_net_params.pkl'
        policy = QMIX
    elif args.alg == 'vdn':
        args = get_mixer_args(args)
        rnn_suffix = 'rnn_net_params.pkl'
        critic_fuffix = 'vdn_net_params.pkl'
        policy = VDN
    else:
        raise Exception("Not finished")
    env = StarCraft2Env(map_name=args.map,
                        step_mul=args.step_mul,
                        difficulty=args.difficulty,
                        game_version=args.game_version,
                        replay_dir=args.replay_dir)
    env_info = env.get_env_info()
    args.n_actions = env_info["n_actions"]
    args.n_agents = env_info["n_agents"]
    args.state_shape = env_info["state_shape"]
    args.obs_shape = env_info["obs_shape"]
    args.episode_limit = env_info["episode_limit"]
    args.evaluate_epoch = 100
    runner = Runner(env, args)
    max_win_rate = 0
    max_win_rate_idx = 0
    for num in range(model_num):
        critic_path = model_path + '/' + str(num) + '_' + critic_fuffix
        rnn_path = model_path + '/' + str(num) + '_' + rnn_suffix
        if os.path.exists(critic_path) and os.path.exists(rnn_path):
            os.rename(critic_path, model_path + '/' + critic_fuffix)
            os.rename(rnn_path, model_path + '/' + rnn_suffix)
            runner.agents.policy = policy(args)
            win_rate = runner.evaluate_sparse()
            if win_rate > max_win_rate:
                max_win_rate = win_rate
                max_win_rate_idx = num

            os.rename(model_path + '/' + critic_fuffix, critic_path)
            os.rename(model_path + '/' + rnn_suffix, rnn_path)
            print('The win rate of {} is  {}'.format(num, win_rate))
    print('The max win rate is {}, model index is {}'.format(
        max_win_rate, max_win_rate_idx))
Code Example #20
def main():
    args = parser.parse_args()
    env = StarCraft2Env(map_name="15z3m_drm",
                        seed=SEED,
                        reward_only_positive=False,
                        obs_timestep_number=True,
                        reward_scale_rate=200,
                        realtime=False)
    evaluator = evaluate.SCNativeEvaluator(env)

    top_individual = read_last_individual(args, evaluator)

    with torch.no_grad():
        evaluator.evaluate_single(top_individual, 50)
    return 0
Code Example #21
    def __init__(self, **smac_args):
        """Create a new multi-agent StarCraft env compatible with RLlib.
        Arguments:
            smac_args (dict): Arguments to pass to the underlying
                smac.env.starcraft.StarCraft2Env instance.
        Examples:
            >>> from smac.examples.rllib import RLlibStarCraft2Env
            >>> env = RLlibStarCraft2Env(map_name="8m")
            >>> print(env.reset())
        """

        self._env = StarCraft2Env(**smac_args)
        self._ready_agents = []
        self.observation_space = Dict({
            "obs": Box(-1, 1, shape=(self._env.get_obs_size(),)),
            "action_mask": Box(0, 1, shape=(self._env.get_total_actions(),)),
        })
        self.action_space = Discrete(self._env.get_total_actions())
Code Example #22
File: genetic_eval.py Project: DrMatters/sc2_agents
def main():
    random.seed(42)
    np.random.seed(42)
    fnames = load_latest_q_table()
    if fnames:
        top_individual = individuals.AgentwiseQTable.load(fnames[0])
    else:
        raise FileNotFoundError("Found no individuals")

    random.seed(42)
    np.random.seed(42)
    env = StarCraft2Env(map_name="2m2zFOX", difficulty="1", seed=42,
                        realtime=False)
    evaluator = evaluate.SCAbsPosEvaluator(env)

    evaluator.evaluate_single(top_individual)

    return 0
Code Example #23
    def __init__(self, env, args):
        self.env = env
        self.agents = Agents(args)
        self.rolloutWorker = RolloutWorker(env, self.agents, args)
        self.buffer = ReplayBuffer(args)
        self.args = args
        self.epsilon = args.epsilon

        # used to evaluate the algorithm in a sparse-reward environment: +1 for a win, -1 for a loss, 0 for every other step
        self.env_evaluate = StarCraft2Env(map_name=args.map,
                                          step_mul=args.step_mul,
                                          difficulty=args.difficulty,
                                          game_version=args.game_version,
                                          seed=args.seed,
                                          replay_dir=args.replay_dir,
                                          reward_sparse=True,
                                          reward_scale=False)
        self.evaluateWorker = RolloutWorker(self.env_evaluate, self.agents,
                                            args)
Code Example #24
File: masterAgent.py Project: cjing9017/starcraftII
    def run(self):
        self.log = logging.getLogger('StarCraftII')
        for i in range(8):
            args = get_common_args()
            map_name = globalInformation.get_value(strings.TYPE_MAP)
            alg_name = globalInformation.get_value(strings.TYPE_POLICY)
            if map_name is not None:
                args.map = map_name
            if alg_name is not None:
                args.alg = alg_name
            args.evaluate_epoch = 100
            if args.alg == 'coma':
                args = get_coma_args(args)
            elif args.alg == 'commnet_coma':
                args = get_commnet_args(args)
            else:
                args = get_mixer_args(args)
            env = StarCraft2Env(map_name=args.map,
                                step_mul=args.step_mul,
                                difficulty=args.difficulty,
                                game_version=args.game_version,
                                replay_dir=args.replay_dir,
                                window_size_x=1418,
                                window_size_y=890,
                                window_loc=(5, 155))
            env_info = env.get_env_info()
            args.n_actions = env_info["n_actions"]
            args.n_agents = env_info["n_agents"]
            args.state_shape = env_info["state_shape"]
            args.obs_shape = env_info["obs_shape"]
            args.episode_limit = env_info["episode_limit"]
            runner = Runner(env, args)
            if args.learn:
                runner.run(i)
            else:
                win_rate = runner.evaluate_sparse()
                message = 'The win rate of {} is {}'.format(
                    args.alg, win_rate)
                self.log.info(message)
                Signal.get_signal().emit_signal_str(message)
                break
            env.close()
Code Example #25
    def __init__(self,
                 map_name,
                 seed=123,
                 step_mul=8,
                 difficulty='7',
                 game_version=None,
                 replay_dir=""):
        self.env = StarCraft2Env(map_name=map_name,
                                 step_mul=step_mul,
                                 seed=seed,  # pass the seed argument through (was hard-coded to 123)
                                 difficulty=difficulty,
                                 game_version=game_version,
                                 replay_dir=replay_dir)
        env_info = self.env.get_env_info()
        self.observation_space = [
            env_info["obs_shape"] for i in range(env_info["n_agents"])
        ]
        self.action_space = [
            env_info["n_actions"] for i in range(env_info["n_agents"])
        ]
        self.agent_types = ['agent' for i in range(env_info["n_agents"])]
Code Example #26
def make_environment(
    evaluation: bool = False,
    map_name: str = "3m",
    random_seed: Optional[int] = None,
    **kwargs: Any,
) -> dm_env.Environment:
    """Wraps an starcraft 2 environment.

    Args:
        map_name: str, name of micromanagement level.

    Returns:
        A starcraft 2 smac environment wrapped as a DeepMind environment.
    """
    del evaluation

    env = StarCraft2Env(map_name=map_name, seed=random_seed, **kwargs)

    # wrap starcraft 2 environment
    environment = SMACEnvWrapper(env)

    return environment
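A hypothetical call to make_environment as defined above; SMACEnvWrapper comes from the surrounding project and is assumed to expose the standard dm_env interface:

if __name__ == "__main__":
    environment = make_environment(map_name="3m", random_seed=0)
    timestep = environment.reset()  # dm_env.TimeStep with the initial observations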
Code Example #27
def get_env(arg):
    if arguments.map == 'matrix_2':
        # 210000
        return Matrix_game2Env()
    elif arguments.map == 'matrix_3':
        return Matrix_game3Env(n_agents=2,
                               n_actions=3,
                               episode_limit=1,
                               obs_last_action=False,
                               state_last_action=False,
                               print_rew=False,
                               is_print=False)
    elif 'mmdp-' in arguments.map:
        length = int(re.findall(r'\d+\.\d+|\d+', arg.map)[-1])
        return uni_mmdp_Env(episode_limit=length)
    elif arguments.map == 'go_orderly':
        return EnvGoOrderly(map_size=6, num_agent=3)
    else:
        # set up the environment; pymarl also uses the default environment parameters
        return StarCraft2Env(map_name=arg.map,
                             difficulty=arg.difficulty,
                             step_mul=arg.step_mul,
                             replay_dir=arg.replay_dir)
Code Example #28
File: random_agent.py Project: kasimte/GTMARL-SC2ENV
from smac.env import StarCraft2Env
import numpy as np


def main():
    env = StarCraft2Env(map_name="8m",
                        window_size_x=1920/3,
                        window_size_y=1080/3)
    env_info = env.get_env_info()

    n_actions = env_info["n_actions"]
    n_agents = env_info["n_agents"]

    n_episodes = 30

    for e in range(n_episodes):
        env.reset()
        terminated = False
        episode_reward = 0

        while not terminated:
            obs = env.get_obs()
            state = env.get_state()

            actions = []
            for agent_id in range(n_agents):
                avail_actions = env.get_avail_agent_actions(agent_id)
                avail_actions_ind = np.nonzero(avail_actions)[0]
                action = np.random.choice(avail_actions_ind)
                actions.append(action)

            reward, terminated, info = env.step(actions)
            if terminated:
                won = True if info['battle_won'] else False
                print("Battle result : {}".format(won))
            episode_reward += reward

        print("Total reward in episode {} = {}".format(e, episode_reward))

    env.close()
Code Example #29
def main():
    """The StarCraft II environment for decentralised multi-agent micromanagement scenarios."""
    '''difficulty ="1" is VeryEasy'''
    #replay_dir="D:\StarCraft II\Replays\smacfox"
    env = StarCraft2Env(map_name="2m2zFOX", difficulty="1")
    '''env_info= {'state_shape': 48, 'obs_shape': 30, 'n_actions': 9, 'n_agents': 3, 'episode_limit': 60}'''
    env_info = env.get_env_info()
    #print("env_info = ", env_info)
    """Returns the size of the observation."""
    """obssize =  10"""
    """obs= [array([ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ,
        0.63521415,  0.63517255, -0.00726997,  0.06666667,  0.06666667],
      dtype=float32)]"""
    obssize = env.get_obs_size()
    #print("obssize = ", obssize)

    ######################################################################
    """
    ready_agents = []
    #observation_space= Dict(action_mask:Box(9,), obs:Box(30,))
    observation_space = Dict({
            "obs": Box(-1, 1, shape=(env.get_obs_size())),
            "action_mask": Box(0, 1, shape=(env.get_total_actions()))  })
    #print ("observation_space=", observation_space)
    
    #action_space= Discrete(9)
    action_space = Discrete(env.get_total_actions())
    #print ("action_space=", action_space)
    """
    ########################################################################

    n_actions = env_info["n_actions"]
    #print ("n_actions=", n_actions)
    n_agents = env_info["n_agents"]

    n_episodes = 100  # number of episodes; lapan = 20

    alpha = 0.5  # learning rate; sayon - 0.5, larger - 0.9, Lapan = 0.2
    gamma = 0.9  # discount factor; sayon - 0.9, larger - 0.5, lapan = 0.9
    epsilon = 0.7  # e-greedy; sayon - 0.3, larger - 0.7, lapan = 1.0 (100% random actions)
    bonusrewardsize = 10  # for fire - action 6

    n_statesFox = 16  # number of states in our grid world
    #n_statesFox1 = 16  # number of states in our grid world
    n_actionsFox = 7  # our own number of actions that will be needed

    Q_table = np.zeros([n_agents, n_statesFox,
                        n_actions])  # initialize an empty Q-table
    #Q_table1 = np.zeros([n_statesFox1, n_actionsFox])
    #Q_table = np.zeros([32, n_actions])
    #print (Q_table)

    for e in range(n_episodes):
        #print("n_episode =                       ", e)
        """Reset the environment. Required after each full episode.Returns initial observations and states."""
        env.reset()
        ''' Battle is over terminated = True'''
        terminated = False
        episode_reward = 0

        #n_steps = 1  # not yet using the step count to reduce the reward for a long search
        """
        # write output to a file
        fileobj = open("файл.txt", "wt")
        print("text",file=fileobj)
        fileobj.close()
        """

        # dynamic epsilon - only makes sense with a large number of episodes!!!

        if e % 15 == 0:
            epsilon += (1 - epsilon) * 10 / n_episodes
            print("epsilon = ", epsilon)

        #stoprun = [0,0,0,0,0]

        while not terminated:
            """Returns observation for agent_id."""
            obs = env.get_obs()
            #print ("obs=", obs)
            """Returns the global state."""
            #state = env.get_state()

            actions = []
            action = 0
            bonusreward = np.zeros([n_agents])
            stateFox = np.zeros([n_agents])
            '''agent_id= 0, agent_id= 1'''
            for agent_id in range(n_agents):

                # get the unit's attributes
                unit = env.get_unit_by_id(agent_id)
                # get the state from the unit's coordinates
                stateFox[agent_id] = get_stateFox(agent_id, unit.pos.x,
                                                  unit.pos.y)
                #print ("agent_id =", agent_id)
                #print ("stateFox[agent_id] =", stateFox[agent_id])
                '''
                tag = unit.tag  # many different unit attributes
                x = unit.pos.x
                y = unit.pos.y
                '''
                """Returns the available actions for agent_id."""
                """avail_actions= [0, 1, 1, 1, 1, 1, 0, 0, 0]"""
                avail_actions = env.get_avail_agent_actions(agent_id)
                '''The nonzero() function returns the indices of the non-zero elements of an array.'''
                """avail_actions_ind of agent_id == 0: [1 2 3 4 5]"""
                avail_actions_ind = np.nonzero(avail_actions)[0]
                # choose an action
                action = select_actionFox(agent_id, stateFox[agent_id],
                                          avail_actions_ind, n_actionsFox,
                                          epsilon, Q_table)
                if action == 6 or action == 7:
                    bonusreward[agent_id] += bonusrewardsize
                    #print ('bonusreward[agent_id]=', bonusreward[agent_id])

                # collect the actions from the different agents
                actions.append(action)

                ###############_Run right and shoot_################################
                """
                if is_possible_action(avail_actions_ind, 6) == True:
                    action = 6
                else:
                    if is_possible_action(avail_actions_ind, 4) == True:
                        action = 4
                    else:
                        action = np.random.choice(avail_actions_ind)
                        # a random sample from the values of a given 1-D array
                  """
                #####################################################################
                """Функция append() добавляет элементы в конец массива."""
                #print("agent_id=",agent_id,"avail_actions_ind=", avail_actions_ind, "action = ", action, "actions = ", actions)
                #f.write(agent_id)
                #f.write(avail_actions_ind)
                # collect the actions from the different agents
                #actions.append(action)

            # how do we know where to shoot? at a specific enemy?
            # how do we know what the other agents are doing? do we build a global state for them
            # ourselves, since we control them?
            """A single environment step. Returns reward, terminated, info."""
            reward, terminated, _ = env.step(actions)
            #print ('actions=', actions)
            #print ('bonusreward[0]=', bonusreward[0])
            #print ('bonusreward[1]=', bonusreward[1])
            reward += (bonusreward[0] + bonusreward[1])

            episode_reward += reward

            ###################_Training_##############################################

            for agent_id in range(n_agents):
                # get the unit's attributes
                unit = env.get_unit_by_id(agent_id)
                # get the state from the unit's coordinates
                stateFox_next = get_stateFox(agent_id, unit.pos.x, unit.pos.y)
                stateFoxint = int(stateFox[agent_id])

                Q_table[agent_id, stateFoxint, action] = Q_table[agent_id, stateFoxint, action] + alpha * \
                             (reward + gamma * np.max(Q_table[agent_id, stateFox_next, :]) - Q_table[agent_id, stateFoxint, action])

            ##########################################################################

        #Total reward in episode 4 = 20.0
        print("Total reward in episode {} = {}".format(e, episode_reward))
        #get_stats()= {'battles_won': 2, 'battles_game': 5, 'battles_draw': 0, 'win_rate': 0.4, 'timeouts': 0, 'restarts': 0}
        print("get_stats()=", env.get_stats())

    #env.save_replay() """Save a replay."""
    """"Close StarCraft II.""" ""
    env.close()
    print(Q_table)
    with open("se20.pkl", 'wb') as f:
        pickle.dump(Q_table, f)
Code Example #30
    def __init__(self, max_cycles, **smac_args):
        EzPickle.__init__(self, max_cycles, **smac_args)
        env = StarCraft2Env(**smac_args)
        super().__init__(env, max_cycles)