예제 #1
0
def main(unused_argv):
    rs = FLAGS.random_seed
    if FLAGS.random_seed is None:
        rs = int((time.time() % 1) * 1000000)

    logger.configure(dir=FLAGS.train_log_dir, format_strs=['log'])

    players = []
    players.append(sc2_env.Agent(races[FLAGS.agent_race]))
    players.append(sc2_env.Agent(races[FLAGS.oppo_race]))

    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    agents = [ZergBotAgent()]

    env = ZergScoutSelfplayEnv(
        agents,
        map_name=FLAGS.map,
        players=players,
        step_mul=FLAGS.step_mul,
        random_seed=rs,
        game_steps_per_episode=FLAGS.max_step,
        agent_interface_format=agent_interface_format,
        score_index=-1,  # this indicates the outcome is reward
        disable_fog=FLAGS.disable_fog,
        visualize=FLAGS.render)

    env = make(FLAGS.wrapper, env)

    network = model(FLAGS.wrapper)  #deepq.models.mlp([64, 32])

    print('params, lr={} bf={} ef={} ef_eps={}'.format(FLAGS.param_lr,
                                                       FLAGS.param_bf,
                                                       FLAGS.param_ef,
                                                       FLAGS.param_efps))

    random_support = False
    total_rwd = 0.0
    act_val = 1
    try:
        obs = env.reset()
        n_step = 0
        # run this episode
        while True:
            n_step += 1
            #print('observation=', obs, 'observation_none=', obs[None])
            action = act_val  #act(obs[None])[0]
            obs, rwd, done, other = env.step(action)
            print('action=', action, '; rwd=', rwd, '; step=', n_step)
            total_rwd += rwd
            if other:
                act_val = 7
            if random_support:
                act_val = random.randint(0, 8)

            if n_step == 50:
                act_val = 3
            '''
            if n_step == 20:
                act_val = 0
            elif n_step == 94:
                act_val = 1
            '''
            #print('step rwd=', rwd, ',action=', action, "obs=", obs)
            if done:
                print("game over, total_rwd=", total_rwd)
                break
    except KeyboardInterrupt:
        pass
    finally:
        print("evaluation over")
    env.unwrapped.save_replay('evaluate')
    env.close()
예제 #2
0
def main(unused_argv):
    rs = FLAGS.random_seed
    if FLAGS.random_seed is None:
        rs = int((time.time() % 1) * 1000000)

    players = []
    players.append(sc2_env.Agent(races[FLAGS.agent_race]))
    players.append(sc2_env.Agent(races[FLAGS.oppo_race]))

    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res, minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    agents = [ZergBotAgent()]

    env = ZergScoutSelfplayEnv(
        agents,
        map_name=FLAGS.map,
        players=players,
        step_mul=FLAGS.step_mul,
        random_seed=rs,
        game_steps_per_episode=FLAGS.max_step,
        agent_interface_format=agent_interface_format,
        score_index=-1,  # this indicates the outcome is reward
        disable_fog=FLAGS.disable_fog,
        visualize=FLAGS.render)

    env = make(FLAGS.wrapper, env)
    network = model(FLAGS.wrapper)  #deepq.models.mlp([64, 32])
    model_dir = FLAGS.model_dir
    act = deepq.load_model(env, network, model_dir)
    total_rwd = 0

    try:
        obs = env.reset()
        n_step = 0
        # run this episode
        while True:
            n_step += 1
            #print('observation=', obs, 'observation_none=', obs[None])
            action = act(obs[None])[0]
            obs, rwd, done, _ = env.step(action)
            print('action=', action, '; rwd=', rwd)
            #print('step rwd=', rwd, ',action=', action, "obs=", obs)
            total_rwd += rwd
            if done:
                print("game over, total_rwd=", total_rwd)
                break
    except KeyboardInterrupt:
        pass
    finally:
        print("evaluation over")
    env.unwrapped.save_replay('evaluate')
    env.close()