def main(unused_argv):
  #env = gym.make("SC2GYMENV-v0")
  #env.settings['map_name'] = 'ScoutSimple64'
  rs = FLAGS.random_seed
  if FLAGS.random_seed is None:
    rs = int((time.time() % 1) * 1000000)
  logger.configure(dir=FLAGS.train_log_dir, format_strs=['log'])

  players = []
  players.append(sc2_env.Bot(races[FLAGS.bot_race],
                             difficulties[FLAGS.difficulty]))
  players.append(sc2_env.Agent(races[FLAGS.agent_race]))

  screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                FLAGS.screen_resolution)
  if FLAGS.agent_interface_format == 'feature':
    agent_interface_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=screen_res, minimap=FLAGS.minimap_resolution))
  elif FLAGS.agent_interface_format == 'rgb':
    agent_interface_format = sc2_env.AgentInterfaceFormat(
        rgb_dimensions=sc2_env.Dimensions(
            screen=screen_res, minimap=FLAGS.minimap_resolution))
  else:
    raise NotImplementedError

  env = ZergScoutEnv(
      map_name=FLAGS.map,
      players=players,
      step_mul=FLAGS.step_mul,
      random_seed=rs,
      game_steps_per_episode=FLAGS.max_step,
      agent_interface_format=agent_interface_format,
      score_index=-1,  # this indicates the outcome is reward
      disable_fog=FLAGS.disable_fog,
      visualize=FLAGS.render)
  env = make(FLAGS.wrapper, env)

  network = model(FLAGS.wrapper)  # deepq.models.mlp([64, 32])
  print('params, lr={} bf={} ef={} ef_eps={}'.format(
      FLAGS.param_lr, FLAGS.param_bf, FLAGS.param_ef, FLAGS.param_efps))
  act = deepq.learn(
      env,
      q_func=network,
      lr=FLAGS.param_lr,
      max_timesteps=100000,
      buffer_size=FLAGS.param_bf,
      exploration_fraction=FLAGS.param_ef,
      exploration_final_eps=FLAGS.param_efps,
      checkpoint_path=FLAGS.checkpoint_path,
      checkpoint_freq=FLAGS.checkpoint_freq,
      print_freq=10,
      callback=callback)
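# The `callback` passed to deepq.learn above is not defined in this listing. A
# minimal sketch is given below, assuming the OpenAI baselines convention that the
# callback receives the training loop's local and global namespaces and that
# returning True stops training early. The 0.9 reward threshold is a hypothetical
# example, not a value taken from this repository.
def callback(lcl, _glb):
  # stop once the mean reward over the last 100 episodes reaches the threshold
  is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 0.9
  return is_solved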
def _thunk():
  agents = [ZergBotAgent()]
  env = ZergScoutSelfplayEnv(
      agents,
      map_name=FLAGS.map,
      players=players,
      step_mul=FLAGS.step_mul,
      random_seed=seed,
      game_steps_per_episode=FLAGS.max_step,
      agent_interface_format=agent_interface_format,
      score_index=-1,  # this indicates the outcome is reward
      disable_fog=FLAGS.disable_fog,
      visualize=FLAGS.render)
  env = make(FLAGS.wrapper, env)
  return env
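# `_thunk` is the usual baselines-style closure: environment construction is
# deferred so a vectorized wrapper can call it once per worker. A minimal usage
# sketch, assuming baselines' SubprocVecEnv; the worker count is illustrative and
# not taken from this repository.
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

num_env = 4  # illustrative
vec_env = SubprocVecEnv([_thunk for _ in range(num_env)])
# Note that every copy shares the same captured `seed`; giving each worker its own
# seed would require the enclosing factory to build one thunk per seed.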
def main(unused_argv):
  rs = FLAGS.random_seed
  if FLAGS.random_seed is None:
    rs = int((time.time() % 1) * 1000000)
  logger.configure(dir=FLAGS.train_log_dir, format_strs=['log'])

  players = []
  players.append(sc2_env.Agent(races[FLAGS.agent_race]))
  players.append(sc2_env.Agent(races[FLAGS.oppo_race]))

  screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                FLAGS.screen_resolution)
  if FLAGS.agent_interface_format == 'feature':
    agent_interface_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=screen_res, minimap=FLAGS.minimap_resolution))
  elif FLAGS.agent_interface_format == 'rgb':
    agent_interface_format = sc2_env.AgentInterfaceFormat(
        rgb_dimensions=sc2_env.Dimensions(
            screen=screen_res, minimap=FLAGS.minimap_resolution))
  else:
    raise NotImplementedError

  agents = [ZergBotAgent()]
  env = ZergScoutSelfplayEnv(
      agents,
      map_name=FLAGS.map,
      players=players,
      step_mul=FLAGS.step_mul,
      random_seed=rs,
      game_steps_per_episode=FLAGS.max_step,
      agent_interface_format=agent_interface_format,
      score_index=-1,  # this indicates the outcome is reward
      disable_fog=FLAGS.disable_fog,
      visualize=FLAGS.render)
  env = make(FLAGS.wrapper, env)

  network = model(FLAGS.wrapper)  # deepq.models.mlp([64, 32])
  print('params, lr={} bf={} ef={} ef_eps={}'.format(
      FLAGS.param_lr, FLAGS.param_bf, FLAGS.param_ef, FLAGS.param_efps))

  random_support = False
  total_rwd = 0.0
  act_val = 1
  try:
    obs = env.reset()
    n_step = 0
    # run this episode
    while True:
      n_step += 1
      #print('observation=', obs, 'observation_none=', obs[None])
      action = act_val  #act(obs[None])[0]
      obs, rwd, done, other = env.step(action)
      print('action=', action, '; rwd=', rwd, '; step=', n_step)
      total_rwd += rwd
      if other:
        act_val = 7
      if random_support:
        act_val = random.randint(0, 8)
      if n_step == 50:
        act_val = 3
      '''
      if n_step == 20:
        act_val = 0
      elif n_step == 94:
        act_val = 1
      '''
      #print('step rwd=', rwd, ',action=', action, "obs=", obs)
      if done:
        print("game over, total_rwd=", total_rwd)
        break
  except KeyboardInterrupt:
    pass
  finally:
    print("evaluation over")
    env.unwrapped.save_replay('evaluate')
    env.close()
def main(unused_argv):
  rs = FLAGS.random_seed
  if FLAGS.random_seed is None:
    rs = int((time.time() % 1) * 1000000)

  players = []
  players.append(sc2_env.Agent(races[FLAGS.agent_race]))
  players.append(sc2_env.Agent(races[FLAGS.oppo_race]))

  screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                FLAGS.screen_resolution)
  if FLAGS.agent_interface_format == 'feature':
    agent_interface_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=screen_res, minimap=FLAGS.minimap_resolution))
  elif FLAGS.agent_interface_format == 'rgb':
    agent_interface_format = sc2_env.AgentInterfaceFormat(
        rgb_dimensions=sc2_env.Dimensions(
            screen=screen_res, minimap=FLAGS.minimap_resolution))
  else:
    raise NotImplementedError

  agents = [ZergBotAgent()]

  ncpu = 1
  if sys.platform == 'darwin':
    ncpu //= 2
  config = tf.ConfigProto(allow_soft_placement=True,
                          intra_op_parallelism_threads=ncpu,
                          inter_op_parallelism_threads=ncpu)
  config.gpu_options.allow_growth = True  # pylint: disable=E1101
  tf.Session(config=config).__enter__()

  # env = make_sc2_dis_env(num_env=1, seed=rs, players=players,
  #                        agent_interface_format=agent_interface_format)
  model_dir = FLAGS.model_dir
  total_rwd = 0
  env = ZergScoutSelfplayEnv(
      agents,
      map_name=FLAGS.map,
      players=players,
      step_mul=FLAGS.step_mul,
      random_seed=rs,
      game_steps_per_episode=FLAGS.max_step,
      agent_interface_format=agent_interface_format,
      score_index=-1,  # this indicates the outcome is reward
      disable_fog=FLAGS.disable_fog,
      visualize=FLAGS.render)
  env = make(FLAGS.wrapper, env)

  agent = ppo2.load_model(CnnPolicy, env, model_dir)
  try:
    obs = env.reset()
    state = agent.initial_state
    n_step = 0
    done = False
    # run this episode
    while True:
      n_step += 1
      obs = np.reshape(obs, (1,) + obs.shape)  # convert shape (32, 32, 20) to (1, 32, 32, 20)
      action, value, state, _ = agent.step(obs, state, done)
      obs, rwd, done, info = env.step(action)
      print('action=', action, '; rwd=', rwd)
      # print('step rwd=', rwd, ',action=', action, "obs=", obs)
      total_rwd += rwd
      if done:
        print("game over, total_rwd=", total_rwd)
        break
  except KeyboardInterrupt:
    pass
  finally:
    print("evaluation over")
    env.unwrapped.save_replay('evaluate')
    env.close()  # shut down the SC2 process, as the other evaluation scripts do
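# The loop above evaluates a single episode with the loaded PPO agent. A minimal
# sketch of averaging the return over several episodes with the same `agent` and
# `env` objects; the helper name and episode count are illustrative, not part of
# this repository.
def evaluate_episodes(env, agent, num_episodes=10):
  returns = []
  for _ in range(num_episodes):
    obs = env.reset()
    state = agent.initial_state
    done = False
    episode_rwd = 0.0
    while not done:
      batched_obs = np.reshape(obs, (1,) + obs.shape)  # add a leading batch dimension
      action, _value, state, _ = agent.step(batched_obs, state, done)
      obs, rwd, done, _info = env.step(action)
      episode_rwd += rwd
    returns.append(episode_rwd)
  return sum(returns) / len(returns)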
def main(unused_argv):
  rs = FLAGS.random_seed
  if FLAGS.random_seed is None:
    rs = int((time.time() % 1) * 1000000)
  bot_difficulty = difficulties[FLAGS.difficulty]

  players = []
  players.append(sc2_env.Agent(races[FLAGS.agent_race]))
  players.append(sc2_env.Bot(races['Z'], bot_difficulty))

  screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                FLAGS.screen_resolution)
  if FLAGS.agent_interface_format == 'feature':
    agent_interface_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=screen_res, minimap=FLAGS.minimap_resolution))
  elif FLAGS.agent_interface_format == 'rgb':
    agent_interface_format = sc2_env.AgentInterfaceFormat(
        rgb_dimensions=sc2_env.Dimensions(
            screen=screen_res, minimap=FLAGS.minimap_resolution))
  else:
    raise NotImplementedError

  env = FullGameScoutEnv(
      map_name=FLAGS.map,
      players=players,
      step_mul=FLAGS.step_mul,
      random_seed=rs,
      game_steps_per_episode=FLAGS.max_step,
      agent_interface_format=agent_interface_format,
      score_index=-1,  # this indicates the outcome is reward
      disable_fog=FLAGS.disable_fog,
      visualize=FLAGS.render)
  env = make(FLAGS.wrapper, env)

  random_support = False
  total_rwd = 0.0
  act_val = 5
  try:
    obs = env.reset()
    n_step = 0
    # run this episode
    while True:
      n_step += 1
      #print('observation=', obs, 'observation_none=', obs[None])
      action = act_val  #act(obs[None])[0]
      obs, rwd, done, other = env.step(action)
      print('action=', action, '; rwd=', rwd, '; step=', n_step)
      total_rwd += rwd
      if other:
        act_val = 7
      if random_support:
        act_val = random.randint(0, 8)
      if n_step == 130:
        act_val = 8
      #print('step rwd=', rwd, ',action=', action, "obs=", obs)
      if done:
        print("game over, total_rwd=", total_rwd)
        break
  except KeyboardInterrupt:
    pass
  finally:
    print("evaluation over")
    env.unwrapped.save_replay('evaluate')
    env.close()
def main(unused_argv):
  rs = FLAGS.random_seed
  if FLAGS.random_seed is None:
    rs = int((time.time() % 1) * 1000000)

  players = []
  players.append(sc2_env.Agent(races[FLAGS.agent_race]))
  players.append(sc2_env.Agent(races[FLAGS.oppo_race]))

  screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                FLAGS.screen_resolution)
  if FLAGS.agent_interface_format == 'feature':
    agent_interface_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=screen_res, minimap=FLAGS.minimap_resolution))
  elif FLAGS.agent_interface_format == 'rgb':
    agent_interface_format = sc2_env.AgentInterfaceFormat(
        rgb_dimensions=sc2_env.Dimensions(
            screen=screen_res, minimap=FLAGS.minimap_resolution))
  else:
    raise NotImplementedError

  agents = [ZergBotAgent()]
  env = ZergScoutSelfplayEnv(
      agents,
      map_name=FLAGS.map,
      players=players,
      step_mul=FLAGS.step_mul,
      random_seed=rs,
      game_steps_per_episode=FLAGS.max_step,
      agent_interface_format=agent_interface_format,
      score_index=-1,  # this indicates the outcome is reward
      disable_fog=FLAGS.disable_fog,
      visualize=FLAGS.render)
  env = make(FLAGS.wrapper, env)

  network = model(FLAGS.wrapper)  # deepq.models.mlp([64, 32])
  model_dir = FLAGS.model_dir
  act = deepq.load_model(env, network, model_dir)

  total_rwd = 0
  try:
    obs = env.reset()
    n_step = 0
    # run this episode
    while True:
      n_step += 1
      #print('observation=', obs, 'observation_none=', obs[None])
      action = act(obs[None])[0]
      obs, rwd, done, _ = env.step(action)
      print('action=', action, '; rwd=', rwd)
      #print('step rwd=', rwd, ',action=', action, "obs=", obs)
      total_rwd += rwd
      if done:
        print("game over, total_rwd=", total_rwd)
        break
  except KeyboardInterrupt:
    pass
  finally:
    print("evaluation over")
    env.unwrapped.save_replay('evaluate')
    env.close()
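# Each `main(unused_argv)` above is an absl/PySC2-style entry point: the FLAGS used
# throughout must be parsed before `main` runs. A minimal sketch of the usual
# launcher boilerplate (standard absl usage, assumed rather than copied from this
# repository):
from absl import app

if __name__ == '__main__':
  app.run(main)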