def main(unused_argv):
    """Train a DQN scout agent versus a built-in bot on the configured map."""
    # No explicit seed on the command line -> derive one from the
    # sub-second part of the wall clock.
    seed = FLAGS.random_seed
    if seed is None:
        seed = int((time.time() % 1) * 1000000)

    logger.configure(dir=FLAGS.train_log_dir, format_strs=['log'])

    # One built-in bot opponent plus the learning agent.
    players = [
        sc2_env.Bot(races[FLAGS.bot_race], difficulties[FLAGS.difficulty]),
        sc2_env.Agent(races[FLAGS.agent_race]),
    ]

    # Screen width is scaled by the aspect ratio and rounded down to a
    # multiple of 4; height is the raw resolution.
    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res,
                minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res,
                minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    env = ZergScoutEnv(
        map_name=FLAGS.map,
        players=players,
        step_mul=FLAGS.step_mul,
        random_seed=seed,
        game_steps_per_episode=FLAGS.max_step,
        agent_interface_format=agent_interface_format,
        score_index=-1,  # this indicates the outcome is reward
        disable_fog=FLAGS.disable_fog,
        visualize=FLAGS.render)
    env = make(FLAGS.wrapper, env)

    # Q-network architecture is selected by the wrapper name.
    network = model(FLAGS.wrapper)

    print('params, lr={} bf={} ef={} ef_eps={}'.format(
        FLAGS.param_lr, FLAGS.param_bf, FLAGS.param_ef, FLAGS.param_efps))

    act = deepq.learn(
        env,
        q_func=network,
        lr=FLAGS.param_lr,
        max_timesteps=100000,
        buffer_size=FLAGS.param_bf,
        exploration_fraction=FLAGS.param_ef,
        exploration_final_eps=FLAGS.param_efps,
        checkpoint_path=FLAGS.checkpoint_path,
        checkpoint_freq=FLAGS.checkpoint_freq,
        print_freq=10,
        callback=callback)
def main(unused_argv):
    """Run one scripted evaluation episode in the self-play scout env."""
    # Seed from the sub-second clock when none was supplied.
    seed = FLAGS.random_seed
    if seed is None:
        seed = int((time.time() % 1) * 1000000)

    logger.configure(dir=FLAGS.train_log_dir, format_strs=['log'])

    # Self-play setup: our agent plus an opponent agent slot.
    players = [
        sc2_env.Agent(races[FLAGS.agent_race]),
        sc2_env.Agent(races[FLAGS.oppo_race]),
    ]

    # Screen width is scaled by the aspect ratio and rounded down to a
    # multiple of 4; height is the raw resolution.
    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res,
                minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res,
                minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    # The opponent slot is driven by a scripted bot agent.
    agents = [ZergBotAgent()]
    env = ZergScoutSelfplayEnv(
        agents,
        map_name=FLAGS.map,
        players=players,
        step_mul=FLAGS.step_mul,
        random_seed=seed,
        game_steps_per_episode=FLAGS.max_step,
        agent_interface_format=agent_interface_format,
        score_index=-1,  # this indicates the outcome is reward
        disable_fog=FLAGS.disable_fog,
        visualize=FLAGS.render)
    env = make(FLAGS.wrapper, env)
    network = model(FLAGS.wrapper)

    print('params, lr={} bf={} ef={} ef_eps={}'.format(
        FLAGS.param_lr, FLAGS.param_bf, FLAGS.param_ef, FLAGS.param_efps))

    random_support = False  # when True, act_val is resampled every step
    total_rwd = 0.0
    act_val = 1  # hand-scripted action to issue each step
    try:
        obs = env.reset()
        n_step = 0
        # Drive the episode with the scripted action schedule.
        while True:
            n_step += 1
            action = act_val
            obs, rwd, done, other = env.step(action)
            print('action=', action, '; rwd=', rwd, '; step=', n_step)
            total_rwd += rwd
            # Switch to action 7 once the env reports extra info.
            if other:
                act_val = 7
            if random_support:
                act_val = random.randint(0, 8)
            # Fixed schedule change at step 50.
            if n_step == 50:
                act_val = 3
            if done:
                print("game over, total_rwd=", total_rwd)
                break
    except KeyboardInterrupt:
        pass
    finally:
        print("evaluation over")
        env.unwrapped.save_replay('evaluate')
        env.close()
def main(unused_argv):
    """Evaluate a trained DQN scout model for one self-play episode."""
    # Fall back to a clock-derived seed when none is configured.
    seed = FLAGS.random_seed
    if seed is None:
        seed = int((time.time() % 1) * 1000000)

    # Self-play setup: our agent plus an opponent agent slot.
    players = [
        sc2_env.Agent(races[FLAGS.agent_race]),
        sc2_env.Agent(races[FLAGS.oppo_race]),
    ]

    # Screen width is scaled by the aspect ratio and rounded down to a
    # multiple of 4; height is the raw resolution.
    screen_res = (int(FLAGS.screen_ratio * FLAGS.screen_resolution) // 4 * 4,
                  FLAGS.screen_resolution)
    if FLAGS.agent_interface_format == 'feature':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=screen_res,
                minimap=FLAGS.minimap_resolution))
    elif FLAGS.agent_interface_format == 'rgb':
        agent_interface_format = sc2_env.AgentInterfaceFormat(
            rgb_dimensions=sc2_env.Dimensions(
                screen=screen_res,
                minimap=FLAGS.minimap_resolution))
    else:
        raise NotImplementedError

    # The opponent slot is driven by a scripted bot agent.
    agents = [ZergBotAgent()]
    env = ZergScoutSelfplayEnv(
        agents,
        map_name=FLAGS.map,
        players=players,
        step_mul=FLAGS.step_mul,
        random_seed=seed,
        game_steps_per_episode=FLAGS.max_step,
        agent_interface_format=agent_interface_format,
        score_index=-1,  # this indicates the outcome is reward
        disable_fog=FLAGS.disable_fog,
        visualize=FLAGS.render)
    env = make(FLAGS.wrapper, env)
    network = model(FLAGS.wrapper)

    # Restore the trained policy; `act` maps a batched observation to actions.
    model_dir = FLAGS.model_dir
    act = deepq.load_model(env, network, model_dir)

    total_rwd = 0
    try:
        obs = env.reset()
        n_step = 0
        # Greedy rollout: query the policy each step until the episode ends.
        while True:
            n_step += 1
            action = act(obs[None])[0]
            obs, rwd, done, _ = env.step(action)
            print('action=', action, '; rwd=', rwd)
            total_rwd += rwd
            if done:
                print("game over, total_rwd=", total_rwd)
                break
    except KeyboardInterrupt:
        pass
    finally:
        print("evaluation over")
        env.unwrapped.save_replay('evaluate')
        env.close()