def run_thread(agent, map_name, visualize):
    """Drive ``agent`` through ``run_loop`` on ``map_name`` and train it.

    For every item yielded by ``run_loop`` that marks a finished episode
    (while ``FLAGS.training`` is set and ``num_echos`` is truthy) the shared
    ``COUNTER`` is bumped under ``LOCK``, the agent is updated with a linearly
    decayed learning rate, and a snapshot is saved periodically.  The loop
    stops once the counter reaches ``FLAGS.max_steps``.

    Args:
        agent: object exposing ``update``, ``save_model`` and ``name``.
        map_name: map passed to ``sc2_env.SC2Env``.
        visualize: forwarded to ``sc2_env.SC2Env``.
    """
    global COUNTER

    screen_px = (FLAGS.screen_resolution, FLAGS.screen_resolution)
    minimap_px = (FLAGS.minimap_resolution, FLAGS.minimap_resolution)

    with sc2_env.SC2Env(map_name=map_name,
                        agent_race=FLAGS.agent_race,
                        bot_race=FLAGS.bot_race,
                        difficulty=FLAGS.difficulty,
                        step_mul=FLAGS.step_mul,
                        screen_size_px=screen_px,
                        minimap_size_px=minimap_px,
                        visualize=visualize) as env:
        # Only for a single player!
        episodes = run_loop(FLAGS, [agent], env, MAX_AGENT_STEPS)
        for replay_buffer, num_echos, is_done in episodes:
            if not (FLAGS.training and num_echos and is_done):
                continue

            # Take a private copy of the shared counter while holding the lock.
            with LOCK:
                COUNTER += 1
                counter = COUNTER

            # Learning rate schedule: linear decay towards 10% of the base rate.
            learning_rate = FLAGS.learning_rate * (
                1 - 0.9 * counter / FLAGS.max_steps)
            agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)

            if counter % FLAGS.snapshot_step == 1:
                agent.save_model(SNAPSHOT, counter)
            if counter >= FLAGS.max_steps:
                break

        if FLAGS.save_replay:
            env.save_replay(agent.name)
def init_game(game_params, max_steps=256, step_multiplier=8, **kwargs):
    """Create a single-player ``MoveToBeacon`` environment.

    Args:
        game_params: keyword arguments forwarded to
            ``sc2_env.parse_agent_interface_format``.
        max_steps: first factor of the per-episode game-step limit.
        step_multiplier: second factor of the per-episode game-step limit.
        **kwargs: extra keyword arguments handed to ``sc2_env.SC2Env``.

    Returns:
        The constructed ``sc2_env.SC2Env`` instance.
    """
    terran = sc2_env.Race(1)  # 1 = terran
    player = sc2_env.Agent(terran, "Testv0")  # NamedTuple [race, agent_name]
    interface_format = sc2_env.parse_agent_interface_format(**game_params)

    # Players and interface formats are given as lists even for one player.
    return sc2_env.SC2Env(
        map_name='MoveToBeacon',  # simplest minigame
        players=[player],
        game_steps_per_episode=max_steps * step_multiplier,
        agent_interface_format=[interface_format],
        **kwargs)
def test_defeat_roaches_raw(self):
    """Run the scripted raw-action DefeatRoaches agent and check its counters."""
    raw_interface = sc2_env.AgentInterfaceFormat(
        action_space=sc2_env.ActionSpace.RAW,  # or: use_raw_actions=True,
        use_raw_units=True)
    episode_limit = 100 * self.steps * self.step_mul

    with sc2_env.SC2Env(
            map_name="DefeatRoaches",
            players=[sc2_env.Agent(sc2_env.Race.terran)],
            agent_interface_format=raw_interface,
            step_mul=self.step_mul,
            game_steps_per_episode=episode_limit) as env:
        agent = scripted_agent.DefeatRoachesRaw()
        run_loop.run_loop([agent], env, self.steps)

    # The agent must have scored at least one point per episode.
    self.assertLessEqual(agent.episodes, agent.reward)
    self.assertEqual(agent.steps, self.steps)
def test_collect_mineral_shards_feature_units(self):
    """Run the feature-units CollectMineralShards agent and check its counters."""
    dimensions = sc2_env.Dimensions(screen=84, minimap=64)
    interface = sc2_env.AgentInterfaceFormat(
        feature_dimensions=dimensions,
        use_feature_units=True)
    episode_limit = self.steps * self.step_mul

    with sc2_env.SC2Env(
            map_name="CollectMineralShards",
            players=[sc2_env.Agent(sc2_env.Race.terran)],
            agent_interface_format=interface,
            step_mul=self.step_mul,
            game_steps_per_episode=episode_limit) as env:
        agent = scripted_agent.CollectMineralShardsFeatureUnits()
        run_loop.run_loop([agent], env, self.steps)

    # The agent must have scored at least one point per episode.
    self.assertLessEqual(agent.episodes, agent.reward)
    self.assertEqual(agent.steps, self.steps)
def run_thread(agent_cls, map_name, visualize):
    """Run one agent of type ``agent_cls`` on ``map_name`` using FLAGS settings.

    Args:
        agent_cls: zero-argument callable producing the agent.
        map_name: map passed positionally to ``sc2_env.SC2Env``.
        visualize: forwarded to ``sc2_env.SC2Env``.
    """
    screen_px = (FLAGS.screen_resolution, FLAGS.screen_resolution)
    minimap_px = (FLAGS.minimap_resolution, FLAGS.minimap_resolution)

    with sc2_env.SC2Env(
            map_name,
            agent_race=FLAGS.agent_race,
            bot_race=FLAGS.bot_race,
            difficulty=FLAGS.difficulty,
            step_mul=FLAGS.step_mul,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            screen_size_px=screen_px,
            minimap_size_px=minimap_px,
            visualize=visualize) as env:
        # Wrap the environment so available actions are printed.
        env = available_actions_printer.AvailableActionsPrinter(env)
        run_loop.run_loop([agent_cls()], env, FLAGS.max_agent_steps)
        if FLAGS.save_replay:
            env.save_replay(agent_cls.__name__)
def main(unused_argv):
    """Play a ``ZergAgent`` against a very-easy random-race bot on AbyssalReef.

    A keyboard interrupt ends the run silently.
    """
    agent = ZergAgent()
    try:
        env_settings = dict(
            map_name="AbyssalReef",
            players=[
                sc2_env.Agent(sc2_env.Race.zerg),
                sc2_env.Bot(sc2_env.Race.random,
                            sc2_env.Difficulty.very_easy),
            ],
            agent_interface_format=features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=84, minimap=64),
                use_feature_units=True),
            step_mul=16,
            game_steps_per_episode=0,
            visualize=False,
        )
        with sc2_env.SC2Env(**env_settings) as env:
            run_loop.run_loop([agent], env)
    except KeyboardInterrupt:
        pass
def main(unuesed_argv):
    """Run the DDQN agent on DefeatRoaches while live-plotting its progress.

    Episodes are played until the episode counter exceeds ``_EPISODES``.
    After each episode the agent's ``moving_average`` is appended to the
    plot and the per-episode action counters are printed and reset.

    A keyboard interrupt now stops the run.  Previously the interrupt was
    swallowed inside the outer retry loop, which simply restarted the
    environment and made the program impossible to stop with Ctrl-C.

    Args:
        unuesed_argv: ignored (name kept as-is for interface compatibility).
    """
    agent = defeatRoachesAgent.DDQNAgent(_TRAIN, _SCREEN, _SQUARE_COUNT)
    plot = DynamicUpdate()
    plot.on_launch()
    xdata = []
    ydata = []
    episodes = 0

    try:
        with sc2_env.SC2Env(
                map_name="DefeatRoaches",
                players=[sc2_env.Agent(sc2_env.Race.terran)],
                agent_interface_format=features.AgentInterfaceFormat(
                    feature_dimensions=features.Dimensions(
                        screen=_SCREEN, minimap=_MINIMAP),
                    use_feature_units=True),
                step_mul=8,
                visualize=_VISUALIZE) as env:
            while episodes <= _EPISODES:
                agent.setup(env.observation_spec(), env.action_spec())
                timesteps = env.reset()
                agent.reset()

                # Play one episode; the agent sees the final timestep too.
                while True:
                    step_actions = [agent.step(timesteps[0])]
                    agent.episode = episodes
                    if timesteps[0].last():
                        break
                    timesteps = env.step(step_actions)

                xdata.append(episodes)
                ydata.append(agent.moving_average)
                plot.on_running(xdata, ydata)
                plt.pause(0.001)

                episodes += 1
                agent.episode = episodes
                print('random: ', agent.random_action, ' chosen: ',
                      agent.chosen_action, ' epsilon: ', agent.epsilon,
                      ' episode: ', agent.episode)

                # Reset the per-episode action statistics.
                agent.chosen_action = 0
                agent.random_action = 0
    except KeyboardInterrupt:
        # Leave the training loop instead of restarting the environment.
        pass
def worker(remote, visualize):
    """Serve ``step`` / ``reset`` / ``close`` commands for a MoveToBeacon env.

    Each message received from ``remote`` is a 4-tuple
    ``(cmd, action, obs, global_step)``.  Replies are 5-tuples
    ``(obs, state, action, reward, done)``.

    The environment is opened in a ``with`` block so that the game is closed
    when the ``close`` command arrives (or an error occurs); previously it
    was never closed.

    Args:
        remote: connection object providing ``recv``, ``send`` and ``close``.
        visualize: forwarded to ``sc2_env.SC2Env``.
    """
    with sc2_env.SC2Env(map_name='MoveToBeacon',
                        step_mul=4,
                        visualize=visualize,
                        screen_size_px=(64, 64),
                        minimap_size_px=(64, 64)) as env:
        done = False
        while True:
            cmd, action, obs, global_step = remote.recv()
            end_step = 100

            if cmd == 'step':
                if not action == 'done':
                    # Select the marine: repeat action 100 until function
                    # id 331 shows up in the available actions.
                    while 331 not in obs[0].observation['available_actions']:
                        actions = actAgent2Pysc2(100, obs)
                        obs = env.step(actions=[actions])

                    a = actAgent2Pysc2(action, obs)
                    obs = env.step(actions=[a])
                    for _ in range(1):
                        actions = no_operation(obs)
                        obs = env.step(actions=[actions])

                    state = obs2state(obs)
                    distance = obs2distance(obs)

                    # Small per-step penalty; +1 on reaching the target and
                    # -1 on the last allowed step (the timeout wins if both).
                    reward = -0.01
                    if distance < 0.03 or global_step == end_step - 1:
                        if distance < 0.03:
                            reward = 1
                        if global_step == end_step - 1:
                            reward = -1
                        done = True
                    remote.send((obs, state, action, reward, done))
                else:
                    remote.send((0, 0, 0, 0, True))

            if cmd == 'reset':
                done = False
                obs = env.reset()  # re-initialise the environment
                # Select the marine (see the 'step' branch).
                while 331 not in obs[0].observation['available_actions']:
                    actions = actAgent2Pysc2(100, obs)
                    obs = env.step(actions=[actions])
                state = obs2state(obs)
                remote.send((obs, state, 0, 0, False))

            if cmd == 'close':
                remote.close()
                break
def run_thread(agent_cls, map_name, args):
    """Run one agent of type ``agent_cls`` on ``map_name`` (always visualized).

    Args:
        agent_cls: zero-argument callable producing the agent.
        map_name: map passed to ``sc2_env.SC2Env``.
        args: mapping with the keys ``agent_race``, ``bot_race``,
            ``difficulty``, ``step_mul``, ``game_steps_per_episode``,
            ``screen_resolution``, ``minimap_resolution``,
            ``max_agent_steps`` and ``save_replay``.
    """
    screen_px = (args["screen_resolution"], args["screen_resolution"])
    minimap_px = (args["minimap_resolution"], args["minimap_resolution"])

    with sc2_env.SC2Env(
            map_name=map_name,
            agent_race=args["agent_race"],
            bot_race=args["bot_race"],
            difficulty=args["difficulty"],
            step_mul=args["step_mul"],
            game_steps_per_episode=args["game_steps_per_episode"],
            screen_size_px=screen_px,
            minimap_size_px=minimap_px,
            visualize=True) as env:
        # Wrap the environment so available actions are printed.
        env = available_actions_printer.AvailableActionsPrinter(env)
        run_loop.run_loop([agent_cls()], env, args["max_agent_steps"])
        if args["save_replay"]:
            env.save_replay(agent_cls.__name__)
def train():
    """Train an actor-critic pair on MoveToBeacon for ``EPISODES`` episodes.

    The per-step reward is proportional to the reduction in distance since
    the previous step.  An episode ends with reward 10 when the distance
    drops below 0.03 and with reward -10 on step 200 (the timeout wins if
    both happen on the same step).
    """
    FLAGS(sys.argv)
    with sc2_env.SC2Env(map_name="MoveToBeacon", step_mul=step_mul) as env:
        sess = tf.Session()
        actor = Actor(sess, n_features=2, n_actions=4, lr=0.001)
        critic = Critic(sess, n_features=2, lr=0.001)
        sess.run(tf.global_variables_initializer())

        for episodes in range(EPISODES):
            done = False
            obs = env.reset()
            # Repeat action 100 until function id 331 becomes available.
            while 331 not in obs[0].observation["available_actions"]:
                actions = actAgent2Pysc2(100, obs)
                obs = env.step(actions=[actions])
            state = np.array(obs2state(obs))
            print('episode start')
            global_step = 0
            reward = 0

            while not done:
                global_step += 1
                time.sleep(0.2)

                action = actor.choose_action(state)
                actions = actAgent2Pysc2(action, obs)
                obs = env.step(actions=[actions])
                for _ in range(3):
                    actions = no_operation(obs)
                    obs = env.step(actions=[actions])

                distance = obs2distance(obs)
                if global_step == 1:
                    # No previous step yet, so the first shaped reward is 0.
                    pre_distance = distance
                next_state = np.array(obs2state(obs))
                reward = -(distance - pre_distance) * 400

                reached = distance < 0.03
                timed_out = global_step == 200
                episode_over = reached or timed_out  # end of game
                if episode_over:
                    if reached:
                        reward = 10
                    if timed_out:
                        reward = -10
                    done = True

                td_error = critic.learn(state, reward, next_state)
                actor.learn(state, action, td_error)

                if episode_over:
                    break
                state = next_state
                pre_distance = distance
def make_env(opt):
    """Build an ``SC2Env`` from the option object ``opt``.

    The first player is always an agent.  When the map supports two or more
    players a second player is added: a built-in bot if ``opt.agent2`` is
    ``"Bot"``, otherwise another agent.  Agent names default to the last
    dotted component of ``opt.agent`` / ``opt.agent2``.

    Args:
        opt: options object; the attributes read are visible in the body.

    Returns:
        The constructed ``sc2_env.SC2Env``.
    """
    map_inst = maps.get(opt.map)

    _, default_name = opt.agent.rsplit(".", 1)
    players = [
        sc2_env.Agent(sc2_env.Race[opt.agent_race],
                      opt.agent_name or default_name)
    ]

    if map_inst.players >= 2:
        if opt.agent2 == "Bot":
            opponent = sc2_env.Bot(sc2_env.Race[opt.agent2_race],
                                   sc2_env.Difficulty[opt.difficulty],
                                   sc2_env.BotBuild[opt.bot_build])
        else:
            _, default_name = opt.agent2.rsplit(".", 1)
            opponent = sc2_env.Agent(sc2_env.Race[opt.agent2_race],
                                     opt.agent2_name or default_name)
        players.append(opponent)

    interface_format = sc2_env.parse_agent_interface_format(
        feature_screen=opt.feature_screen_size,
        feature_minimap=opt.feature_minimap_size,
        rgb_screen=opt.rgb_screen_size,
        rgb_minimap=opt.rgb_minimap_size,
        action_space=opt.action_space,
        use_feature_units=opt.use_feature_units,
        use_raw_units=opt.use_raw_units,
        use_camera_position=True,
        send_observation_proto=False,
        camera_width_world_units=48)

    return sc2_env.SC2Env(
        map_name=opt.map,
        battle_net_map=opt.battle_net_map,
        players=players,
        agent_interface_format=interface_format,
        step_mul=opt.step_mul,
        game_steps_per_episode=4000,
        disable_fog=opt.disable_fog,
        visualize=True,
        ensure_available_actions=True)
def main(unused_argv):
    """Run ``RawAgent`` against a very-easy Protoss bot on Simple64, forever.

    A fresh environment is opened each time ``run_loop`` returns; a keyboard
    interrupt ends the program silently.
    """
    agent = RawAgent()
    try:
        while True:
            env_settings = dict(
                map_name="Simple64",
                players=[
                    sc2_env.Agent(sc2_env.Race.protoss),
                    sc2_env.Bot(sc2_env.Race.protoss,
                                sc2_env.Difficulty.very_easy),
                ],
                agent_interface_format=features.AgentInterfaceFormat(
                    action_space=actions.ActionSpace.RAW,
                    use_raw_units=True,
                    raw_resolution=64,
                ),
            )
            with sc2_env.SC2Env(**env_settings) as env:
                run_loop.run_loop([agent], env)
    except KeyboardInterrupt:
        pass
def __init__(self, name, trainer, model_path, global_episodes):
    """Set up one worker: bookkeeping, local network, sync ops and its env.

    Args:
        name: worker identifier; used in the scope name ``worker_<name>``
            and in the summary directory ``train_<name>``.
        trainer: passed through to ``AC_Network``.
        model_path: stored on the instance as ``model_path``.
        global_episodes: object providing ``assign_add``; an op adding 1 is
            stored as ``self.increment``.
    """
    self.name = "worker_{}".format(name)
    self.number = name
    self.model_path = model_path
    self.trainer = trainer
    self.global_episodes = global_episodes
    self.increment = self.global_episodes.assign_add(1)

    # Per-episode statistics.
    self.episode_rewards = []
    self.episode_lengths = []
    self.episode_mean_values = []
    self.summary_writer = tf.summary.FileWriter(
        "train_{}".format(self.number))

    # Local copy of the network plus the op that copies the global
    # parameters into it.
    self.local_AC = AC_Network(self.name, trainer)
    self.update_local_ops = update_target_graph('global', self.name)

    self.env = sc2_env.SC2Env(map_name="DefeatRoaches")
def test_defeat_zerglings(self):
    """Step a random agent on DefeatZerglingsAndBanelings and count its steps."""
    FLAGS(sys.argv)
    episode_limit = self.steps * self.step_mul
    with sc2_env.SC2Env("DefeatZerglingsAndBanelings",
                        step_mul=self.step_mul,
                        visualize=True,
                        game_steps_per_episode=episode_limit) as env:
        # Issue a single no-op to obtain a first observation.
        no_op = sc2_actions.FunctionCall(_NO_OP, [])
        obs = env.step(actions=[no_op])
        player_relative = obs[0].observation["screen"][_PLAYER_RELATIVE]

        # Break Point!!
        print(player_relative)

        agent = random_agent.RandomAgent()
        run_loop.run_loop([agent], env, self.steps)

    self.assertEqual(agent.steps, self.steps)
def create_environment():
    """Create the FLAGS-configured environment and a one-hot action table.

    Returns:
        A pair ``(env, possible_actions)``: the ``SC2Env`` wrapped in an
        ``AvailableActionsPrinter``, and the rows of an identity matrix with
        one row per entry in ``actions.FUNCTIONS``, as nested lists of ints.
    """
    screen_px = (FLAGS.screen_resolution, FLAGS.screen_resolution)
    minimap_px = (FLAGS.minimap_resolution, FLAGS.minimap_resolution)
    interface = AgentInterfaceFormat(
        feature_dimensions=Dimensions(screen_px, minimap_px),
        action_space=ActionSpace.FEATURES,
        camera_width_world_units=FLAGS.camera_width)

    game = sc2_env.SC2Env(
        map_name=FLAGS.map,
        step_mul=FLAGS.step_mul,
        agent_interface_format=interface,
        visualize=FLAGS.render)

    # One-hot encoding of the actions:
    # possible_actions = [[1, 0, 0, ...], [0, 1, 0, ...], ...]
    action_count = len(actions.FUNCTIONS)
    possible_actions = np.identity(action_count, dtype=int).tolist()

    return available_actions_printer.AvailableActionsPrinter(game), possible_actions
def create_sc2_minigame_env(map_name, mode, visualize=False):
    """Create an SC2 minigame environment for the given configuration mode.

    Screen and minimap share one square resolution taken from
    ``sc2_cfg[mode]['resl']``.  In ``'test'`` mode the random seed is fixed
    to 3; otherwise no seed is supplied.

    Args:
        map_name: map passed to ``sc2_env.SC2Env``.
        mode: one of ``'full'``, ``'lite'`` or ``'test'``.
        visualize: forwarded to ``sc2_env.SC2Env``.

    Returns:
        The constructed ``sc2_env.SC2Env``.
    """
    assert mode in ('full', 'lite', 'test')

    # workaround for pysc2 flags
    FLAGS = flags.FLAGS
    FLAGS([__file__])

    if mode == 'test':
        env_seed = 3
    else:
        env_seed = None

    mode_cfg = sc2_cfg[mode]
    return sc2_env.SC2Env(map_name=map_name,
                          step_mul=mode_cfg['step_mul'],
                          screen_size_px=(sc2_cfg[mode]['resl'],) * 2,
                          minimap_size_px=(sc2_cfg[mode]['resl'],) * 2,
                          visualize=visualize,
                          random_seed=env_seed)
def test_observation_matches_obs_spec(self):
    """Check for 100 steps that every observation agrees with its spec.

    A spec dimension of 0 is treated as a wildcard; an observation whose
    shape is ``(0,)`` only requires that the spec contains a 0.
    """
    with sc2_env.SC2Env(
            map_name="Simple64",
            feature_screen_width=84,
            feature_screen_height=87,
            feature_minimap_width=64,
            feature_minimap_height=67) as env:

        all_obs_specs = env.observation_spec()
        self.assertIsInstance(all_obs_specs, tuple)
        self.assertLen(all_obs_specs, 1)
        obs_spec = all_obs_specs[0]

        all_action_specs = env.action_spec()
        self.assertIsInstance(all_action_specs, tuple)
        self.assertLen(all_action_specs, 1)
        action_spec = all_action_specs[0]

        agent = random_agent.RandomAgent()
        agent.setup(obs_spec, action_spec)

        all_obs = env.reset()
        agent.reset()
        for _ in range(100):
            self.assertIsInstance(all_obs, tuple)
            self.assertLen(all_obs, 1)
            timestep = all_obs[0]
            obs = timestep.observation
            self.assertItemsEqual(obs_spec.keys(), obs.keys())

            for key, value in six.iteritems(obs):
                expected = obs_spec[key]
                descr = "%s: spec: %s != obs: %s" % (key, expected,
                                                     value.shape)
                if value.shape == (0,):  # Empty tensor can't have a shape.
                    self.assertIn(0, expected, descr)
                    continue
                self.assertEqual(len(expected), len(value.shape), descr)
                for spec_dim, obs_dim in zip(expected, value.shape):
                    if spec_dim != 0:
                        self.assertEqual(spec_dim, obs_dim, descr)

            all_obs = env.step((agent.step(timestep),))
def main(unused_argv):
    """Run ``SupportAI`` in realtime against a very-easy bot on Simple64.

    Games are played back to back, each in a fresh environment, until a
    keyboard interrupt ends the program silently.

    The previous version built a ``features.Features`` object and an
    ``action_spec`` once per game and again on every game step without ever
    using either; that dead, loop-invariant work has been removed.

    Args:
        unused_argv: ignored.
    """
    agent = SupportAI(game_type)
    try:
        while True:
            with sc2_env.SC2Env(
                    map_name="Simple64",
                    players=[
                        sc2_env.Agent(player_race),
                        sc2_env.Bot(enemy_race,
                                    sc2_env.Difficulty.very_easy),
                    ],
                    agent_interface_format=features.AgentInterfaceFormat(
                        feature_dimensions=features.Dimensions(
                            screen=86, minimap=86),
                        use_feature_units=True),
                    step_mul=16,
                    game_steps_per_episode=0,
                    visualize=False,
                    realtime=True) as env:
                agent.setup(env.observation_spec(), env.action_spec())
                timesteps = env.reset()
                agent.reset()

                # Play one game; the agent also sees the final timestep.
                while True:
                    step_actions = [agent.step(timesteps[0])]
                    if timesteps[0].last():
                        break
                    timesteps = env.step(step_actions)
    except KeyboardInterrupt:
        pass
def start(self):
    """Run ``self.max_episodes`` episodes and report the achieved frame rate.

    Each episode lasts at most ``self.max_timesteps`` agent steps.  A
    keyboard interrupt stops the run early; the timing summary is printed
    in either case.

    :return: None
    """
    total_frames = 0
    episode_limit = self.max_timesteps * self.step_mul // 2
    with sc2_env.SC2Env(map_name=self.map,
                        agent_interface_format=self.aif,
                        step_mul=self.step_mul,
                        game_steps_per_episode=episode_limit,
                        visualize=self.visualize) as env:
        start_time = time.time()
        try:
            timesteps = self.setup(env)
            for _episode in range(self.max_episodes):
                # --- run one simulation ---
                for _step in range(self.max_timesteps):
                    total_frames += 1
                    agent_obs, global_obs = self.allot_observations(timesteps)
                    actions = []
                    for agent, obs in zip(self.agents, agent_obs):
                        actions.append(agent.step(obs))
                    if timesteps[0].last():
                        break
                    timesteps = env.step([actions])

                # --- reset environment and agents ---
                timesteps = env.reset()
                for member in self.agents:
                    member.reset()
        except KeyboardInterrupt:
            pass
        finally:
            elapsed_time = time.time() - start_time
            print("Took %.3f seconds for %s steps: %.3f fps" %
                  (elapsed_time, total_frames, total_frames / elapsed_time))
def worker(remote, map_name):
    """Serve environment commands received over ``remote``.

    Commands are ``(cmd, data)`` pairs:
      * ``'step'``: step with ``data`` as the actions and reply
        ``(ob, reward, done, info)``.  When the episode is over the
        environment is reset and ``info`` comes from the reset result.
      * ``'reset'``: reset and reply ``(ob, reward, done, info)``.
      * ``'close'``: close ``remote`` and stop.
      * ``'get_spaces'`` / ``'action_spec'``: reply with
        ``env.action_spec().functions[data]`` (the former paired with ``""``).

    Raises:
        NotImplementedError: for any other command.
    """
    with sc2_env.SC2Env(map_name, step_mul=1) as env:
        while True:
            cmd, data = remote.recv()

            if cmd == 'close':
                remote.close()
                break

            if cmd == 'step':
                print("action : ", data)
                result = env.step(actions=data)
                first = result[0]
                ob = first.observation["screen"]
                reward = first.reward
                done = first.step_type == environment.StepType.LAST
                info = first.observation["available_actions"]
                if done:
                    # Start the next episode; only the available actions
                    # are taken from the reset result.
                    result = env.reset()
                    info = result[0].observation["available_actions"]
                print("ob : ", ob.shape)
                print("reward : ", reward)
                print("done : ", done)
                print("info : ", info)
                remote.send((ob, reward, done, info))
            elif cmd == 'reset':
                result = env.reset()
                first = result[0]
                ob = first.observation["screen"]
                reward = first.reward
                done = first.step_type == environment.StepType.LAST
                info = first.observation["available_actions"]
                remote.send((ob, reward, done, info))
            elif cmd == 'get_spaces':
                remote.send((env.action_spec().functions[data], ""))
            elif cmd == "action_spec":
                remote.send((env.action_spec().functions[data]))
            else:
                raise NotImplementedError
def main(unused_argv):
    """Pit ``SmartAgent`` against ``RandomAgent`` (both Terran) for 1000 episodes.

    A keyboard interrupt ends the run silently.
    """
    agent1 = SmartAgent()
    agent2 = RandomAgent()
    try:
        env_settings = dict(
            map_name="Simple64",
            players=[
                sc2_env.Agent(sc2_env.Race.terran),
                sc2_env.Agent(sc2_env.Race.terran),
            ],
            agent_interface_format=features.AgentInterfaceFormat(
                action_space=actions.ActionSpace.RAW,
                use_raw_units=True,
                raw_resolution=64,
            ),
            # 2400/step_mul = apm (48 step_mul is 50 apm, 53 is 45 apm)
            step_mul=53,
            disable_fog=True,
        )
        with sc2_env.SC2Env(**env_settings) as env:
            run_loop.run_loop([agent1, agent2], env, max_episodes=1000)
    except KeyboardInterrupt:
        pass
def main():
    """Parse flags and train a DQN on DefeatZerglingsAndBanelings."""
    FLAGS(sys.argv)

    hyperparams = dict(
        num_actions=3,
        lr=1e-4,
        max_timesteps=10000000,
        buffer_size=100000,
        exploration_fraction=0.5,
        exploration_final_eps=0.01,
        train_freq=2,
        learning_starts=100000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True,
        num_cpu=2,
    )
    with sc2_env.SC2Env(map_name="DefeatZerglingsAndBanelings",
                        step_mul=step_mul,
                        visualize=False,
                        game_steps_per_episode=steps * step_mul) as env:
        dqn.learn(env, **hyperparams)
def main(unused_argv):
    """Entry point: configure the match and play ``ZergAgent`` games forever.

    Each game runs in a fresh environment on AbyssalReef against a
    very-easy, random-race built-in bot.  A keyboard interrupt ends the
    program silently.
    """
    agent = ZergAgent()
    try:
        while True:
            players = [
                # One player is our Zerg bot ...
                sc2_env.Agent(sc2_env.Race.zerg),
                # ... the second is the game's own bot, on very_easy.
                sc2_env.Bot(sc2_env.Race.random,
                            sc2_env.Difficulty.very_easy),
            ]
            # Screen and minimap dimensions.
            interface = features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=84, minimap=64),
                use_feature_units=True)

            with sc2_env.SC2Env(
                    map_name="AbyssalReef",  # map the game is played on
                    players=players,
                    agent_interface_format=interface,
                    step_mul=16,
                    # 0 means the game does not end after a fixed number
                    # of steps.
                    game_steps_per_episode=0,
                    # Do not display the feature layers provided by pysc2.
                    visualize=False) as env:
                agent.setup(env.observation_spec(), env.action_spec())
                timesteps = env.reset()
                agent.reset()

                # On every step of the game, call the bot's "step" method.
                while True:
                    step_actions = [agent.step(timesteps[0])]
                    if timesteps[0].last():
                        break  # the game is over; leave the loop
                    # Hand the bot's chosen actions back to the game.
                    timesteps = env.step(step_actions)
    except KeyboardInterrupt:
        pass
def run_thread(agent_classes, players, map_name, visualize):
    """Instantiate the given agent classes and run them on ``map_name``.

    Args:
        agent_classes: sequence of zero-argument callables, one per agent;
            the first one's ``__name__`` is used when saving a replay.
        players: forwarded to ``sc2_env.SC2Env``.
        map_name: forwarded to ``sc2_env.SC2Env``.
        visualize: forwarded to ``sc2_env.SC2Env``.
    """
    # Look the action space up only when the flag is set; otherwise pass
    # the falsy flag value through unchanged.
    action_space = (FLAGS.action_space and
                    sc2_env.ActionSpace[FLAGS.action_space])

    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            step_mul=FLAGS.step_mul,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            feature_screen_size=FLAGS.feature_screen_size,
            feature_minimap_size=FLAGS.feature_minimap_size,
            rgb_screen_size=FLAGS.rgb_screen_size,
            rgb_minimap_size=FLAGS.rgb_minimap_size,
            action_space=action_space,
            use_feature_units=FLAGS.use_feature_units,
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        agents = []
        for agent_cls in agent_classes:
            agents.append(agent_cls())
        run_loop.run_loop(agents, env, FLAGS.max_agent_steps)
        if FLAGS.save_replay:
            env.save_replay(agent_classes[0].__name__)
def main(args):
    """Play one game with ``Agent`` using the module-level configuration.

    A keyboard interrupt ends the run silently.

    Args:
        args: not used by this function.
    """
    agent = Agent()
    try:
        with sc2_env.SC2Env(map_name=MAPNAME,
                            players=players,
                            agent_interface_format=interface,
                            step_mul=APM,
                            game_steps_per_episode=UNLIMIT,
                            visualize=VISUALIZE,
                            realtime=REALTIME) as env:
            agent.setup(env.observation_spec(), env.action_spec())
            timestep = env.reset()
            agent.reset()

            # The agent also gets to act on the final timestep before the
            # loop ends.
            finished = False
            while not finished:
                step_actions = [agent.step(timestep[0])]
                finished = timestep[0].last()
                if not finished:
                    timestep = env.step(step_actions)
    except KeyboardInterrupt:
        pass
def main(unused_argv):
    """Run ``DefeatZealotAgent`` on the DefeatZealotsBlink map.

    A keyboard interrupt ends the run silently.
    """
    try:
        env_settings = dict(
            # Select a map
            map_name="DefeatZealotsBlink",
            # Add players
            players=[sc2_env.Agent(sc2_env.Race.protoss)],
            # Specify interface
            agent_interface_format=features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=84, minimap=64),
                use_feature_units=True),
            # Specify how often we act; the original note gives the game
            # speed as 22.4 steps per second.
            step_mul=2,
            game_steps_per_episode=0,
            visualize=False,
        )
        with sc2_env.SC2Env(**env_settings) as env:
            run_loop.run_loop([DefeatZealotAgent()], env)
    except KeyboardInterrupt:
        pass
def return_env(agent_cls, agent_race, bot_race, map_name, visualize):
    """Create an ``SC2Env`` with fixed default settings.

    Fixed values: ``step_mul`` 8, ``difficulty`` ``"1"``, no per-episode
    step limit (0), 84 px screen and 64 px minimap.

    Args:
        agent_cls: not used by this function.
        agent_race: forwarded to ``sc2_env.SC2Env``.
        bot_race: forwarded to ``sc2_env.SC2Env``.
        map_name: forwarded to ``sc2_env.SC2Env``.
        visualize: forwarded to ``sc2_env.SC2Env``.

    Returns:
        The constructed ``sc2_env.SC2Env``.
    """
    screen_resolution = 84
    minimap_resolution = 64
    return sc2_env.SC2Env(
        map_name=map_name,
        agent_race=agent_race,
        bot_race=bot_race,
        difficulty="1",
        step_mul=8,
        game_steps_per_episode=0,
        screen_size_px=(screen_resolution,) * 2,
        minimap_size_px=(minimap_resolution,) * 2,
        visualize=visualize)
def reset(self):
    """Begin a new episode and return its first derived observation.

    Increments the episode counter, zeroes the step counter, creates the
    SC2 environment on first use from ``self.default_settings``, and clears
    the tracked unit lists, the selection and the goal.

    Returns:
        Whatever ``self.get_derived_obs`` produces for the first timestep
        of the reset environment.
    """
    self.episodes += 1
    self.steps = 0

    # Create the environment lazily, on the first reset only.
    if self.env is None:
        self.env = sc2_env.SC2Env(**self.default_settings)

    self.marines, self.banelings, self.zerglings = [], [], []
    self.selected = []
    self.goal = [0, 0]

    first_timestep = self.env.reset()[0]
    return self.get_derived_obs(first_timestep)
def test_heterogeneous_observations(self):
    """Two agents with different interface formats each get matching obs.

    One agent uses feature layers (84x87 screen, 64x67 minimap), the other
    RGB layers (128 screen, 64 minimap).  For 100 steps each agent's
    observation is checked against its own spec.
    """
    feature_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=(84, 87), minimap=(64, 67)))
    rgb_format = sc2_env.AgentInterfaceFormat(
        rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64))

    with sc2_env.SC2Env(
            map_name="Simple64",
            players=[sc2_env.Agent(sc2_env.Race.random),
                     sc2_env.Agent(sc2_env.Race.random)],
            agent_interface_format=[feature_format, rgb_format]) as env:

        obs_specs = env.observation_spec()
        self.assertIsInstance(obs_specs, tuple)
        self.assertLen(obs_specs, 2)

        actions_specs = env.action_spec()
        self.assertIsInstance(actions_specs, tuple)
        self.assertLen(actions_specs, 2)

        agents = []
        for obs_spec, action_spec in zip(obs_specs, actions_specs):
            agent = random_agent.RandomAgent()
            agent.setup(obs_spec, action_spec)
            agent.reset()
            agents.append(agent)

        time_steps = env.reset()
        for _ in range(100):
            self.assertIsInstance(time_steps, tuple)
            self.assertLen(time_steps, 2)

            actions = []
            for agent, time_step, obs_spec in zip(agents, time_steps,
                                                  obs_specs):
                self.check_observation_matches_spec(time_step.observation,
                                                    obs_spec)
                actions.append(agent.step(time_step))

            time_steps = env.step(actions)
def __init__(self, render):
    """Create the FLAGS-configured SC2 environment and reset it once.

    The first timestep returned by the reset is stored and also used to
    pre-fill ``self.history`` with ``FLAGS.history_size`` references to it.

    Args:
        render: whether to visualize; also stored as ``self.do_render``.
    """
    screen_px = (FLAGS.screen_resolution, FLAGS.screen_resolution)
    minimap_px = (FLAGS.minimap_resolution, FLAGS.minimap_resolution)
    env = sc2_env.SC2Env(
        map_name=FLAGS.map,
        agent_race=FLAGS.agent_race,
        bot_race=FLAGS.bot_race,
        difficulty=FLAGS.difficulty,
        step_mul=FLAGS.step_mul,
        game_steps_per_episode=FLAGS.game_steps_per_episode,
        screen_size_px=screen_px,
        minimap_size_px=minimap_px,
        visualize=render)

    self.do_render = render
    self.resets = 0
    self._env = env
    self._env_timestep = self._env.reset()
    # Every history slot initially refers to the same first timestep.
    self.history = [self._env_timestep for _ in range(FLAGS.history_size)]
    self._actions = [0]
    self._input_layers = [0]