def __init__(self, num_fields, max_ticks=2400):
    """Start one ``env_worker`` subprocess per field and record the spaces.

    Args:
        num_fields: Number of worker processes to start. ``num_envs`` is
            set to twice this value.
        max_ticks: Forwarded to every worker as the ``max_ticks`` keyword
            argument.
    """
    self.num_fields = num_fields
    self.num_envs = 2 * num_fields
    self.max_ticks = max_ticks
    self.connections = []  # type: List[Connection]
    self.processes = []  # type: List[Process]

    worker_kwargs = {'max_ticks': self.max_ticks}
    for _ in range(self.num_fields):
        parent_end, child_end = Pipe()
        worker = Process(
            target=env_worker,
            args=(child_end,),
            kwargs=worker_kwargs,
            daemon=True,
        )
        worker.start()
        self.connections.append(parent_end)
        self.processes.append(worker)

    # The spaces are read from a throwaway local Haxball instance instead
    # of asking a worker for them over its pipe.
    probe_env = Haxball()
    self.observation_space = probe_env.observation_space
    self.action_space = probe_env.action_space

    self.keys, _shapes, _dtypes = obs_space_info(self.observation_space)
    self.waiting = False
def get_env(field_id):
    """Return the environment cached for ``field_id``, building it on first use.

    A missing entry is created from fresh start conditions and the
    ``env_kwargs`` of the enclosing scope, then stored in ``envs`` so later
    calls with the same id get the same object back.
    """
    if field_id not in envs:
        start_conditions = Ccreate_start_conditions()
        envs[field_id] = Haxball(gameplay=start_conditions, **env_kwargs)
    return envs[field_id]
def __init__(self, num_fields, max_ticks=2400):
    """Build ``num_fields`` in-process Haxball environments and step buffers.

    Args:
        num_fields: Number of Haxball environments to create. ``num_envs``
            is set to twice this value (presumably one slot per team on
            each field -- confirm against the step implementation).
        max_ticks: Passed to every Haxball environment as ``max_ticks``.

    Raises:
        IndexError: If ``num_fields`` is 0, because the spaces are read
            from the first environment.
    """
    self.num_fields = num_fields
    self.num_envs = num_fields * 2

    # Each field gets its own freshly created start conditions.
    self.envs = [
        Haxball(gameplay=Ccreate_start_conditions(), max_ticks=max_ticks)
        for _ in range(num_fields)
    ]

    first_env = self.envs[0]
    self.observation_space = first_env.observation_space
    self.action_space = first_env.action_space

    self.keys, shapes, dtypes = obs_space_info(self.observation_space)

    # One zero-filled buffer per observation key, with a leading axis of
    # length num_envs.
    self.buf_obs = {
        k: np.zeros((self.num_envs, ) + tuple(shapes[k]), dtype=dtypes[k])
        for k in self.keys
    }
    # The ``np.bool`` alias was deprecated in NumPy 1.20 and removed in
    # 1.24, where it raises AttributeError; the builtin ``bool`` yields
    # the same dtype on every NumPy version.
    self.buf_dones = np.zeros((self.num_envs, ), dtype=bool)
    self.buf_rews = np.zeros((self.num_envs, ), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    self.actions = None
    self.spec = first_env.spec
def env_worker(conn: Connection, **env_kwargs):
    """Serve commands for one Haxball environment over ``conn`` until closed.

    Every message received is a ``(cmd, data)`` pair. Supported commands:

    * ``'step'``: ``data`` holds two actions; the first is applied with
      ``red_team=True`` and the second with ``red_team=False``. Replies
      with four arrays (observations, rewards, dones, infos), each holding
      the red-team entry followed by the blue-team entry.
    * ``'reset'``: resets the environment and replies with the resulting
      observation twice, as a two-element list.
    * ``'render'``: replies with ``env.render(mode='rgb_array')``.
    * ``'get_spaces_spec'``: replies with
      ``(observation_space, action_space, spec)``.
    * ``'close'``: closes ``conn`` and returns.

    Args:
        conn: This worker's end of the pipe.
        **env_kwargs: Forwarded to the ``Haxball`` constructor.

    Raises:
        NotImplementedError: On any other command.
    """
    env = Haxball(gameplay=Ccreate_start_conditions(), **env_kwargs)
    team_order = (True, False)  # red first, then blue

    while True:
        cmd, data = conn.recv()

        if cmd == 'close':
            conn.close()
            break
        elif cmd == 'step':
            red_action, blue_action = data
            # Both actions are queued before the physics advances once.
            env.step_async(red_action, red_team=True)
            env.step_async(blue_action, red_team=False)
            env.step_physics()

            outcomes = [env.step_wait(red_team=team) for team in team_order]
            obss, rews, dones, infos = (list(col) for col in zip(*outcomes))

            # NOTE(review): the observation returned by reset() is discarded,
            # so the reply still carries the final observations of the
            # finished episode -- confirm this is intended.
            if any(dones):
                env.reset()

            conn.send((
                np.array(obss),
                np.array(rews),
                np.array(dones),
                np.array(infos),
            ))
        elif cmd == 'reset':
            first_obs = env.reset()
            conn.send([first_obs, first_obs])
        elif cmd == 'render':
            conn.send(env.render(mode='rgb_array'))
        elif cmd == 'get_spaces_spec':
            conn.send((env.observation_space, env.action_space, env.spec))
        else:
            raise NotImplementedError
gameplay = Ccreate_start_conditions( posizione_palla=CVector(0, 0), velocita_palla=CVector(0, 0), posizione_blu=CVector(277.5, 0), velocita_blu=CVector(0, 0), input_blu=0, posizione_rosso=CVector(-277.5, 0), velocita_rosso=CVector(0, 0), input_rosso=0, tempo_iniziale=0, punteggio_rosso=0, punteggio_blu=0 ) env = Haxball(gameplay=gameplay, max_ticks=max_ticks*2) obs = env.reset() action = 0 play_red = 1 dm = DelayedModel(env, model, play_red, nenvs=nenvs, nlstm=nlstm, nsteps=nsteps) blue_unpressed = True red_unpressed = True D_i = 1 if play_red else 2 i = 0 reward = None ret = None next_action = 0 while True: