Example #1
0
    def __init__(self, num_fields, max_ticks=2400):
        """Start one worker process per field; each field serves two agent envs."""
        self.num_fields = num_fields
        self.num_envs = num_fields * 2  # two agents (red/blue) per field
        self.max_ticks = max_ticks

        self.connections = []  # type: List[Pipe]
        self.processes = []  # type: List[Process]
        for _ in range(self.num_fields):
            parent_end, child_end = Pipe()
            worker = Process(target=env_worker,
                             daemon=True,
                             args=(child_end, ),
                             kwargs=dict(max_ticks=self.max_ticks))
            worker.start()
            self.connections.append(parent_end)
            self.processes.append(worker)

        # Spaces are read from a throwaway local env rather than asking a
        # worker over the pipe (the 'get_spaces_spec' round-trip is disabled).
        probe_env = Haxball()
        self.observation_space = probe_env.observation_space
        self.action_space = probe_env.action_space

        self.keys, shapes, dtypes = obs_space_info(self.observation_space)
        self.waiting = False
Example #2
0
 def get_env(field_id):
     """Return the env cached for *field_id*, creating and caching it on first use."""
     try:
         return envs[field_id]
     except KeyError:
         # First request for this field: build a fresh game and cache it.
         env = Haxball(gameplay=Ccreate_start_conditions(), **env_kwargs)
         envs[field_id] = env
         return env
Example #3
0
    def __init__(self, num_fields, max_ticks=2400):
        """Create `num_fields` in-process Haxball envs (two agent slots each).

        Args:
            num_fields: number of game fields to simulate.
            max_ticks: per-episode tick limit passed to each Haxball env.
        """
        self.num_fields = num_fields
        self.num_envs = num_fields * 2  # two agents (red/blue) per field
        self.envs = []
        for _ in range(num_fields):
            gameplay = Ccreate_start_conditions()
            env = Haxball(gameplay=gameplay, max_ticks=max_ticks)
            self.envs.append(env)

        # All envs are constructed identically, so probe the first for spaces.
        env = self.envs[0]

        self.observation_space = env.observation_space
        self.action_space = env.action_space
        self.keys, shapes, dtypes = obs_space_info(self.observation_space)

        # Per-step result buffers, one slot per agent env.
        self.buf_obs = {
            k: np.zeros((self.num_envs, ) + tuple(shapes[k]), dtype=dtypes[k])
            for k in self.keys
        }
        # FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin `bool` is the supported spelling (np.bool was its alias).
        self.buf_dones = np.zeros((self.num_envs, ), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs, ), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None
        self.spec = self.envs[0].spec
Example #4
0
def env_worker(conn: Connection, **env_kwargs):
    """Worker-process loop: serve one Haxball env over a pipe connection.

    Commands arrive as ``(cmd, data)`` tuples on ``conn``:
      'step'            data = (a1, a2) actions for red and blue teams; replies
                        with (obss, rews, dones, infos) arrays of length 2.
      'reset'           replies with the same reset observation for both agents.
      'render'          replies with an rgb_array frame.
      'get_spaces_spec' replies with (observation_space, action_space, spec).
      'close'           closes the connection and exits the loop.
    Any other command raises NotImplementedError.
    """
    gameplay = Ccreate_start_conditions()
    env = Haxball(gameplay=gameplay, **env_kwargs)
    i = 0  # NOTE(review): never read after init in this span — appears vestigial
    while True:
        cmd, data = conn.recv()

        if cmd == 'step':
            # One joint step: queue both teams' actions, advance physics once,
            # then collect each team's view of the transition.
            a1, a2 = data
            env.step_async(a1, red_team=True)
            env.step_async(a2, red_team=False)

            env.step_physics()

            obss = []
            rews = []
            dones = []
            infos = []
            is_done = False

            for red_team in (True, False):
                obs, rew, done, info = env.step_wait(red_team=red_team)
                obss.append(obs)
                rews.append(rew)
                dones.append(done)
                infos.append(info)
                is_done |= done
            # Reset as soon as either side reports done, so the next 'step'
            # always operates on a live episode.
            if is_done:
                env.reset()

            res = np.array(obss), np.array(rews), np.array(dones), np.array(
                infos)
            conn.send(res)
        elif cmd == 'reset':
            ob = env.reset()
            # Both agents observe the same freshly reset state.
            conn.send([ob, ob])
        elif cmd == 'render':
            res = env.render(mode='rgb_array')
            conn.send(res)
        elif cmd == 'close':
            conn.close()
            break
        elif cmd == 'get_spaces_spec':
            conn.send((env.observation_space, env.action_space, env.spec))
        else:
            raise NotImplementedError
    gameplay = Ccreate_start_conditions(
        posizione_palla=CVector(0, 0),
        velocita_palla=CVector(0, 0),
        posizione_blu=CVector(277.5, 0),
        velocita_blu=CVector(0, 0),
        input_blu=0,
        posizione_rosso=CVector(-277.5, 0),
        velocita_rosso=CVector(0, 0),
        input_rosso=0,
        tempo_iniziale=0,
        punteggio_rosso=0,
        punteggio_blu=0
    )

    env = Haxball(gameplay=gameplay, max_ticks=max_ticks*2)
    obs = env.reset()
    action = 0
    play_red = 1

    dm = DelayedModel(env, model, play_red, nenvs=nenvs, nlstm=nlstm, nsteps=nsteps)

    blue_unpressed = True
    red_unpressed = True

    D_i = 1 if play_red else 2
    i = 0
    reward = None
    ret = None
    next_action = 0
    while True: