コード例 #1
0
class WandererRoborobo(MultiAgentEnv):
    """Multi-agent environment in which roborobo robots are driven directly.

    Robot ``i`` is exposed as agent ``'player<i>'``.

    * Action: two values in ``[-1, 1]``; the first is scaled by
      ``_MAX_TRANSLATION`` and written to the robot's ``translation``, the
      second is scaled by ``_MAX_ROTATION`` and written to its ``rotation``.
    * Observation: the camera sensor distances followed by the robot's
      current ``translation`` and ``rotation`` (``observation_space``
      expects 8 sensor values in ``[0, 1]``).
    * Reward: the robot's translation minus ten times a penalty built from
      observation entries 2, 3 and 4 and the absolute rotation.

    An episode ends for every agent once ``max_moves`` steps were taken.
    """

    # Scale factors applied to the normalised action components; they are also
    # the bounds of the last two observation entries.
    _MAX_TRANSLATION = 2
    _MAX_ROTATION = 15

    action_space = Box(np.array([-1, -1], dtype=np.float32), np.array([1, 1], dtype=np.float32))
    # Bounds are built as float32, like the action space, instead of integer
    # arrays that Box has to cast afterwards.
    observation_space = Box(
        np.array([0] * 8 + [-_MAX_TRANSLATION, -_MAX_ROTATION], dtype=np.float32),
        np.array([1] * 8 + [_MAX_TRANSLATION, _MAX_ROTATION], dtype=np.float32),
    )

    def __init__(self, nbrobs: int, max_moves: int):
        """Store the configuration; the simulator is only started by ``reset``.

        Args:
            nbrobs: Number of robots (and therefore agents) in the simulation.
            max_moves: Number of steps after which an episode is finished.
        """
        super().__init__()
        self.nbrobs = nbrobs
        self.max_moves = max_moves
        self.moves = 0
        self.rob = None
        self.wms = None

    def __del__(self):
        """Close the simulator if one was started."""
        # getattr: __del__ also runs when __init__ failed before `rob` was set.
        rob = getattr(self, 'rob', None)
        if rob is not None:
            rob.close()

    @staticmethod
    def _agent_id(i):
        """Return the agent key used in all dicts for robot index ``i``."""
        return 'player' + str(i)

    def reset(self):
        """Start the simulator on first use, otherwise reset the world.

        Returns:
            Dict mapping every agent key to its initial observation.
        """
        if self.rob is None:
            # The "dummy" arguments are forwarded unchanged; their meaning is
            # defined by Pyroborobo, not by this class.
            self.rob = Pyroborobo('config/wanderer.properties', WorldObserver, "dummy", "dummy", "dummy",
                                  {'gInitialNumberOfRobots': str(self.nbrobs)})
            self.rob.start()
            self.rob.world_observer.set_roborobo(self.rob)
            self.wms = self.rob.world_models
        else:
            print('reset')
            self.rob.world_observer.reset()
        self.moves = 0
        return {self._agent_id(i): self.get_obs(i) for i in range(self.nbrobs)}

    def get_obs(self, i):
        """Return the observation vector of robot ``i``.

        The vector is the robot's camera sensor distances with its current
        translation and rotation appended.
        """
        wm = self.wms[i]
        return np.concatenate((wm.get_camera_sensors_dist(), [wm.translation, wm.rotation]))

    def step(self, action_dict):
        """Apply one action per robot and advance the simulation by one step.

        Args:
            action_dict: Maps every agent key to its ``(translation, rotation)``
                action, each component in ``[-1, 1]``.

        Returns:
            ``(observations, rewards, dones, infos)`` where the first three are
            dicts keyed by agent key (``dones`` additionally has ``'__all__'``)
            and ``infos`` is an empty dict.
        """
        self.moves += 1

        for i in range(self.nbrobs):
            action = action_dict[self._agent_id(i)]
            self.wms[i].translation = action[0] * self._MAX_TRANSLATION
            self.wms[i].rotation = action[1] * self._MAX_ROTATION

        # A truthy result from update() is treated as a stop request and ends
        # the whole process, as before.
        if self.rob.update(1):
            sys.exit(0)

        obs_dict = {self._agent_id(i): self.get_obs(i) for i in range(self.nbrobs)}

        # `>=` so that an episode lasts exactly `max_moves` steps; the previous
        # `>` comparison allowed one extra step.
        episode_over = self.moves >= self.max_moves

        rewards = {}
        done = {}
        for i in range(self.nbrobs):
            agent = self._agent_id(i)
            obs = obs_dict[agent]
            # Penalty grows as entries 2-4 of the observation get smaller and
            # as the robot turns harder. The grouping mirrors the original
            # expression so the floating-point result is unchanged.
            punish = (1 - obs[2]) + ((1 - obs[3]) + (1 - obs[4])
                                     + np.abs(obs[9]) / self._MAX_ROTATION)
            rewards[agent] = self.wms[i].translation - 10 * punish
            done[agent] = episode_over
        done['__all__'] = all(done.values())
        return obs_dict, rewards, done, {}

    def render(self):
        """No-op; this class does not implement rendering."""
        pass
コード例 #2
0
class NegotiateRoborobo(MultiAgentEnv):
    """Multi-agent environment wrapping the roborobo "pynegociate" setup.

    Every robot goes through successive "lives": robot ``i`` in its ``n``-th
    life is exposed as agent ``'<i>-<n>'``. A robot only reports an
    observation, reward and done flag on steps where its world model says it
    is ``seeking``; when it reports done, its life counter is incremented so
    it shows up under a new agent key afterwards.
    """

    action_space = Box(np.array([-1] * 4, dtype=np.float32),
                       np.array([1] * 4, dtype=np.float32))
    # Bounds are built as float32, like the action space, instead of integer
    # arrays that Box has to cast afterwards.
    observation_space = Box(np.zeros(8 * 4 + 2, dtype=np.float32),
                            np.ones(8 * 4 + 2, dtype=np.float32))

    def __init__(self, nbrobs: int):
        """Store the configuration; the simulator is only started by ``reset``.

        Args:
            nbrobs: Number of robots in the simulation.
        """
        super().__init__()
        self.nbrobs = nbrobs
        self.rob = None
        self.wms = None
        # Per-robot life counter, used as the suffix of the agent key.
        self.curlife = np.zeros(nbrobs, dtype=int)

    def __del__(self):
        """Close the simulator if one was started."""
        # getattr: __del__ also runs when __init__ failed before `rob` was set.
        rob = getattr(self, 'rob', None)
        if rob is not None:
            rob.close()

    def _agent_id(self, i):
        """Return the agent key of robot ``i`` for its current life."""
        return f'{i}-{self.curlife[i]}'

    def reset(self):
        """Start the simulator on first use, otherwise reset the world.

        The life counters are not reset, so agent keys keep increasing across
        episodes.

        Returns:
            Dict mapping the current agent key of every robot to the ``'obs'``
            entry of its world model's observations.
        """
        if self.rob is None:
            self.rob = Pyroborobo('config/pynegociate.properties', None, None,
                                  None, None,
                                  {'gInitialNumberOfRobots': str(self.nbrobs)})
            self.rob.start()
            self.wms = self.rob.world_models
        else:
            print('reset')
            self.rob.world_observer.reset()
        return {
            self._agent_id(i): self.wms[i].get_observations()['obs']
            for i in range(self.nbrobs)
        }

    def step(self, action_dict):
        """Forward the given actions and advance the simulation by one step.

        Args:
            action_dict: Maps agent keys to actions. The robot index is
                recovered from each key with ``_get_id_player``.

        Returns:
            ``(observations, rewards, dones, infos)``. The first three only
            contain entries for robots that are ``seeking`` after the update;
            ``dones['__all__']`` is true only if at least one robot reported
            and all reporting robots are done. ``infos`` is an empty dict.
        """
        for key, action in action_dict.items():
            self.wms[_get_id_player(key)].set_actions(action)

        # A truthy result from update() is treated as a stop request and ends
        # the whole process, as before.
        if self.rob.update(1):
            sys.exit(0)

        obs_dict = {}
        rewards_dict = {}
        done_dict = {}
        for i in range(self.nbrobs):
            wm = self.wms[i]
            obs = wm.get_observations()
            if not obs['seeking']:
                continue
            # Key is computed before the life counter may be bumped below, so
            # the final transition is reported under the finishing life.
            agent = self._agent_id(i)
            done = wm.get_done()
            obs_dict[agent] = obs['obs']
            rewards_dict[agent] = wm.get_reward()
            done_dict[agent] = done
            if done:
                self.curlife[i] += 1

        # all() of an empty dict would be True; no reporting robot means the
        # episode is not over.
        done_dict['__all__'] = bool(done_dict) and all(done_dict.values())
        return obs_dict, rewards_dict, done_dict, {}

    def render(self):
        """No-op; this class does not implement rendering."""
        pass