Esempio n. 1
0
 def Step(self, action_request, context):
     """Advance the wrapped environment one step and return the new state.

     Args:
         action_request: Request message whose ``value`` field is handed
             to ``self.env.step``.
         context: Not used by this method.

     Returns:
         A ``gym_uds_pb2.State`` carrying the observation (flattened data
         plus its shape), the reward and the ``done`` flag of this step.
     """
     # The fourth element of the step result is not forwarded to the caller.
     obs, step_reward, finished, _unused = self.env.step(action_request.value)

     # Assumes the observation exposes ``ravel()`` and ``shape`` (e.g. a
     # NumPy array) -- TODO confirm against the environments in use.
     flat_values = obs.ravel()
     dims = obs.shape
     encoded = gym_uds_pb2.Observation(data=flat_values, shape=dims)

     return gym_uds_pb2.State(
         observation=encoded,
         reward=step_reward,
         done=finished,
     )
Esempio n. 2
0
 def Reset(self, empty_request, context):
     """Reset the wrapped environment and return its initial state.

     Args:
         empty_request: Not used by this method.
         context: Not used by this method.

     Returns:
         A ``gym_uds_pb2.State`` carrying the initial observation
         (flattened data plus its shape) with ``reward=0.0`` and
         ``done=False``.
     """
     initial = self.env.reset()

     # Assumes the observation exposes ``ravel()`` and ``shape`` (e.g. a
     # NumPy array) -- TODO confirm against the environments in use.
     flat_values = initial.ravel()
     dims = initial.shape
     encoded = gym_uds_pb2.Observation(data=flat_values, shape=dims)

     return gym_uds_pb2.State(observation=encoded, reward=0.0, done=False)
Esempio n. 3
0
    def Step(self, action_request, context):
        """Advance the wrapped environment one step and return the new state.

        The incoming action is converted with ``self.PrepareAction`` and the
        resulting observation is converted with ``self.PrepareObservation``
        before being packed into the response.

        Args:
            action_request: Request message whose ``data`` field is passed
                to ``self.PrepareAction``.
            context: Not used by this method.

        Returns:
            A ``gym_uds_pb2.State`` carrying the observation (flattened data
            plus its shape), the reward, the ``done`` flag and
            ``self.env_id`` as ``envID``.

        Raises:
            TypeError: If ``self.PrepareObservation`` returns something that
                is not a ``numpy.ndarray``.
        """
        act = self.PrepareAction(action_request.data)
        obs, reward, done, _ = self.env.step(act)
        observation = self.PrepareObservation(obs)

        # Validate explicitly rather than with ``assert``: assertions are
        # removed when Python runs with ``-O``, which would silently skip
        # this check.
        if not isinstance(observation, np.ndarray):
            raise TypeError(
                "PrepareObservation must return a numpy.ndarray, got {}".format(
                    type(observation).__name__
                )
            )

        observation_pb = gym_uds_pb2.Observation(
            data=observation.ravel(),
            shape=observation.shape,
        )
        return gym_uds_pb2.State(
            observation=observation_pb,
            reward=reward,
            done=done,
            envID=self.env_id,
        )
Esempio n. 4
0
 def reset(self):
     """Reset the wrapped environment and send its initial state.

     The state is written to ``self.sock`` through ``utils.send_message``
     with ``reward=0.0`` and ``done=False``. Nothing is returned.
     """
     initial = self.env.reset()

     # Assumes the observation exposes ``ravel()`` and ``shape`` (e.g. a
     # NumPy array) -- TODO confirm against the environments in use.
     encoded = gym_uds_pb2.Observation(data=initial.ravel(), shape=initial.shape)
     first_state = gym_uds_pb2.State(observation=encoded, reward=0.0, done=False)

     utils.send_message(self.sock, first_state)
Esempio n. 5
0
 def Reset(self, empty_request, context):
     """Reset the wrapped environment and return its initial state.

     The observation returned by ``self.env.reset()`` is normalised first:
     NumPy arrays and NumPy scalars are used as they are, tuples are
     turned into an array with ``np.array``, and anything else is
     converted with ``float()`` and wrapped in an array.

     Args:
         empty_request: Not used by this method.
         context: Not used by this method.

     Returns:
         A ``gym_uds_pb2.State`` carrying the initial observation
         (flattened data plus its shape) with ``reward=0.0``,
         ``done=False`` and ``self.env_id`` as ``envID``.
     """
     initial = self.env.reset()

     # Pick the array-like object first, then build the message once.
     if isinstance(initial, (np.ndarray, np.generic)):
         arr = initial
     elif isinstance(initial, tuple):
         arr = np.array(initial)
     else:
         # Remaining values must be convertible with float().
         arr = np.array(float(initial))

     encoded = gym_uds_pb2.Observation(data=arr.ravel(), shape=arr.shape)
     return gym_uds_pb2.State(
         observation=encoded,
         reward=0.0,
         done=False,
         envID=self.env_id,
     )
Esempio n. 6
0
    def step(self):
        """Receive one action, step the environment and send back the state.

        An ``Action`` message is read from ``self.sock`` with
        ``utils.recv_message``; its ``value`` field is passed to
        ``self.env.step``. The resulting observation (flattened data plus
        its shape), reward and ``done`` flag are sent back over
        ``self.sock`` with ``utils.send_message``. Nothing is returned.

        Raises:
            TypeError: If the environment returns an observation that is
                not a ``numpy.ndarray``. No state is sent in that case.
        """
        action = utils.recv_message(self.sock, gym_uds_pb2.Action)
        observation, reward, done, _ = self.env.step(action.value)

        # Validate explicitly rather than with ``assert``: assertions are
        # removed when Python runs with ``-O``, which would silently skip
        # this check.
        if not isinstance(observation, np.ndarray):
            raise TypeError(
                "environment observation must be a numpy.ndarray, got {}".format(
                    type(observation).__name__
                )
            )

        observation_pb = gym_uds_pb2.Observation(data=observation.ravel(),
                                                 shape=observation.shape)
        utils.send_message(
            self.sock,
            gym_uds_pb2.State(observation=observation_pb,
                              reward=reward,
                              done=done))