def Step(self, action_request, context):
    """Advance the environment by one step and return the resulting state.

    Args:
        action_request: Request object whose ``value`` attribute is passed
            to ``self.env.step``.
        context: Unused by this method.

    Returns:
        A ``gym_uds_pb2.State`` holding the flattened observation together
        with its shape, the step reward and the done flag.
    """
    # The fourth element returned by env.step is discarded.
    obs, step_reward, is_done, _ = self.env.step(action_request.value)
    return gym_uds_pb2.State(
        observation=gym_uds_pb2.Observation(data=obs.ravel(), shape=obs.shape),
        reward=step_reward,
        done=is_done,
    )
def Reset(self, empty_request, context):
    """Reset the environment and return its initial state.

    Args:
        empty_request: Unused by this method.
        context: Unused by this method.

    Returns:
        A ``gym_uds_pb2.State`` holding the flattened initial observation
        and its shape, with ``reward`` fixed at 0.0 and ``done`` at False.
    """
    first_obs = self.env.reset()
    return gym_uds_pb2.State(
        observation=gym_uds_pb2.Observation(
            data=first_obs.ravel(),
            shape=first_obs.shape,
        ),
        reward=0.0,
        done=False,
    )
def Step(self, action_request, context):
    """Advance the environment by one step and return the resulting state.

    The incoming action data is converted with ``self.PrepareAction`` and
    the raw observation from the environment is converted with
    ``self.PrepareObservation`` before being packed into the reply.

    Args:
        action_request: Request object whose ``data`` attribute is passed
            to ``self.PrepareAction``.
        context: Unused by this method.

    Returns:
        A ``gym_uds_pb2.State`` holding the flattened observation together
        with its shape, the step reward, the done flag and ``self.env_id``.

    Raises:
        TypeError: If ``self.PrepareObservation`` does not return a
            ``numpy.ndarray``.
    """
    act = self.PrepareAction(action_request.data)
    # The fourth element returned by env.step is discarded.
    obs, reward, done, _ = self.env.step(act)
    observation = self.PrepareObservation(obs)
    # Explicit check instead of `assert`: asserts are stripped under
    # `python -O`, and isinstance also admits ndarray subclasses.
    if not isinstance(observation, np.ndarray):
        raise TypeError(
            "PrepareObservation must return a numpy.ndarray, got {}".format(
                type(observation).__name__
            )
        )
    observation_pb = gym_uds_pb2.Observation(
        data=observation.ravel(),
        shape=observation.shape,
    )
    return gym_uds_pb2.State(
        observation=observation_pb,
        reward=reward,
        done=done,
        envID=self.env_id,
    )
def reset(self):
    """Reset the environment and send its initial state over ``self.sock``.

    The initial observation is flattened and packed with its shape into a
    ``gym_uds_pb2.State`` whose ``reward`` is 0.0 and ``done`` is False; the
    message is handed to ``utils.send_message``. Nothing is returned.
    """
    first_obs = self.env.reset()
    initial_state = gym_uds_pb2.State(
        observation=gym_uds_pb2.Observation(
            data=first_obs.ravel(),
            shape=first_obs.shape,
        ),
        reward=0.0,
        done=False,
    )
    utils.send_message(self.sock, initial_state)
def Reset(self, empty_request, context):
    """Reset the environment and return its initial state.

    The value returned by ``self.env.reset()`` is normalised to a NumPy
    object before packing:

    * ``np.ndarray`` / ``np.generic`` values are used as they are;
    * tuples are converted with ``np.array``;
    * anything else is converted with ``float`` and wrapped by ``np.array``.

    Args:
        empty_request: Unused by this method.
        context: Unused by this method.

    Returns:
        A ``gym_uds_pb2.State`` holding the flattened initial observation
        and its shape, ``reward`` 0.0, ``done`` False and ``self.env_id``.
    """
    initial = self.env.reset()
    if isinstance(initial, (np.ndarray, np.generic)):
        obs = initial
    elif isinstance(initial, tuple):
        obs = np.array(initial)
    else:
        obs = np.array(float(initial))

    # Build the message once, whichever branch produced the array.
    return gym_uds_pb2.State(
        observation=gym_uds_pb2.Observation(data=obs.ravel(), shape=obs.shape),
        reward=0.0,
        done=False,
        envID=self.env_id,
    )
def step(self):
    """Receive an action, step the environment and send back the new state.

    An ``gym_uds_pb2.Action`` message is read from ``self.sock`` with
    ``utils.recv_message``; its ``value`` is passed to ``self.env.step``.
    The resulting observation is flattened and packed with its shape, the
    reward and the done flag into a ``gym_uds_pb2.State`` that is handed to
    ``utils.send_message``. Nothing is returned.

    Raises:
        TypeError: If the environment's observation is not a
            ``numpy.ndarray``.
    """
    action = utils.recv_message(self.sock, gym_uds_pb2.Action)
    # The fourth element returned by env.step is discarded.
    observation, reward, done, _ = self.env.step(action.value)
    # Explicit check instead of `assert`: asserts are stripped under
    # `python -O`, and isinstance also admits ndarray subclasses.
    if not isinstance(observation, np.ndarray):
        raise TypeError(
            "env.step must return a numpy.ndarray observation, got {}".format(
                type(observation).__name__
            )
        )
    observation_pb = gym_uds_pb2.Observation(
        data=observation.ravel(),
        shape=observation.shape,
    )
    utils.send_message(
        self.sock,
        gym_uds_pb2.State(observation=observation_pb, reward=reward, done=done),
    )