def step(self, action):
    """Advance the simulation by one time step and return a noisy observation.

    The time is moved forward by ``self.dt``, the action is handed to
    ``_adjust_parameters`` and the new state is obtained from
    ``_perform_step``. ``_add_noise`` is applied only to the observation
    that is returned; this method does not write the noisy values back to
    ``self.state``.

    Args:
        action: Action chosen by the agent; forwarded to
            ``_adjust_parameters`` and to ``reward_function``.

    Returns:
        Tuple ``(observation, reward, final_state)``: the compactified
        state restricted to ``self.obs_array`` after ``_add_noise``, the
        value returned by ``self.reward_function(action)``, and the
        episode-termination flag ``self.final_state``.
    """
    upcoming_t = self.t + self.dt
    self._adjust_parameters(action)
    self.state = self._perform_step(upcoming_t)
    self.t = upcoming_t

    # The episode ends on reaching a final state or on leaving the
    # planetary boundaries; the reward is evaluated between both checks.
    if self._arrived_at_final_state():
        self.final_state = True
    reward = self.reward_function(action)
    if not self._inside_planetary_boundaries():
        self.final_state = True

    # Compactify relative to the current state, then keep only the
    # observed components before noise is added.
    observed = compactification(self.state, self.current_state)[self.obs_array]
    return self._add_noise(observed), reward, self.final_state
def step(self, action):
    """Advance the simulation by one time step and return a noisy measurement.

    The time is moved forward by ``self.dt``, the action is handed to
    ``_adjust_parameters`` and the new state is obtained from
    ``_perform_step``. The observation is built from
    ``_get_measurement_PB(self.state)``, compactified against
    ``self.ini_measured_state`` and passed through ``_add_noise``.

    Args:
        action: Action chosen by the agent; forwarded to
            ``_adjust_parameters`` and to ``reward_function``.

    Returns:
        Tuple ``(observation, reward, final_state)``: the noisy compactified
        measurement, the value returned by ``self.reward_function(action)``,
        and the episode-termination flag ``self.final_state``.
    """
    upcoming_t = self.t + self.dt
    self._adjust_parameters(action)
    self.state = self._perform_step(upcoming_t)
    self.t = upcoming_t

    # The episode ends on reaching a final state or on leaving the
    # planetary boundaries; the reward is evaluated between both checks.
    if self._arrived_at_final_state():
        self.final_state = True
    reward = self.reward_function(action)
    if not self._inside_planetary_boundaries():
        self.final_state = True

    measurement = self._get_measurement_PB(self.state)
    compact = compactification(measurement, self.ini_measured_state)
    return self._add_noise(compact), reward, self.final_state
def reset(self):
    """Restart the episode from the start point of the current state region.

    The start point comes from ``current_state_region_StartPoint()`` and is
    stored (as one shared array) in both ``self.start_state`` and
    ``self.state``. The termination flag is cleared and the clock is set
    back to ``self.t0``.

    Returns:
        The compactified start state restricted to ``self.obs_array``.
        No noise is added here.
    """
    initial = np.array(self.current_state_region_StartPoint())
    self.start_state = initial
    self.state = initial

    compact = compactification(self.state, self.current_state)

    self.final_state = False
    self.t = self.t0
    return compact[self.obs_array]
def reset(self):
    """Restart the episode from the start point of the current state region.

    The start point comes from ``current_state_region_StartPoint()`` and is
    stored (as one shared array) in both ``self.start_state`` and
    ``self.state``. The termination flag is cleared and the clock is set
    back to ``self.t0``.

    Returns:
        The measurement of the start state (``_get_measurement_PB``),
        compactified against ``self.ini_measured_state`` and passed
        through ``_add_noise``.
    """
    initial = np.array(self.current_state_region_StartPoint())
    self.start_state = initial
    self.state = initial

    measurement = self._get_measurement_PB(self.state)
    compact = compactification(measurement, self.ini_measured_state)

    self.final_state = False
    self.t = self.t0
    return self._add_noise(compact)
def reset_for_state(self, state=None):
    """Reset the environment to an explicitly given state.

    Args:
        state: Array-like state to start from. When ``None`` (the default)
            the environment restarts from ``self.current_state``.

    Returns:
        The compactified state restricted to ``self.obs_array``.
        No noise is added here.
    """
    # Identity check on purpose: ``state == None`` is evaluated element-wise
    # for NumPy arrays and the resulting array cannot be used as an ``if``
    # condition (raises ValueError for more than one element).
    if state is None:
        # NOTE: start_state/state alias self.current_state here (no copy),
        # exactly as before.
        self.start_state = self.state = self.current_state
    else:
        self.start_state = self.state = np.array(state)

    self.final_state = False
    self.t = self.t0

    trafo_state = compactification(self.state, self.current_state)
    return trafo_state[self.obs_array]
def reset_for_state(self, state=None):
    """Reset the environment to an explicitly given state.

    Args:
        state: Array-like state to start from. When ``None`` (the default)
            the environment restarts from ``self.current_state``.

    Returns:
        The measurement of the new state (``_get_measurement_PB``),
        compactified against ``self.ini_measured_state`` and passed
        through ``_add_noise``.
    """
    # Identity check on purpose: ``state == None`` is evaluated element-wise
    # for NumPy arrays and the resulting array cannot be used as an ``if``
    # condition (raises ValueError for more than one element).
    if state is None:
        # NOTE: start_state/state alias self.current_state here (no copy),
        # exactly as before.
        self.start_state = self.state = self.current_state
    else:
        self.start_state = self.state = np.array(state)

    self.final_state = False
    self.t = self.t0

    measured_states = self._get_measurement_PB(self.state)
    trafo_state = compactification(measured_states, self.ini_measured_state)
    return self._add_noise(trafo_state)