Ejemplo n.º 1
0
    def step(self, action):
        """
        Advance the environment by one time step of length ``self.dt``.

        The parameters are adjusted for ``action``, the state is advanced via
        ``_perform_step`` and the clock is moved to the new time point. The
        reward comes from ``self.reward_function(action)``. The episode is
        flagged as finished when ``_arrived_at_final_state()`` reports true or
        ``_inside_planetary_boundaries()`` reports false.

        Noise is applied only to the returned observation: ``self.state`` is
        stored and the reward is computed before ``_add_noise`` is called.

        Returns a tuple ``(observation, reward, final_state)``.
        """
        t_next = self.t + self.dt
        self._adjust_parameters(action)

        # The state is assigned first, then the clock.
        self.state, self.t = self._perform_step(t_next), t_next

        if self._arrived_at_final_state():
            self.final_state = True

        # The reward is evaluated before the boundary check below.
        reward = self.reward_function(action)

        outside_boundaries = not self._inside_planetary_boundaries()
        if outside_boundaries:
            self.final_state = True

        # Compactify relative to self.current_state, keep only the entries
        # selected by self.obs_array, and add noise to what is handed back.
        compact_state = compactification(self.state, self.current_state)
        observation = self._add_noise(compact_state[self.obs_array])

        return observation, reward, self.final_state
Ejemplo n.º 2
0
    def step(self, action):
        """
        Advance the environment by one time step of length ``self.dt``.

        The parameters are adjusted for ``action``, the state is advanced via
        ``_perform_step`` and the clock is moved to the new time point. The
        reward comes from ``self.reward_function(action)``. The episode is
        flagged as finished when ``_arrived_at_final_state()`` reports true or
        ``_inside_planetary_boundaries()`` reports false.

        The observation is built from ``_get_measurement_PB(self.state)``,
        compactified relative to ``self.ini_measured_state`` and then passed
        through ``_add_noise``.

        Returns a tuple ``(observation, reward, final_state)``.
        """
        t_next = self.t + self.dt
        self._adjust_parameters(action)

        # The state is assigned first, then the clock.
        self.state, self.t = self._perform_step(t_next), t_next

        if self._arrived_at_final_state():
            self.final_state = True

        # The reward is evaluated before the boundary check below.
        reward = self.reward_function(action)

        outside_boundaries = not self._inside_planetary_boundaries()
        if outside_boundaries:
            self.final_state = True

        # Measure the new state, compactify the measurement, then add noise.
        measurement = self._get_measurement_PB(self.state)
        compact_state = compactification(measurement, self.ini_measured_state)
        observation = self._add_noise(compact_state)

        return observation, reward, self.final_state
Ejemplo n.º 3
0
    def reset(self):
        """
        Start a new episode from the start point supplied by
        ``self.current_state_region_StartPoint()``.

        The start point is converted to a NumPy array and stored as both
        ``self.start_state`` and ``self.state``. The terminal flag is cleared
        and the clock is set back to ``self.t0``.

        Returns the compactified state restricted to the entries selected by
        ``self.obs_array``; no noise is added here.
        """
        start_point = np.array(self.current_state_region_StartPoint())
        self.start_state = start_point
        self.state = start_point

        # Compactify relative to self.current_state before the bookkeeping.
        compact_state = compactification(self.state, self.current_state)

        self.final_state = False
        self.t = self.t0

        return compact_state[self.obs_array]
Ejemplo n.º 4
0
    def reset(self):
        """
        Start a new episode from the start point supplied by
        ``self.current_state_region_StartPoint()``.

        The start point is converted to a NumPy array and stored as both
        ``self.start_state`` and ``self.state``. The terminal flag is cleared
        and the clock is set back to ``self.t0``.

        Returns the measurement of the start state (``_get_measurement_PB``),
        compactified relative to ``self.ini_measured_state`` and passed
        through ``_add_noise``.
        """
        start_point = np.array(self.current_state_region_StartPoint())
        self.start_state = start_point
        self.state = start_point

        # Measure and compactify before the bookkeeping, add noise after it.
        measurement = self._get_measurement_PB(self.state)
        compact_state = compactification(measurement, self.ini_measured_state)

        self.final_state = False
        self.t = self.t0

        return self._add_noise(compact_state)
Ejemplo n.º 5
0
    def reset_for_state(self, state=None):
        """
        Reset the environment to an explicitly given state.

        ``state`` may be any array-like value; it is converted with
        ``np.array`` and stored as both ``self.start_state`` and
        ``self.state``. When ``state`` is omitted (``None``),
        ``self.current_state`` is used instead. The terminal flag is cleared
        and the clock is set back to ``self.t0``.

        Returns the compactified state restricted to the entries selected by
        ``self.obs_array``; no noise is added here.
        """
        # Identity check on purpose: ``state == None`` is evaluated
        # element-wise for NumPy arrays, and the resulting array cannot be
        # used as an ``if`` condition (ValueError: ambiguous truth value).
        if state is None:
            # NOTE(review): this stores self.current_state itself, not a
            # copy -- confirm nothing mutates self.state in place.
            self.start_state = self.state = self.current_state
        else:
            self.start_state = self.state = np.array(state)

        self.final_state = False
        self.t = self.t0

        trafo_state = compactification(self.state, self.current_state)
        return trafo_state[self.obs_array]
Ejemplo n.º 6
0
    def reset_for_state(self, state=None):
        """
        Reset the environment to an explicitly given state.

        ``state`` may be any array-like value; it is converted with
        ``np.array`` and stored as both ``self.start_state`` and
        ``self.state``. When ``state`` is omitted (``None``),
        ``self.current_state`` is used instead. The terminal flag is cleared
        and the clock is set back to ``self.t0``.

        Returns the measurement of the new state (``_get_measurement_PB``),
        compactified relative to ``self.ini_measured_state`` and passed
        through ``_add_noise``.
        """
        # Identity check on purpose: ``state == None`` is evaluated
        # element-wise for NumPy arrays, and the resulting array cannot be
        # used as an ``if`` condition (ValueError: ambiguous truth value).
        if state is None:
            # NOTE(review): this stores self.current_state itself, not a
            # copy -- confirm nothing mutates self.state in place.
            self.start_state = self.state = self.current_state
        else:
            self.start_state = self.state = np.array(state)

        self.final_state = False
        self.t = self.t0

        measured_states = self._get_measurement_PB(self.state)
        trafo_state = compactification(measured_states, self.ini_measured_state)
        return self._add_noise(trafo_state)