Example #1
 def simulate_n_steps(self,
                      xs, us, zs,                         
                      control_durations,
                      x_true,                         
                      x_tilde,
                      x_tilde_linear,
                      x_estimate,
                      P_t,
                      total_reward,                         
                      current_step,
                      n_steps,
                      deviation_covariances,
                      estimated_deviation_covariances,
                      max_num_steps=None):
     history_entries = []        
     terminal_state_reached = False
     success = False
     collided = False
     x_dash = np.copy(x_tilde)
     x_dash_linear = np.copy(x_tilde_linear)
     x_true_linear = np.copy(x_true)
     As, Bs, Vs, Ms, Hs, Ws, Ns = self.get_linear_model_matrices(xs, us, control_durations)
     Ls = kalman.compute_gain(As, Bs, self.C, self.D, len(xs) - 1)
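     """ Ls holds the per-step feedback gains; u_dash = Ls[i].dot(x_tilde) steers the system back toward the nominal path """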
     logging.info("Simulator: Executing for " + str(n_steps) + " steps") 
     estimated_states = []
     estimated_covariances = []
     self.show_nominal_path(xs)        
     """ Execute the nominal path for n_steps """        
     for i in xrange(n_steps):                        
         if not terminal_state_reached:
             linearization_error = utils.dist(x_dash, x_dash_linear)                
             history_entries.append(HistoryEntry(current_step,
                                                 x_true, 
                                                 xs[i],
                                                 x_true_linear,
                                                 x_estimate,
                                                 x_dash,
                                                 x_dash_linear,
                                                 linearization_error, 
                                                 None,
                                                 None,
                                                 None,
                                                 P_t,
                                                 False,
                                                 False,
                                                 False,
                                                 0.0))
             current_step += 1
             history_entries[-1].set_s_dash_estimated(x_tilde)
             
             """ Calc u_dash from the estimated deviation of the state from the nominal path 'x_tilde'
             using the optimal gain L
             """                      
             u_dash = np.dot(Ls[i], x_tilde)                
             history_entries[-1].set_u_dash(u_dash)
             
             """ Calc the actual control input """ 
             u = u_dash + us[i] 
             
             """ Enforce the control constraints on 'u' and 'u_dash' """
              if self.control_constraints_enforced:
                 u, u_dash = self.enforce_control_constraints(u, us[i])                          
             history_entries[-1].set_action(u)
             history_entries[-1].set_nominal_action(us[i])
             
             """ Apply the control 'u' and propagate the state 'x_true' """
             x_true_temp, ce = self.apply_control(x_true, 
                                                  u,
                                                  control_durations[i], 
                                                  As[i], 
                                                  Bs[i], 
                                                  Vs[i], 
                                                  Ms[i])
             
             """ Calc the linearized next state (used to compare with x_true) """             
             x_dash_linear_temp = self.get_linearized_next_state(x_dash_linear, u_dash, ce, As[i], Bs[i], Vs[i])
             x_true_linear_temp = np.add(x_dash_linear_temp, xs[i+1])
             if self.enforce_constraints:
                 x_true_linear_temp = self.check_constraints(x_true_linear_temp)
             
             discount = np.power(self.discount_factor, current_step - 1)
             
             """ Check if the propagated state collides with an obstacle
             If yes, the true state is set to the previous state with 0 velocity.
             If not, set the true state to the propagated state
             """
             collided = False
             in_collision_true_state, colliding_obstacle = self.is_in_collision(x_true, x_true_temp)                                                    
             if in_collision_true_state:
                 for j in xrange(len(x_true) / 2, len(x_true)):
                     x_true[j] = 0.0                  
                 logging.info("Simulator: Collision detected. Setting state estimate to the previous state")
                 total_reward += discount * (-1.0 * self.illegal_move_penalty)
                 history_entries[-1].set_reward(discount * (-1.0 * self.illegal_move_penalty))
                 history_entries[-1].set_colliding_obstacle(colliding_obstacle.getName())
                 history_entries[-1].set_colliding_state(x_true_temp)
                 collided = True
             else:
                 total_reward += discount * (-1.0 * self.step_penalty)
                  history_entries[-1].set_reward(discount * (-1.0 * self.step_penalty))
                 x_true = x_true_temp                
             
             """ Do the same for the linearized true state """ 
             if self.is_in_collision(x_true_linear, x_true_linear_temp)[0]:
                 for j in xrange(len(x_true_linear) / 2, len(x_true_linear)):
                     x_true_linear[j] = 0.0                  
             else:
                 x_true_linear = x_true_linear_temp
             
             """ Calculate the state deviation from the nominal path """                           
             x_dash = np.subtract(x_true, xs[i + 1])
             x_dash_linear = np.subtract(x_true_linear , xs[i + 1])
             
             """ Get the end effector position for the true state """
             state = v_double()
             state[:] = [x_true[j] for j in xrange(len(x_true) / 2)]
             ee_position_arr = v_double()
             self.robot.getEndEffectorPosition(state, ee_position_arr)
             ee_position = np.array([ee_position_arr[j] for j in xrange(len(ee_position_arr))])
             logging.info("Simulator: Current end-effector position is " + str(ee_position))                                                
             
             """ Generate an observation for the true state """
             z = self.get_observation(x_true, Hs[i + 1], Ns[i + 1], Ws[i + 1])
             z_dash = np.subtract(z, zs[i+1])
             history_entries[-1].set_observation(z)
             
             """ Update the viewer to display the true state """                       
             self.update_viewer(x_true, z, control_durations[i], colliding_obstacle)
             
             """ Kalman prediction and update """                   
             x_tilde_dash, P_dash = kalman.kalman_predict(x_tilde, u_dash, As[i], Bs[i], P_t, Vs[i], Ms[i])
             x_tilde, P_t = kalman.kalman_update(x_tilde_dash, 
                                                 z_dash, 
                                                 Hs[i], 
                                                 P_dash, 
                                                 Ws[i], 
                                                 Ns[i])
             
             
             """ x_estimate_new is the estimated state """                            
             x_estimate_new = x_tilde + xs[i + 1]
             
             """ Enforce the constraints to the estimated state """
             if self.enforce_constraints:     
                 x_estimate_new = self.check_constraints(x_estimate_new) 
             
             """ Check if the estimated state collides.
              If yes, set it to the previous estimate (with velocity 0).
             If no, set the true estimate to this estimate 
             """
             estimate_collided = True              
             if not self.is_in_collision([], x_estimate_new)[0]:                                                                                                    
                 x_estimate = x_estimate_new
                 estimate_collided = False
             else:                    
                  for j in xrange(len(x_estimate) / 2, len(x_estimate)):
                      x_estimate[j] = 0
             estimated_states.append(x_estimate)
             estimated_covariances.append(P_t)                
             
             """ Check if the end-effector position is terminal. If yes, we're done """
             if self.is_terminal(ee_position):
                 discount = np.power(self.discount_factor, current_step)
                 history_entries.append(HistoryEntry(current_step,
                                                     x_true,
                                                     xs[i + 1],
                                                     x_true_linear, 
                                                     x_estimate, 
                                                     x_dash,
                                                     x_dash_linear,
                                                     0.0,
                                                     None,
                                                     None,
                                                     z,
                                                     P_t,
                                                     False,
                                                     False,
                                                     True,
                                                     discount * self.exit_reward))                                      
                 terminal_state_reached = True                                           
                 total_reward += discount * self.exit_reward                    
                 history_entries[-1].set_linearization_error(utils.dist(x_dash, x_dash_linear))                                         
                 logging.info("Terminal state reached: reward = " + str(total_reward))                    
             history_entries[-1].set_collided(collided)
             history_entries[-1].set_estimate_collided(estimate_collided)
             
             
              if current_step == max_num_steps and not terminal_state_reached:
                 """ This was the last step -> append final history entry"""
                 history_entries.append(HistoryEntry(current_step,
                                                     x_true,
                                                     xs[i + 1],
                                                     x_true_linear,
                                                     x_estimate,
                                                     x_dash,
                                                     x_dash_linear,
                                                     0.0,
                                                     None,
                                                     None,
                                                     z,
                                                     P_t,
                                                     False,
                                                     False,
                                                     False,
                                                     0.0))
                 history_entries[-1].set_linearization_error(utils.dist(x_dash, x_dash_linear))                        
                 #total_reward += discount * self.exit_reward
             if (collided and self.stop_when_colliding):                                                                               
                 break                
     #print "========================================"
     #print "======= Simulation done"
     #print "========================================"              
     return (x_true, 
             x_tilde,
             x_dash_linear, 
             x_estimate, 
             P_t, 
             current_step, 
             total_reward,                
             terminal_state_reached,
             estimated_states,
             estimated_covariances,                
             history_entries)    
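
For reference, a minimal sketch of what the kalman.kalman_predict and kalman.kalman_update calls above plausibly compute: the standard Kalman filter written in deviation coordinates, with V and W mapping the process and observation noise (covariances M and N) into state and observation space. This is an assumption about the kalman module, not its verified implementation.

import numpy as np

def kalman_predict(x_tilde, u_dash, A, B, P_t, V, M):
    # Propagate the deviation estimate through the linearized dynamics
    # and inflate the covariance with the mapped process noise.
    x_tilde_dash = np.dot(A, x_tilde) + np.dot(B, u_dash)
    P_dash = np.dot(A, np.dot(P_t, A.T)) + np.dot(V, np.dot(M, V.T))
    return x_tilde_dash, P_dash

def kalman_update(x_tilde_dash, z_dash, H, P_dash, W, N):
    # Measurement update: innovation covariance S, Kalman gain K,
    # mean correction and covariance contraction.
    S = np.dot(H, np.dot(P_dash, H.T)) + np.dot(W, np.dot(N, W.T))
    K = np.dot(P_dash, np.dot(H.T, np.linalg.inv(S)))
    x_tilde = x_tilde_dash + np.dot(K, z_dash - np.dot(H, x_tilde_dash))
    P_t = np.dot(np.identity(P_dash.shape[0]) - np.dot(K, H), P_dash)
    return x_tilde, P_t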
Example #2
 def evaluate(self, 
              index, 
              path, 
              P_t, 
              current_step, 
              horizon, 
              robot, 
              eval_queue=None,
              deviation_covariance=None,
              estimated_deviation_covariance=None):
     if self.show_viewer:
         robot.setupViewer(self.model_file, self.env_file)
     xs = path[0]
     us = path[1]
     zs = path[2]        
     #self.show_nominal_path(robot, xs) 
     control_durations = path[3]      
     horizon_L = horizon + 1 
     if horizon == -1 or len(xs) < horizon_L:            
         horizon_L = len(xs)
     
     As, Bs, Vs, Ms, Hs, Ws, Ns = self.get_linear_model_matrices(xs, us, control_durations)
     Ls = kalman.compute_gain(As, Bs, self.C, self.D, horizon_L - 1)        
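     """ Q_t stacks the process noise covariance M and the observation noise covariance N;
     R_t is initialized as the covariance of the stacked vector [x_dash_0, x_tilde_0]^T
     """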
     Q_t = np.vstack((np.hstack((self.M, np.zeros((self.M.shape[0], self.N.shape[1])))), 
                      np.hstack((np.zeros((self.N.shape[0], self.M.shape[1])), self.N))))
     R_t = np.vstack((np.hstack((np.copy(P_t), np.zeros((P_t.shape[0], P_t.shape[1])))),
                      np.hstack((np.zeros((P_t.shape[0], P_t.shape[1])), np.zeros((P_t.shape[0], P_t.shape[1]))))))
     ee_distributions = []
     ee_approx_distr = []
     collision_probs = []
     path_rewards = []
     expected_num_collisions_path = 0  
     (state_reward, terminal, expected_num_collisions_step, samples) = self.get_expected_state_reward(xs[0], P_t)
     expected_num_collisions_path += expected_num_collisions_step
     path_rewards.append(np.power(self.discount, current_step) * state_reward)
     Cov = 0 
     CPi = []
     estimated_state_covariances = [P_t]
     deviation_covariances = []
     estimated_deviation_covariances = []
     
     if deviation_covariance is None:
         deviation_covariances.append(np.zeros((2 * self.robot_dof, 2 * self.robot_dof)))
     else:
         deviation_covariances.append(deviation_covariance)
         
     if estimated_deviation_covariance is None:
         estimated_deviation_covariances.append(np.zeros((2 * self.robot_dof, 2 * self.robot_dof)))
     else:
         estimated_deviation_covariances.append(estimated_deviation_covariance)       
     
     for i in xrange(1, horizon_L):                              
         P_hat_t = kalman.compute_p_hat_t(As[i], P_t, Vs[i], Ms[i])            
         K_t = kalman.compute_kalman_gain(Hs[i], P_hat_t, Ws[i], Ns[i])
         P_t = kalman.compute_P_t(K_t, Hs[i], P_hat_t)            
         F_0 = np.hstack((As[i], np.dot(Bs[i], Ls[i - 1])))
         F_1 = np.hstack((np.dot(K_t, np.dot(Hs[i], As[i])), 
                          As[i] + np.dot(Bs[i], Ls[i - 1]) - np.dot(K_t, np.dot(Hs[i], As[i]))))            
         F_t = np.vstack((F_0, F_1))
         G_t_l_r = np.dot(K_t, Ws[i])
         G_t_u_r = np.zeros((Vs[i].shape[0], G_t_l_r.shape[1]))                                         
         G_t = np.vstack((np.hstack((Vs[i], G_t_u_r)), 
                          np.hstack((np.dot(K_t, np.dot(Hs[i], Vs[i])), G_t_l_r)))) 
          
         """ R_t is the covariance matrix of [x_dash_t, x_tilde_t]^T
         """   
         R_t = np.dot(F_t, np.dot(R_t, F_t.T)) + np.dot(G_t, np.dot(Q_t, G_t.T)) 
         deviation_covariance = np.array([[R_t[j, k] for k in xrange(R_t.shape[1] / 2)] for j in xrange(R_t.shape[0] / 2)])            
         deviation_covariances.append(deviation_covariance) 
         estimated_deviation_covariance = np.array([[R_t[j, k] for k in xrange(R_t.shape[1] / 2, R_t.shape[1])] for j in xrange(R_t.shape[0] / 2, R_t.shape[0])])
         estimated_deviation_covariances.append(estimated_deviation_covariance)
         L = np.identity(2 * self.robot_dof)
         if i != horizon_L - 1:
             L = Ls[i]
         Gamma_t = np.vstack((np.hstack((np.identity(L.shape[1]), np.zeros((L.shape[1], L.shape[1])))), 
                              np.hstack((np.zeros((L.shape[0], L.shape[1])), L))))                 
         Cov = np.dot(Gamma_t, np.dot(R_t, Gamma_t.T))                       
         cov_state = np.array([[Cov[j, k] for k in xrange(2 * self.robot_dof)] for j in xrange(2 * self.robot_dof)])
         estimated_state_covariances.append(cov_state)            
         (state_reward, terminal, expected_num_collisions_step, samples) = self.get_expected_state_reward(xs[i], cov_state) 
         CPi.append(expected_num_collisions_step)
         expected_num_collisions_path += expected_num_collisions_step           
         path_rewards.append(np.power(self.discount, current_step + i) * state_reward)            
         if self.show_viewer:
             self.show_state_and_cov(xs[i], cov_state, samples)                
             time.sleep(0.2)
     path_reward = sum(path_rewards)
     product = 1.0
     for i in xrange(len(CPi)):
         product *= (1.0 - CPi[i])
         
     '''
     CP is the probability that the nominal path collides, approximated using a multiplicative approach
     '''    
     CP = 1 - product 
     #CP = sum(CPi) 
     #print "PathEvaluator: Path " + str(index) + " evaluated. Reward: " + str(path_reward) + ", mean num collisions: " + str(expected_num_collisions_path) + " " \
     #      "CP " + str(CP)
     
     logging.info("========================================")
     logging.info("PathEvaluator: reward for path " + 
                  str(index) + 
                  " is " + 
                  str(path_reward))
     logging.info("========================================")        
     if eval_queue is not None:
         eval_queue.put((index, 
                         path_reward, 
                         path, 
                         Cov, 
                         estimated_state_covariances, 
                         deviation_covariances, 
                         estimated_deviation_covariances))
     else:                     
         return (path_reward, 
                 estimated_state_covariances, 
                 deviation_covariances, 
                 estimated_deviation_covariances)
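
The multiplicative approximation at the end of evaluate treats the per-step collision probabilities CPi as independent events: the path survives only if every step survives. A self-contained sketch of that aggregation (function name hypothetical):

def path_collision_probability(step_collision_probs):
    # CP = 1 - prod_i (1 - CP_i), assuming independent steps.
    product = 1.0
    for cp in step_collision_probs:
        product *= (1.0 - cp)
    return 1.0 - product

# Three steps with a 5% collision probability each:
# 1 - 0.95**3 = 0.142625, slightly below the naive sum of 0.15.
print(path_collision_probability([0.05, 0.05, 0.05]))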
Example #3
 def adjust_plan(self, 
                 robot,                    
                 plan,                                      
                 x_estimated,
                 P_t):
     xs = [plan[0][i] for i in xrange(1, len(plan[0]))]
     us = [plan[1][i] for i in xrange(1, len(plan[1]))]
     zs = [plan[2][i] for i in xrange(1, len(plan[2]))]
     control_durations = [plan[3][i] for i in xrange(1, len(plan[3]))]
     As, Bs, Vs, Ms, Hs, Ws, Ns = self.get_linear_model_matrices(robot, 
                                                                 xs, 
                                                                 us,                                                                    
                                                                 control_durations)
     Ls = kalman.compute_gain(As, Bs, self.C, self.D, len(xs) - 1) 
     
     xs_adjusted = []
     us_adjusted = []
     zs_adjusted = []
     
     xs_adjusted.append(xs[0])
     zs_adjusted.append(zs[0])
     try:
         x_tilde = x_estimated - xs[0] 
     except Exception as e:
         print e
         print "==================="
         print "x_estimated " + str(x_estimated)
         print "xs[0] " + str(xs[0])
         print "xs " + str(xs)
         return (None, None, None, None, True)                   
     for i in xrange(len(xs) - 1):
         x_predicted = np.array([xs[i][k] for k in xrange(len(xs[i]))])
         u = np.dot(Ls[i], x_estimated - x_predicted) + us[i]            
         if self.control_constraints_enforced:                
             u = self.enforce_control_constraints(u) 
         if self.dynamic_problem:
             current_state = v_double()
             current_state[:] = [xs_adjusted[i][j] for j in xrange(len(xs_adjusted[i]))]
             control = v_double()
             control[:] = u
             
             control_error = v_double()
             control_error[:] = [0.0 for k in xrange(len(u))]
             result = v_double()
             robot.propagate(current_state,
                             control,
                             control_error,
                             self.simulation_step_size,
                             control_durations[i],
                             result)
             xs_adjusted.append(np.array([result[k] for k in xrange(len(result))]))            
             us_adjusted.append(np.array([u[k] for k in xrange(len(u))]))
             zs_adjusted.append(np.array([result[k] for k in xrange(len(result))]))
             #print "xs_adjusted: " + str(np.array([result[k] for k in xrange(len(result))]))
             #print "xs_prior: " + str(xs[i + 1])
         else:
             current_state = np.array([xs_adjusted[i][j] for j in xrange(len(xs_adjusted[i]))])
             result = np.dot(As[i], current_state) + np.dot(Bs[i], u)
             xs_adjusted.append(np.array([result[k] for k in xrange(len(result))]))            
             us_adjusted.append(np.array([u[k] for k in xrange(len(u))]))
             zs_adjusted.append(np.array([result[k] for k in xrange(len(result))]))
             
         
         u_dash = u - us[i]
         z_dash = np.array([0.0 for k in xrange(len(zs_adjusted[-1]))])
         
         """
         Maximum likelihood observation
         """
         """ Kalman prediction and update """                   
         x_tilde_dash, P_dash = kalman.kalman_predict(x_tilde, u_dash, As[i], Bs[i], P_t, Vs[i], Ms[i])
         x_tilde, P_t = kalman.kalman_update(x_tilde_dash, 
                                             z_dash, 
                                             Hs[i], 
                                             P_dash, 
                                             Ws[i], 
                                             Ns[i])
         
         x_estimated = x_tilde + xs[i + 1]
         #x_estimated = x_tilde + xs_adjusted[-1]
     us_adjusted.append(np.array([0.0 for i in xrange(len(us[0]))]))
     control_durations_adjusted = [control_durations[i] for i in xrange(len(control_durations))]
     control_durations_adjusted.append(0.0)
     return (xs_adjusted, us_adjusted, zs_adjusted, control_durations_adjusted, True)
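
adjust_plan assumes maximum-likelihood observations (z_dash is all zeros), i.e. every observation is taken to equal its nominal prediction, so the adjusted controls are driven purely by the feedback law u = L(x_estimated - x_nominal) + u_nominal. A toy illustration of that law with hypothetical 2D values:

import numpy as np

# Assumed gain and nominal values, for illustration only.
L_gain = np.array([[-0.5, 0.0],
                   [0.0, -0.5]])
x_nominal = np.array([1.0, 0.0])
u_nominal = np.array([0.1, 0.0])
x_estimated = np.array([1.2, -0.1])

# The deviation (0.2, -0.1) is scaled by the gain and added to the
# nominal control, giving u = (0.0, 0.05).
u = np.dot(L_gain, x_estimated - x_nominal) + u_nominal
print(u)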
Example #4
    def simulate_n_steps(self,
                         xs,
                         us,
                         zs,
                         control_durations,
                         x_true,
                         x_estimate,
                         P_t,
                         total_reward,
                         current_step,
                         n_steps,
                         deviation_covariances,
                         estimated_deviation_covariances,
                         max_num_steps=None):
        history_entries = []
        terminal_state_reached = False
        success = False
        collided = False
        z = None
        x_dash = np.subtract(np.array(x_true), np.array(xs[0]))
        x_dash_linear = np.copy(x_dash)
        x_true_linear = np.copy(x_true)
        x_tilde = np.array(x_estimate) - np.array(xs[0])
        As, Bs, Vs, Ms, Hs, Ws, Ns = self.get_linear_model_matrices(
            xs, us, control_durations)
        Ls = kalman.compute_gain(As, Bs, self.C, self.D, len(xs) - 1)
        logging.info("Simulator: Executing for " + str(n_steps) + " steps")
        estimated_states = []
        estimated_covariances = []
        self.show_nominal_path(xs)
        """ Execute the nominal path for n_steps """
        for i in xrange(n_steps):
            if not terminal_state_reached:
                history_entries.append(
                    HistoryEntry(current_step, x_true, xs[i], x_true_linear,
                                 x_estimate, x_dash, x_dash_linear, 0.0, None,
                                 None, None, P_t, False, False, False, 0.0))
                x_t_n = np.linalg.norm(
                    np.array([x_true[k] for k in xrange(len(x_true) / 2)]))
                x_e_n = np.linalg.norm(
                    np.array(
                        [x_estimate[k] for k in xrange(len(x_estimate) / 2)]))
                x_true_norm = np.array([0.0 for k in xrange(len(x_true) / 2)])
                x_estimate_norm = np.array(
                    [0.0 for k in xrange(len(x_estimate) / 2)])
                if x_t_n != 0:
                    x_true_norm = np.array(
                        [x_true[k] for k in xrange(len(x_true) / 2)]) / x_t_n
                if x_e_n != 0:
                    x_estimate_norm = np.array(
                        [x_estimate[k]
                         for k in xrange(len(x_estimate) / 2)]) / x_e_n

                history_entries[-1].set_estimation_error(
                    np.linalg.norm(np.array(x_true) - np.array(x_estimate)))
                history_entries[-1].set_estimation_error_normalized(
                    np.linalg.norm(
                        np.array(x_true_norm) - np.array(x_estimate_norm)))

                current_step += 1
                history_entries[-1].set_s_dash_estimated(x_tilde)
                """ Calc u_dash from the estimated deviation of the state from the nominal path 'x_tilde'
                using the optimal gain L
                """
                try:
                    u_dash = np.dot(Ls[i], x_tilde)
                except Exception as e:
                    print e
                    print "i " + str(i)
                    print "len(Ls) " + str(len(Ls))
                    print "len(xs) " + str(len(xs))
                    raise
                history_entries[-1].set_u_dash(u_dash)
                """ Calc the actual control input """
                u = u_dash + us[i]
                """ Enforce the control constraints on 'u' and 'u_dash' """
                if self.control_constraints_enforced:
                    u, u_dash = self.enforce_control_constraints(u, us[i])
                history_entries[-1].set_action(u)

                history_entries[-1].set_nominal_action(us[i])
                """ Apply the control 'u' and propagate the state 'x_true' """
                x_true_temp, ce = self.apply_control(x_true, u,
                                                     control_durations[i],
                                                     As[i], Bs[i], Vs[i],
                                                     Ms[i], xs[i], us[i])

                x_dash_linear_temp = self.get_linearized_next_state(
                    x_dash, u_dash, ce, As[i], Bs[i], Vs[i])
                x_true_linear_temp = np.add(x_dash_linear_temp, xs[i + 1])
                if self.enforce_constraints:
                    x_true_temp = self.check_constraints(x_true_temp)
                    x_true_linear_temp = self.check_constraints(
                        x_true_linear_temp)
                """ Calc the linearized next state (used to compare with x_true) """
                discount = np.power(self.discount_factor, current_step - 1)
                """ Check if the propagated state collides with an obstacle
                If yes, the true state is set to the previous state with 0 velocity.
                If not, set the true state to the propagated state
                """
                collided = False
                in_collision_true_state, colliding_obstacle = self.is_in_collision(
                    x_true, x_true_temp)
                self.set_colliding_obstacle(colliding_obstacle)

                if in_collision_true_state:
                    for j in xrange(len(x_true) / 2, len(x_true)):
                        x_true[j] = 0.0
                        x_true_linear[j] = 0.0
                    logging.info(
                        "Simulator: Collision detected. Setting state estimate to the previous state"
                    )

                    total_reward += discount * (-1.0 *
                                                self.illegal_move_penalty)
                    history_entries[-1].set_reward(
                        discount * (-1.0 * self.illegal_move_penalty))
                    try:
                        history_entries[-1].set_colliding_obstacle(
                            colliding_obstacle.getName())
                    except AttributeError:
                        pass
                    history_entries[-1].set_colliding_state(x_true_temp)
                    collided = True
                    """ Calculate the state deviation from the nominal path """
                    x_dash = np.subtract(x_true, xs[i])
                    x_dash_linear = np.subtract(x_true_linear, xs[i])
                else:
                    total_reward += discount * (-1.0 * self.step_penalty)
                    history_entries[-1].set_reward(discount *
                                                   (-1.0 * self.step_penalty))
                    x_true = x_true_temp
                    x_true_linear = x_true_linear_temp
                    x_dash = np.subtract(x_true, xs[i + 1])
                    x_dash_linear = np.subtract(x_true_linear, xs[i + 1])
                """ Do the same for the linearized true state """
                '''if self.is_in_collision(x_true_linear, x_true_linear_temp)[0]:
                    for j in xrange(len(x_true_linear) / 2, len(x_true_linear)):
                        x_true_linear[j] = 0.0                  
                else:'''

                linearization_error = utils.dist(x_dash, x_dash_linear)
                history_entries[-1].set_linearization_error(
                    linearization_error)
                """ Get the end effector position for the true state """
                state = v_double()
                state[:] = [x_true[j] for j in xrange(len(x_true) / 2)]
                ee_position_arr = v_double()
                self.robot.getEndEffectorPosition(state, ee_position_arr)
                ee_position = np.array(
                    [ee_position_arr[j] for j in xrange(len(ee_position_arr))])
                logging.info("Simulator: Current end-effector position is " +
                             str(ee_position))
                """ Generate an observation for the true state """
                z = self.get_observation(x_true, Hs[i + 1], Ns[i + 1],
                                         Ws[i + 1])
                try:
                    z_dash = np.subtract(z, zs[i + 1])
                    history_entries[-1].set_observation(z)
                except:
                    print "len(xs) " + str(len(xs))
                    print "len(zs) " + str(len(zs))
                    raise
                """ Kalman prediction and update """
                x_tilde_dash, P_dash = kalman.kalman_predict(
                    x_tilde, u_dash, As[i], Bs[i], P_t, Vs[i], Ms[i])
                x_tilde, P_t = kalman.kalman_update(x_tilde_dash, z_dash,
                                                    Hs[i], P_dash, Ws[i],
                                                    Ns[i])
                """ x_estimate_new is the estimated state """
                x_estimate_new = x_tilde + xs[i + 1]
                """ Enforce the constraints to the estimated state """
                if self.enforce_constraints:
                    x_estimate_new = self.check_constraints(x_estimate_new)
                """ Check if the estimated state collides.
                If yes, set it to the previous estimate (with velocity 0).
                If no, set the true estimate to this estimate 
                """
                estimate_collided = True
                if self.is_in_collision([], x_estimate_new)[0]:
                    for l in xrange(len(x_estimate) / 2, len(x_estimate)):
                        x_estimate[l] = 0
                elif in_collision_true_state and self.knows_collision:
                    for l in xrange(len(x_estimate) / 2, len(x_estimate)):
                        x_estimate[l] = 0
                else:
                    x_estimate = x_estimate_new
                    estimate_collided = False

                estimated_states.append(x_estimate)
                estimated_covariances.append(P_t)
                """ Update the viewer to display the true state """
                '''self.update_viewer(x_true,
                                   x_estimate, 
                                   z, 
                                   control_durations[i], 
                                   colliding_obstacle)'''
                """ Check if the end-effector position is terminal. If yes, we're done """
                if self.is_terminal(ee_position):
                    discount = np.power(self.discount_factor, current_step)
                    history_entries.append(
                        HistoryEntry(current_step, x_true, xs[i + 1],
                                     x_true_linear, x_estimate, x_dash,
                                     x_dash_linear, 0.0, None, None, z, P_t,
                                     False, False, True,
                                     discount * self.exit_reward))
                    terminal_state_reached = True
                    total_reward += discount * self.exit_reward
                    history_entries[-1].set_linearization_error(
                        utils.dist(x_dash, x_dash_linear))
                    logging.info("Terminal state reached: reward = " +
                                 str(total_reward))
                history_entries[-1].set_collided(collided)
                history_entries[-1].set_estimate_collided(estimate_collided)

                if current_step == max_num_steps and not terminal_state_reached:
                    """ This was the last step -> append final history entry"""
                    history_entries.append(
                        HistoryEntry(current_step, x_true, xs[i + 1],
                                     x_true_linear, x_estimate, x_dash,
                                     x_dash_linear, 0.0, None, None, z, P_t,
                                     False, False, False, 0.0))
                    history_entries[-1].set_linearization_error(
                        utils.dist(x_dash, x_dash_linear))
                    #total_reward += discount * self.exit_reward
                if (collided and self.stop_when_colliding):
                    break
        #print "========================================"
        #print "======= Simulation done"
        #print "========================================"
        return (x_true, x_tilde, x_dash_linear, x_estimate, z, P_t,
                current_step, total_reward, terminal_state_reached,
                estimated_states, estimated_covariances, history_entries)
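
This variant additionally records a scale-free estimation error by normalizing the position half of the true and estimated states to unit length before differencing. A minimal standalone sketch of that computation (helper name hypothetical):

import numpy as np

def normalized_estimation_error(x_true, x_estimate):
    # Use only the position half of the state (velocities occupy the
    # second half); zero vectors are left unnormalized.
    n = len(x_true) // 2
    p_true = np.array(x_true[:n], dtype=float)
    p_est = np.array(x_estimate[:n], dtype=float)
    if np.linalg.norm(p_true) != 0:
        p_true /= np.linalg.norm(p_true)
    if np.linalg.norm(p_est) != 0:
        p_est /= np.linalg.norm(p_est)
    return np.linalg.norm(p_true - p_est)

# Parallel position vectors of different lengths give error 0.0.
print(normalized_estimation_error([3.0, 4.0, 0.1, 0.2],
                                  [0.6, 0.8, 0.0, 0.0]))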
Example #5
    def evaluate(self,
                 index,
                 path,
                 P_t,
                 current_step,
                 horizon,
                 robot,
                 eval_queue=None,
                 deviation_covariance=None,
                 estimated_deviation_covariance=None):
        if self.show_viewer:
            robot.setupViewer(self.model_file, self.env_file)
        xs = path[0]
        us = path[1]
        zs = path[2]
        #self.show_nominal_path(robot, xs)
        control_durations = path[3]
        horizon_L = horizon + 1
        if horizon == -1 or len(xs) < horizon_L:
            horizon_L = len(xs)

        As, Bs, Vs, Ms, Hs, Ws, Ns = self.get_linear_model_matrices(
            xs, us, control_durations)
        Ls = kalman.compute_gain(As, Bs, self.C, self.D, horizon_L - 1)
        Q_t = np.vstack((np.hstack(
            (self.M, np.zeros((self.M.shape[0], self.N.shape[1])))),
                         np.hstack((np.zeros(
                             (self.N.shape[0], self.M.shape[1])), self.N))))
        R_t = np.vstack((np.hstack(
            (np.copy(P_t), np.zeros((P_t.shape[0], P_t.shape[1])))),
                         np.hstack((np.zeros((P_t.shape[0], P_t.shape[1])),
                                    np.zeros((P_t.shape[0], P_t.shape[1]))))))
        ee_distributions = []
        ee_approx_distr = []
        collision_probs = []
        path_rewards = []
        expected_num_collisions_path = 0
        try:
            (state_reward, terminal, expected_num_collisions_step,
             samples) = self.get_expected_state_reward(xs[0], P_t)
        except:
            eval_queue.put(None)
            return
        expected_num_collisions_path += expected_num_collisions_step
        path_rewards.append(
            np.power(self.discount, current_step) * state_reward)
        Cov = 0
        CPi = []
        estimated_state_covariances = [P_t]
        deviation_covariances = []
        estimated_deviation_covariances = []

        if deviation_covariance is None:
            deviation_covariances.append(
                np.zeros((2 * self.robot_dof, 2 * self.robot_dof)))
        else:
            deviation_covariances.append(deviation_covariance)

        if estimated_deviation_covariance is None:
            estimated_deviation_covariances.append(
                np.zeros((2 * self.robot_dof, 2 * self.robot_dof)))
        else:
            estimated_deviation_covariances.append(
                estimated_deviation_covariance)

        for i in xrange(1, horizon_L):
            P_hat_t = kalman.compute_p_hat_t(As[i], P_t, Vs[i], Ms[i])
            K_t = kalman.compute_kalman_gain(Hs[i], P_hat_t, Ws[i], Ns[i])
            P_t = kalman.compute_P_t(K_t, Hs[i], P_hat_t)
            F_0 = np.hstack((As[i], np.dot(Bs[i], Ls[i - 1])))
            F_1 = np.hstack(
                (np.dot(K_t, np.dot(Hs[i], As[i])), As[i] +
                 np.dot(Bs[i], Ls[i - 1]) - np.dot(K_t, np.dot(Hs[i], As[i]))))
            F_t = np.vstack((F_0, F_1))
            G_t_l_r = np.dot(K_t, Ws[i])
            G_t_u_r = np.zeros((Vs[i].shape[0], G_t_l_r.shape[1]))
            G_t = np.vstack((np.hstack((Vs[i], G_t_u_r)),
                             np.hstack((np.dot(K_t, np.dot(Hs[i],
                                                           Vs[i])), G_t_l_r))))
            """ R_t is the covariance matrix of [x_dash_t, x_tilde_t]^T
            """
            R_t = np.dot(F_t, np.dot(R_t, F_t.T)) + np.dot(
                G_t, np.dot(Q_t, G_t.T))
            deviation_covariance = np.array(
                [[R_t[j, k] for k in xrange(R_t.shape[1] / 2)]
                 for j in xrange(R_t.shape[0] / 2)])
            deviation_covariances.append(deviation_covariance)
            estimated_deviation_covariance = np.array(
                [[R_t[j, k] for k in xrange(R_t.shape[1] / 2, R_t.shape[1])]
                 for j in xrange(R_t.shape[0] / 2, R_t.shape[0])])
            estimated_deviation_covariances.append(
                estimated_deviation_covariance)
            L = np.identity(2 * self.robot_dof)
            if i != horizon_L - 1:
                L = Ls[i]
            Gamma_t = np.vstack((np.hstack(
                (np.identity(L.shape[1]), np.zeros((L.shape[1], L.shape[1])))),
                                 np.hstack((np.zeros(
                                     (L.shape[0], L.shape[1])), L))))
            Cov = np.dot(Gamma_t, np.dot(R_t, Gamma_t.T))
            cov_state = np.array(
                [[Cov[j, k] for k in xrange(2 * self.robot_dof)]
                 for j in xrange(2 * self.robot_dof)])
            estimated_state_covariances.append(cov_state)
            try:
                (state_reward, terminal, expected_num_collisions_step,
                 samples) = self.get_expected_state_reward(xs[i], cov_state)
            except:
                eval_queue.put(None)
                return
            CPi.append(expected_num_collisions_step)
            expected_num_collisions_path += expected_num_collisions_step
            path_rewards.append(
                np.power(self.discount, current_step + i) * state_reward)
            if self.show_viewer:
                self.show_state_and_cov(xs[i], cov_state, samples)
                time.sleep(0.2)
        path_reward = sum(path_rewards)
        product = 1.0
        for i in xrange(len(CPi)):
            product *= (1.0 - CPi[i])
        '''
        CP is the probability that the nominal path collides, approximated using a multiplicative approach
        '''
        CP = 1 - product
        #CP = sum(CPi)
        print "PathEvaluator: Path " + str(index) + " evaluated. Reward: " + str(path_reward) + ", mean num collisions: " + str(expected_num_collisions_path) + " " \
        #      "CP " + str(CP)

        logging.info("========================================")
        logging.info("PathEvaluator: reward for path " + str(index) + " is " +
                     str(path_reward))
        logging.info("========================================")
        if eval_queue is not None:
            eval_queue.put(
                (index, path_reward, path, Cov, estimated_state_covariances,
                 deviation_covariances, estimated_deviation_covariances))
        else:
            return (path_reward, estimated_state_covariances,
                    deviation_covariances, estimated_deviation_covariances)
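
Both evaluate variants propagate the joint covariance of the true deviation x_dash and the estimated deviation x_tilde with the same recursion, R_t+1 = F_t R_t F_t^T + G_t Q_t G_t^T, and then read the two diagonal blocks back out. Written as standalone steps, assuming R_t stacks two n x n blocks per axis:

import numpy as np

def propagate_joint_covariance(R_t, F_t, G_t, Q_t):
    # One step of the recursion over the stacked vector
    # [x_dash_t, x_tilde_t]^T used in evaluate().
    return np.dot(F_t, np.dot(R_t, F_t.T)) + np.dot(G_t, np.dot(Q_t, G_t.T))

def split_deviation_covariances(R_t):
    # Upper-left block: covariance of the true deviation x_dash.
    # Lower-right block: covariance of the estimated deviation x_tilde.
    n = R_t.shape[0] // 2
    return R_t[:n, :n], R_t[n:, n:]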