def simulate_n_steps(self,
                     xs, us, zs,
                     control_durations,
                     x_true,
                     x_tilde,
                     x_tilde_linear,
                     x_estimate,
                     P_t,
                     total_reward,
                     current_step,
                     n_steps,
                     deviation_covariances,
                     estimated_deviation_covariances,
                     max_num_steps=None):
    history_entries = []
    terminal_state_reached = False
    success = False
    collided = False
    x_dash = np.copy(x_tilde)
    x_dash_linear = np.copy(x_tilde_linear)
    x_true_linear = x_true
    As, Bs, Vs, Ms, Hs, Ws, Ns = self.get_linear_model_matrices(xs, us, control_durations)
    Ls = kalman.compute_gain(As, Bs, self.C, self.D, len(xs) - 1)
    logging.info("Simulator: Executing for " + str(n_steps) + " steps")
    estimated_states = []
    estimated_covariances = []
    self.show_nominal_path(xs)

    """ Execute the nominal path for n_steps """
    for i in xrange(n_steps):
        if not terminal_state_reached:
            linearization_error = utils.dist(x_dash, x_dash_linear)
            history_entries.append(HistoryEntry(current_step,
                                                x_true, xs[i], x_true_linear,
                                                x_estimate,
                                                x_dash, x_dash_linear,
                                                linearization_error,
                                                None, None, None,
                                                P_t,
                                                False, False, False,
                                                0.0))
            current_step += 1
            history_entries[-1].set_s_dash_estimated(x_tilde)

            """ Calc u_dash from the estimated deviation of the state from
            the nominal path 'x_tilde' using the optimal gain L """
            u_dash = np.dot(Ls[i], x_tilde)
            history_entries[-1].set_u_dash(u_dash)

            """ Calc the actual control input """
            u = u_dash + us[i]

            """ Enforce the control constraints on 'u' and 'u_dash' """
            if self.control_constraints_enforced:
                u, u_dash = self.enforce_control_constraints(u, us[i])
            history_entries[-1].set_action(u)
            history_entries[-1].set_nominal_action(us[i])

            """ Apply the control 'u' and propagate the state 'x_true' """
            x_true_temp, ce = self.apply_control(x_true,
                                                 u,
                                                 control_durations[i],
                                                 As[i], Bs[i], Vs[i], Ms[i])

            """ Calc the linearized next state (used to compare with x_true) """
            x_dash_linear_temp = self.get_linearized_next_state(x_dash_linear, u_dash, ce,
                                                                As[i], Bs[i], Vs[i])
            x_true_linear_temp = np.add(x_dash_linear_temp, xs[i + 1])
            if self.enforce_constraints:
                x_true_linear_temp = self.check_constraints(x_true_linear_temp)
            discount = np.power(self.discount_factor, current_step - 1)

            """ Check if the propagated state collides with an obstacle.
            If yes, the true state is set to the previous state with 0 velocity.
            If not, set the true state to the propagated state """
            collided = False
            in_collision_true_state, colliding_obstacle = self.is_in_collision(x_true, x_true_temp)
            if in_collision_true_state:
                for j in xrange(len(x_true) / 2, len(x_true)):
                    x_true[j] = 0.0
                logging.info("Simulator: Collision detected. "
                             "Setting state estimate to the previous state")
                total_reward += discount * (-1.0 * self.illegal_move_penalty)
                history_entries[-1].set_reward(discount * (-1.0 * self.illegal_move_penalty))
                history_entries[-1].set_colliding_obstacle(colliding_obstacle.getName())
                history_entries[-1].set_colliding_state(x_true_temp)
                collided = True
            else:
                total_reward += discount * (-1.0 * self.step_penalty)
                history_entries[-1].set_reward(discount * (-1.0 * self.step_penalty))
                x_true = x_true_temp

            """ Do the same for the linearized true state """
            if self.is_in_collision(x_true_linear, x_true_linear_temp)[0]:
                for j in xrange(len(x_true_linear) / 2, len(x_true_linear)):
                    x_true_linear[j] = 0.0
            else:
                x_true_linear = x_true_linear_temp

            """ Calculate the state deviation from the nominal path """
            x_dash = np.subtract(x_true, xs[i + 1])
            x_dash_linear = np.subtract(x_true_linear, xs[i + 1])

            """ Get the end effector position for the true state """
            state = v_double()
            state[:] = [x_true[j] for j in xrange(len(x_true) / 2)]
            ee_position_arr = v_double()
            self.robot.getEndEffectorPosition(state, ee_position_arr)
            ee_position = np.array([ee_position_arr[j] for j in xrange(len(ee_position_arr))])
            logging.info("Simulator: Current end-effector position is " + str(ee_position))

            """ Generate an observation for the true state """
            z = self.get_observation(x_true, Hs[i + 1], Ns[i + 1], Ws[i + 1])
            z_dash = np.subtract(z, zs[i + 1])
            history_entries[-1].set_observation(z)

            """ Update the viewer to display the true state """
            self.update_viewer(x_true, z, control_durations[i], colliding_obstacle)

            """ Kalman prediction and update """
            x_tilde_dash, P_dash = kalman.kalman_predict(x_tilde, u_dash, As[i], Bs[i],
                                                         P_t, Vs[i], Ms[i])
            x_tilde, P_t = kalman.kalman_update(x_tilde_dash, z_dash, Hs[i],
                                                P_dash, Ws[i], Ns[i])

            """ x_estimate_new is the estimated state """
            x_estimate_new = x_tilde + xs[i + 1]

            """ Enforce the constraints on the estimated state """
            if self.enforce_constraints:
                x_estimate_new = self.check_constraints(x_estimate_new)

            """ Check if the estimated state collides.
            If yes, set it to the previous estimate (with velocity 0).
            If no, set the true estimate to this estimate """
            estimate_collided = True
            if not self.is_in_collision([], x_estimate_new)[0]:
                x_estimate = x_estimate_new
                estimate_collided = False
            else:
                for j in xrange(len(x_estimate) / 2, len(x_estimate)):
                    x_estimate[j] = 0
            estimated_states.append(x_estimate)
            estimated_covariances.append(P_t)

            """ Check if the end-effector position is terminal. If yes, we're done """
            if self.is_terminal(ee_position):
                discount = np.power(self.discount_factor, current_step)
                history_entries.append(HistoryEntry(current_step,
                                                    x_true, xs[i + 1], x_true_linear,
                                                    x_estimate,
                                                    x_dash, x_dash_linear,
                                                    0.0,
                                                    None, None, z,
                                                    P_t,
                                                    False, False, True,
                                                    discount * self.exit_reward))
                terminal_state_reached = True
                total_reward += discount * self.exit_reward
                history_entries[-1].set_linearization_error(utils.dist(x_dash, x_dash_linear))
                logging.info("Terminal state reached: reward = " + str(total_reward))
            history_entries[-1].set_collided(collided)
            history_entries[-1].set_estimate_collided(estimate_collided)
            if current_step == max_num_steps and not terminal_state_reached:
                """ This was the last step -> append final history entry """
                history_entries.append(HistoryEntry(current_step,
                                                    x_true, xs[i + 1], x_true_linear,
                                                    x_estimate,
                                                    x_dash, x_dash_linear,
                                                    0.0,
                                                    None, None, z,
                                                    P_t,
                                                    False, False, False,
                                                    0.0))
                history_entries[-1].set_linearization_error(utils.dist(x_dash, x_dash_linear))
            if collided and self.stop_when_colliding:
                break
    return (x_true,
            x_tilde,
            x_dash_linear,
            x_estimate,
            P_t,
            current_step,
            total_reward,
            terminal_state_reached,
            estimated_states,
            estimated_covariances,
            history_entries)
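# The kalman.kalman_predict / kalman.kalman_update helpers used above are not
# shown in this section. A minimal sketch that is consistent with their call
# signatures -- process noise entering as V M V^T, observation noise as
# W N W^T, both filters operating on the deviations x_tilde / z_dash -- might
# look as follows. This is an assumption about the module, not its actual code:

import numpy as np

def kalman_predict(x_tilde, u_dash, A, B, P_t, V, M):
    """ Propagate the deviation estimate through the linearized dynamics """
    x_tilde_dash = np.dot(A, x_tilde) + np.dot(B, u_dash)
    P_dash = np.dot(A, np.dot(P_t, A.T)) + np.dot(V, np.dot(M, V.T))
    return x_tilde_dash, P_dash

def kalman_update(x_tilde_dash, z_dash, H, P_dash, W, N):
    """ Correct the prediction with the observation deviation z_dash """
    S = np.dot(H, np.dot(P_dash, H.T)) + np.dot(W, np.dot(N, W.T))
    K = np.dot(P_dash, np.dot(H.T, np.linalg.inv(S)))
    x_tilde = x_tilde_dash + np.dot(K, z_dash - np.dot(H, x_tilde_dash))
    P_t = np.dot(np.identity(P_dash.shape[0]) - np.dot(K, H), P_dash)
    return x_tilde, P_t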
def evaluate(self,
             index,
             path,
             P_t,
             current_step,
             horizon,
             robot,
             eval_queue=None,
             deviation_covariance=None,
             estimated_deviation_covariance=None):
    if self.show_viewer:
        robot.setupViewer(self.model_file, self.env_file)
    xs = path[0]
    us = path[1]
    zs = path[2]
    control_durations = path[3]
    horizon_L = horizon + 1
    if horizon == -1 or len(xs) < horizon_L:
        horizon_L = len(xs)
    As, Bs, Vs, Ms, Hs, Ws, Ns = self.get_linear_model_matrices(xs, us, control_durations)
    Ls = kalman.compute_gain(As, Bs, self.C, self.D, horizon_L - 1)
    Q_t = np.vstack((np.hstack((self.M, np.zeros((self.M.shape[0], self.N.shape[1])))),
                     np.hstack((np.zeros((self.N.shape[0], self.M.shape[1])), self.N))))
    R_t = np.vstack((np.hstack((np.copy(P_t), np.zeros((P_t.shape[0], P_t.shape[1])))),
                     np.hstack((np.zeros((P_t.shape[0], P_t.shape[1])),
                                np.zeros((P_t.shape[0], P_t.shape[1]))))))
    ee_distributions = []
    ee_approx_distr = []
    collision_probs = []
    path_rewards = []
    expected_num_collisions_path = 0
    (state_reward, terminal,
     expected_num_collisions_step, samples) = self.get_expected_state_reward(xs[0], P_t)
    expected_num_collisions_path += expected_num_collisions_step
    path_rewards.append(np.power(self.discount, current_step) * state_reward)
    Cov = 0
    CPi = []
    estimated_state_covariances = [P_t]
    deviation_covariances = []
    estimated_deviation_covariances = []
    if deviation_covariance is None:
        deviation_covariances.append(np.zeros((2 * self.robot_dof, 2 * self.robot_dof)))
    else:
        deviation_covariances.append(deviation_covariance)
    if estimated_deviation_covariance is None:
        estimated_deviation_covariances.append(np.zeros((2 * self.robot_dof, 2 * self.robot_dof)))
    else:
        estimated_deviation_covariances.append(estimated_deviation_covariance)
    for i in xrange(1, horizon_L):
        P_hat_t = kalman.compute_p_hat_t(As[i], P_t, Vs[i], Ms[i])
        K_t = kalman.compute_kalman_gain(Hs[i], P_hat_t, Ws[i], Ns[i])
        P_t = kalman.compute_P_t(K_t, Hs[i], P_hat_t)
        F_0 = np.hstack((As[i], np.dot(Bs[i], Ls[i - 1])))
        F_1 = np.hstack((np.dot(K_t, np.dot(Hs[i], As[i])),
                         As[i] + np.dot(Bs[i], Ls[i - 1]) - np.dot(K_t, np.dot(Hs[i], As[i]))))
        F_t = np.vstack((F_0, F_1))
        G_t_l_r = np.dot(K_t, Ws[i])
        G_t_u_r = np.zeros((Vs[i].shape[0], G_t_l_r.shape[1]))
        G_t = np.vstack((np.hstack((Vs[i], G_t_u_r)),
                         np.hstack((np.dot(K_t, np.dot(Hs[i], Vs[i])), G_t_l_r))))

        """ R_t is the covariance matrix of [x_dash_t, x_tilde_t]^T """
        R_t = np.dot(F_t, np.dot(R_t, F_t.T)) + np.dot(G_t, np.dot(Q_t, G_t.T))
        deviation_covariance = np.array([[R_t[j, k] for k in xrange(R_t.shape[1] / 2)]
                                         for j in xrange(R_t.shape[0] / 2)])
        deviation_covariances.append(deviation_covariance)
        estimated_deviation_covariance = np.array(
            [[R_t[j, k] for k in xrange(R_t.shape[1] / 2, R_t.shape[1])]
             for j in xrange(R_t.shape[0] / 2, R_t.shape[0])])
        estimated_deviation_covariances.append(estimated_deviation_covariance)
        L = np.identity(2 * self.robot_dof)
        if i != horizon_L - 1:
            L = Ls[i]
        Gamma_t = np.vstack((np.hstack((np.identity(L.shape[1]),
                                        np.zeros((L.shape[1], L.shape[1])))),
                             np.hstack((np.zeros((L.shape[0], L.shape[1])), L))))
        Cov = np.dot(Gamma_t, np.dot(R_t, Gamma_t.T))
        cov_state = np.array([[Cov[j, k] for k in xrange(2 * self.robot_dof)]
                              for j in xrange(2 * self.robot_dof)])
        estimated_state_covariances.append(cov_state)
        (state_reward, terminal,
         expected_num_collisions_step, samples) = self.get_expected_state_reward(xs[i], cov_state)
        CPi.append(expected_num_collisions_step)
        expected_num_collisions_path += expected_num_collisions_step
        path_rewards.append(np.power(self.discount, current_step + i) * state_reward)
        if self.show_viewer:
            self.show_state_and_cov(xs[i], cov_state, samples)
            time.sleep(0.2)
    path_reward = sum(path_rewards)
    product = 1.0
    for i in xrange(len(CPi)):
        product *= (1.0 - CPi[i])

    ''' CP is the probability that the nominal path collides,
    approximated using a multiplicative approach '''
    CP = 1.0 - product
    #CP = sum(CPi)
    logging.info("========================================")
    logging.info("PathEvaluator: reward for path " + str(index) + " is " + str(path_reward))
    logging.info("========================================")
    if eval_queue is not None:
        eval_queue.put((index,
                        path_reward,
                        path,
                        Cov,
                        estimated_state_covariances,
                        deviation_covariances,
                        estimated_deviation_covariances))
    else:
        return (path_reward,
                estimated_state_covariances,
                deviation_covariances,
                estimated_deviation_covariances)
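# Written out, the recursion implemented in the loop above propagates the
# joint covariance R_t of [x_dash_t, x_tilde_t]^T (true and estimated
# deviation from the nominal path):
#
#   R_{t+1} = F_t R_t F_t^T + G_t Q_t G_t^T,    Q_t = blkdiag(M, N)
#
#   F_t = [ A_t            B_t L_{t-1}                     ]
#         [ K_t H_t A_t    A_t + B_t L_{t-1} - K_t H_t A_t ]
#
#   G_t = [ V_t            0       ]
#         [ K_t H_t V_t    K_t W_t ]
#
# Multiplying with Gamma_t = blkdiag(I, L_t) maps the estimate block to the
# feedback control, so Cov = Gamma_t R_t Gamma_t^T is the covariance of
# [x_dash_t, u_dash_t]^T; its upper-left quadrant, cov_state, is the
# true-state deviation covariance passed to get_expected_state_reward.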
def adjust_plan(self, robot, plan, x_estimated, P_t):
    xs = [plan[0][i] for i in xrange(1, len(plan[0]))]
    us = [plan[1][i] for i in xrange(1, len(plan[1]))]
    zs = [plan[2][i] for i in xrange(1, len(plan[2]))]
    control_durations = [plan[3][i] for i in xrange(1, len(plan[3]))]
    As, Bs, Vs, Ms, Hs, Ws, Ns = self.get_linear_model_matrices(robot, xs, us, control_durations)
    Ls = kalman.compute_gain(As, Bs, self.C, self.D, len(xs) - 1)
    xs_adjusted = []
    us_adjusted = []
    zs_adjusted = []
    xs_adjusted.append(xs[0])
    zs_adjusted.append(zs[0])
    try:
        x_tilde = x_estimated - xs[0]
    except Exception as e:
        print e
        print "==================="
        print "x_estimated " + str(x_estimated)
        print "xs[0] " + str(xs[0])
        print "xs " + str(xs)
        return (None, None, None, None, True)
    for i in xrange(len(xs) - 1):
        x_predicted = np.array([xs[i][k] for k in xrange(len(xs[i]))])
        u = np.dot(Ls[i], x_estimated - x_predicted) + us[i]
        if self.control_constraints_enforced:
            u = self.enforce_control_constraints(u)
        if self.dynamic_problem:
            current_state = v_double()
            current_state[:] = [xs_adjusted[i][j] for j in xrange(len(xs_adjusted[i]))]
            control = v_double()
            control[:] = u
            control_error = v_double()
            control_error[:] = [0.0 for k in xrange(len(u))]
            result = v_double()
            robot.propagate(current_state,
                            control,
                            control_error,
                            self.simulation_step_size,
                            control_durations[i],
                            result)
            xs_adjusted.append(np.array([result[k] for k in xrange(len(result))]))
            us_adjusted.append(np.array([u[k] for k in xrange(len(u))]))
            zs_adjusted.append(np.array([result[k] for k in xrange(len(result))]))
        else:
            current_state = np.array([xs_adjusted[i][j] for j in xrange(len(xs_adjusted[i]))])
            result = np.dot(As[i], current_state) + np.dot(Bs[i], u)
            xs_adjusted.append(np.array([result[k] for k in xrange(len(result))]))
            us_adjusted.append(np.array([u[k] for k in xrange(len(u))]))
            zs_adjusted.append(np.array([result[k] for k in xrange(len(result))]))
        u_dash = u - us[i]

        """ Maximum likelihood observation """
        z_dash = np.array([0.0 for k in xrange(len(zs_adjusted[-1]))])

        """ Kalman prediction and update """
        x_tilde_dash, P_dash = kalman.kalman_predict(x_tilde, u_dash, As[i], Bs[i],
                                                     P_t, Vs[i], Ms[i])
        x_tilde, P_t = kalman.kalman_update(x_tilde_dash, z_dash, Hs[i],
                                            P_dash, Ws[i], Ns[i])
        x_estimated = x_tilde + xs[i + 1]
    us_adjusted.append(np.array([0.0 for k in xrange(len(us[0]))]))
    control_durations_adjusted = [control_durations[i] for i in xrange(len(control_durations))]
    control_durations_adjusted.append(0.0)
    return (xs_adjusted, us_adjusted, zs_adjusted, control_durations_adjusted, True)
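# enforce_control_constraints is called with one argument here but with two in
# simulate_n_steps, where it also returns the constrained control deviation.
# A hypothetical sketch covering both call sites, assuming simple box limits
# self.control_lower / self.control_upper (names invented for illustration):

import numpy as np

def enforce_control_constraints(self, u, u_nominal=None):
    """ Clip the control to the admissible box; if the nominal control is
    given, also return the resulting constrained deviation u_dash """
    u_clipped = np.clip(u, self.control_lower, self.control_upper)
    if u_nominal is None:
        return u_clipped
    return u_clipped, u_clipped - u_nominal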
def simulate_n_steps(self,
                     xs, us, zs,
                     control_durations,
                     x_true,
                     x_estimate,
                     P_t,
                     total_reward,
                     current_step,
                     n_steps,
                     deviation_covariances,
                     estimated_deviation_covariances,
                     max_num_steps=None):
    history_entries = []
    terminal_state_reached = False
    success = False
    collided = False
    z = None
    x_dash = np.subtract(np.array(x_true), np.array(xs[0]))
    x_dash_linear = np.copy(x_dash)
    x_true_linear = x_true
    x_tilde = np.array(x_estimate) - np.array(xs[0])
    As, Bs, Vs, Ms, Hs, Ws, Ns = self.get_linear_model_matrices(xs, us, control_durations)
    Ls = kalman.compute_gain(As, Bs, self.C, self.D, len(xs) - 1)
    logging.info("Simulator: Executing for " + str(n_steps) + " steps")
    estimated_states = []
    estimated_covariances = []
    self.show_nominal_path(xs)

    """ Execute the nominal path for n_steps """
    for i in xrange(n_steps):
        if not terminal_state_reached:
            history_entries.append(HistoryEntry(current_step,
                                                x_true, xs[i], x_true_linear,
                                                x_estimate,
                                                x_dash, x_dash_linear,
                                                0.0,
                                                None, None, None,
                                                P_t,
                                                False, False, False,
                                                0.0))

            """ Record the estimation error and its normalized variant """
            x_t_n = np.linalg.norm(np.array([x_true[k] for k in xrange(len(x_true) / 2)]))
            x_e_n = np.linalg.norm(np.array([x_estimate[k] for k in xrange(len(x_estimate) / 2)]))
            x_true_norm = np.array([0.0 for k in xrange(len(x_true) / 2)])
            x_estimate_norm = np.array([0.0 for k in xrange(len(x_estimate) / 2)])
            if x_t_n != 0:
                x_true_norm = np.array([x_true[k] for k in xrange(len(x_true) / 2)]) / x_t_n
            if x_e_n != 0:
                x_estimate_norm = np.array([x_estimate[k]
                                            for k in xrange(len(x_estimate) / 2)]) / x_e_n
            history_entries[-1].set_estimation_error(
                np.linalg.norm(np.array(x_true) - np.array(x_estimate)))
            history_entries[-1].set_estimation_error_normalized(
                np.linalg.norm(np.array(x_true_norm) - np.array(x_estimate_norm)))
            current_step += 1
            history_entries[-1].set_s_dash_estimated(x_tilde)

            """ Calc u_dash from the estimated deviation of the state from
            the nominal path 'x_tilde' using the optimal gain L """
            try:
                u_dash = np.dot(Ls[i], x_tilde)
            except Exception as e:
                print e
                print "i " + str(i)
                print "len(Ls) " + str(len(Ls))
                print "len(xs) " + str(len(xs))
            history_entries[-1].set_u_dash(u_dash)

            """ Calc the actual control input """
            u = u_dash + us[i]

            """ Enforce the control constraints on 'u' and 'u_dash' """
            if self.control_constraints_enforced:
                u, u_dash = self.enforce_control_constraints(u, us[i])
            history_entries[-1].set_action(u)
            history_entries[-1].set_nominal_action(us[i])

            """ Apply the control 'u' and propagate the state 'x_true' """
            x_true_temp, ce = self.apply_control(x_true,
                                                 u,
                                                 control_durations[i],
                                                 As[i], Bs[i], Vs[i], Ms[i],
                                                 xs[i], us[i])

            """ Calc the linearized next state (used to compare with x_true) """
            x_dash_linear_temp = self.get_linearized_next_state(x_dash, u_dash, ce,
                                                                As[i], Bs[i], Vs[i])
            x_true_linear_temp = np.add(x_dash_linear_temp, xs[i + 1])
            if self.enforce_constraints:
                x_true_temp = self.check_constraints(x_true_temp)
                x_true_linear_temp = self.check_constraints(x_true_linear_temp)
            discount = np.power(self.discount_factor, current_step - 1)

            """ Check if the propagated state collides with an obstacle.
            If yes, the true state is set to the previous state with 0 velocity.
            If not, set the true state to the propagated state """
            collided = False
            in_collision_true_state, colliding_obstacle = self.is_in_collision(x_true, x_true_temp)
            self.set_colliding_obstacle(colliding_obstacle)
            if in_collision_true_state:
                for j in xrange(len(x_true) / 2, len(x_true)):
                    x_true[j] = 0.0
                    x_true_linear[j] = 0.0
                logging.info("Simulator: Collision detected. "
                             "Setting state estimate to the previous state")
                total_reward += discount * (-1.0 * self.illegal_move_penalty)
                history_entries[-1].set_reward(discount * (-1.0 * self.illegal_move_penalty))
                try:
                    history_entries[-1].set_colliding_obstacle(colliding_obstacle.getName())
                except:
                    pass
                history_entries[-1].set_colliding_state(x_true_temp)
                collided = True

                """ Calculate the state deviation from the nominal path """
                x_dash = np.subtract(x_true, xs[i])
                x_dash_linear = np.subtract(x_true_linear, xs[i])
            else:
                total_reward += discount * (-1.0 * self.step_penalty)
                history_entries[-1].set_reward(discount * (-1.0 * self.step_penalty))
                x_true = x_true_temp
                x_true_linear = x_true_linear_temp
                x_dash = np.subtract(x_true, xs[i + 1])
                x_dash_linear = np.subtract(x_true_linear, xs[i + 1])
            linearization_error = utils.dist(x_dash, x_dash_linear)
            history_entries[-1].set_linearization_error(linearization_error)

            """ Get the end effector position for the true state """
            state = v_double()
            state[:] = [x_true[j] for j in xrange(len(x_true) / 2)]
            ee_position_arr = v_double()
            self.robot.getEndEffectorPosition(state, ee_position_arr)
            ee_position = np.array([ee_position_arr[j] for j in xrange(len(ee_position_arr))])
            logging.info("Simulator: Current end-effector position is " + str(ee_position))

            """ Generate an observation for the true state """
            z = self.get_observation(x_true, Hs[i + 1], Ns[i + 1], Ws[i + 1])
            try:
                z_dash = np.subtract(z, zs[i + 1])
                history_entries[-1].set_observation(z)
            except:
                print "len(xs) " + str(len(xs))
                print "len(zs) " + str(len(zs))

            """ Kalman prediction and update """
            x_tilde_dash, P_dash = kalman.kalman_predict(x_tilde, u_dash, As[i], Bs[i],
                                                         P_t, Vs[i], Ms[i])
            x_tilde, P_t = kalman.kalman_update(x_tilde_dash, z_dash, Hs[i],
                                                P_dash, Ws[i], Ns[i])

            """ x_estimate_new is the estimated state """
            x_estimate_new = x_tilde + xs[i + 1]

            """ Enforce the constraints on the estimated state """
            if self.enforce_constraints:
                x_estimate_new = self.check_constraints(x_estimate_new)

            """ Check if the estimated state collides.
            If yes, set it to the previous estimate (with velocity 0).
            If no, set the true estimate to this estimate """
            estimate_collided = True
            if self.is_in_collision([], x_estimate_new)[0]:
                for l in xrange(len(x_estimate) / 2, len(x_estimate)):
                    x_estimate[l] = 0
            elif in_collision_true_state and self.knows_collision:
                for l in xrange(len(x_estimate) / 2, len(x_estimate)):
                    x_estimate[l] = 0
            else:
                x_estimate = x_estimate_new
                estimate_collided = False
            estimated_states.append(x_estimate)
            estimated_covariances.append(P_t)

            """ Check if the end-effector position is terminal. If yes, we're done """
            if self.is_terminal(ee_position):
                discount = np.power(self.discount_factor, current_step)
                history_entries.append(HistoryEntry(current_step,
                                                    x_true, xs[i + 1], x_true_linear,
                                                    x_estimate,
                                                    x_dash, x_dash_linear,
                                                    0.0,
                                                    None, None, z,
                                                    P_t,
                                                    False, False, True,
                                                    discount * self.exit_reward))
                terminal_state_reached = True
                total_reward += discount * self.exit_reward
                history_entries[-1].set_linearization_error(utils.dist(x_dash, x_dash_linear))
                logging.info("Terminal state reached: reward = " + str(total_reward))
            history_entries[-1].set_collided(collided)
            history_entries[-1].set_estimate_collided(estimate_collided)
            if current_step == max_num_steps and not terminal_state_reached:
                """ This was the last step -> append final history entry """
                history_entries.append(HistoryEntry(current_step,
                                                    x_true, xs[i + 1], x_true_linear,
                                                    x_estimate,
                                                    x_dash, x_dash_linear,
                                                    0.0,
                                                    None, None, z,
                                                    P_t,
                                                    False, False, False,
                                                    0.0))
                history_entries[-1].set_linearization_error(utils.dist(x_dash, x_dash_linear))
            if collided and self.stop_when_colliding:
                break
    return (x_true,
            x_tilde,
            x_dash_linear,
            x_estimate,
            z,
            P_t,
            current_step,
            total_reward,
            terminal_state_reached,
            estimated_states,
            estimated_covariances,
            history_entries)
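# kalman.compute_gain is assumed to return the time-varying feedback gains
# L_t of a finite-horizon discrete-time LQR problem with state cost C and
# control cost D, so that u_dash_t = L_t x_tilde_t stabilizes the system
# around the nominal path. A minimal sketch of the backward Riccati recursion
# -- an assumption based on the call signature, not the module's actual code:

import numpy as np

def compute_gain(As, Bs, C, D, horizon):
    """ Backward pass: S is the cost-to-go matrix, Ls[t] the gain at step t """
    S = np.copy(C)
    Ls = []
    for t in xrange(horizon - 1, -1, -1):
        A, B = As[t], Bs[t]
        L = -np.dot(np.linalg.inv(np.dot(B.T, np.dot(S, B)) + D),
                    np.dot(B.T, np.dot(S, A)))
        S = C + np.dot(A.T, np.dot(S, A + np.dot(B, L)))
        Ls.append(L)
    Ls.reverse()
    return Ls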
def evaluate(self,
             index,
             path,
             P_t,
             current_step,
             horizon,
             robot,
             eval_queue=None,
             deviation_covariance=None,
             estimated_deviation_covariance=None):
    if self.show_viewer:
        robot.setupViewer(self.model_file, self.env_file)
    xs = path[0]
    us = path[1]
    zs = path[2]
    control_durations = path[3]
    horizon_L = horizon + 1
    if horizon == -1 or len(xs) < horizon_L:
        horizon_L = len(xs)
    As, Bs, Vs, Ms, Hs, Ws, Ns = self.get_linear_model_matrices(xs, us, control_durations)
    Ls = kalman.compute_gain(As, Bs, self.C, self.D, horizon_L - 1)
    Q_t = np.vstack((np.hstack((self.M, np.zeros((self.M.shape[0], self.N.shape[1])))),
                     np.hstack((np.zeros((self.N.shape[0], self.M.shape[1])), self.N))))
    R_t = np.vstack((np.hstack((np.copy(P_t), np.zeros((P_t.shape[0], P_t.shape[1])))),
                     np.hstack((np.zeros((P_t.shape[0], P_t.shape[1])),
                                np.zeros((P_t.shape[0], P_t.shape[1]))))))
    ee_distributions = []
    ee_approx_distr = []
    collision_probs = []
    path_rewards = []
    expected_num_collisions_path = 0
    try:
        (state_reward, terminal,
         expected_num_collisions_step, samples) = self.get_expected_state_reward(xs[0], P_t)
    except:
        eval_queue.put(None)
        return
    expected_num_collisions_path += expected_num_collisions_step
    path_rewards.append(np.power(self.discount, current_step) * state_reward)
    Cov = 0
    CPi = []
    estimated_state_covariances = [P_t]
    deviation_covariances = []
    estimated_deviation_covariances = []
    if deviation_covariance is None:
        deviation_covariances.append(np.zeros((2 * self.robot_dof, 2 * self.robot_dof)))
    else:
        deviation_covariances.append(deviation_covariance)
    if estimated_deviation_covariance is None:
        estimated_deviation_covariances.append(np.zeros((2 * self.robot_dof, 2 * self.robot_dof)))
    else:
        estimated_deviation_covariances.append(estimated_deviation_covariance)
    for i in xrange(1, horizon_L):
        P_hat_t = kalman.compute_p_hat_t(As[i], P_t, Vs[i], Ms[i])
        K_t = kalman.compute_kalman_gain(Hs[i], P_hat_t, Ws[i], Ns[i])
        P_t = kalman.compute_P_t(K_t, Hs[i], P_hat_t)
        F_0 = np.hstack((As[i], np.dot(Bs[i], Ls[i - 1])))
        F_1 = np.hstack((np.dot(K_t, np.dot(Hs[i], As[i])),
                         As[i] + np.dot(Bs[i], Ls[i - 1]) - np.dot(K_t, np.dot(Hs[i], As[i]))))
        F_t = np.vstack((F_0, F_1))
        G_t_l_r = np.dot(K_t, Ws[i])
        G_t_u_r = np.zeros((Vs[i].shape[0], G_t_l_r.shape[1]))
        G_t = np.vstack((np.hstack((Vs[i], G_t_u_r)),
                         np.hstack((np.dot(K_t, np.dot(Hs[i], Vs[i])), G_t_l_r))))

        """ R_t is the covariance matrix of [x_dash_t, x_tilde_t]^T """
        R_t = np.dot(F_t, np.dot(R_t, F_t.T)) + np.dot(G_t, np.dot(Q_t, G_t.T))
        deviation_covariance = np.array([[R_t[j, k] for k in xrange(R_t.shape[1] / 2)]
                                         for j in xrange(R_t.shape[0] / 2)])
        deviation_covariances.append(deviation_covariance)
        estimated_deviation_covariance = np.array(
            [[R_t[j, k] for k in xrange(R_t.shape[1] / 2, R_t.shape[1])]
             for j in xrange(R_t.shape[0] / 2, R_t.shape[0])])
        estimated_deviation_covariances.append(estimated_deviation_covariance)
        L = np.identity(2 * self.robot_dof)
        if i != horizon_L - 1:
            L = Ls[i]
        Gamma_t = np.vstack((np.hstack((np.identity(L.shape[1]),
                                        np.zeros((L.shape[1], L.shape[1])))),
                             np.hstack((np.zeros((L.shape[0], L.shape[1])), L))))
        Cov = np.dot(Gamma_t, np.dot(R_t, Gamma_t.T))
        cov_state = np.array([[Cov[j, k] for k in xrange(2 * self.robot_dof)]
                              for j in xrange(2 * self.robot_dof)])
        estimated_state_covariances.append(cov_state)
        try:
            (state_reward, terminal,
             expected_num_collisions_step, samples) = self.get_expected_state_reward(xs[i], cov_state)
        except:
            eval_queue.put(None)
            return
        CPi.append(expected_num_collisions_step)
        expected_num_collisions_path += expected_num_collisions_step
        path_rewards.append(np.power(self.discount, current_step + i) * state_reward)
        if self.show_viewer:
            self.show_state_and_cov(xs[i], cov_state, samples)
            time.sleep(0.2)
    path_reward = sum(path_rewards)
    product = 1.0
    for i in xrange(len(CPi)):
        product *= (1.0 - CPi[i])

    ''' CP is the probability that the nominal path collides,
    approximated using a multiplicative approach '''
    CP = 1.0 - product
    #CP = sum(CPi)
    print "PathEvaluator: Path " + str(index) + " evaluated. Reward: " + str(path_reward) + \
        ", mean num collisions: " + str(expected_num_collisions_path) + ", CP: " + str(CP)
    logging.info("========================================")
    logging.info("PathEvaluator: reward for path " + str(index) + " is " + str(path_reward))
    logging.info("========================================")
    if eval_queue is not None:
        eval_queue.put((index,
                        path_reward,
                        path,
                        Cov,
                        estimated_state_covariances,
                        deviation_covariances,
                        estimated_deviation_covariances))
    else:
        return (path_reward,
                estimated_state_covariances,
                deviation_covariances,
                estimated_deviation_covariances)
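# Numerically, the multiplicative approximation treats the per-step collision
# probabilities CP_i as independent events:
#
#   CP = 1 - prod_i (1 - CP_i)
#
# e.g. CP_i = 0.01 over 30 steps gives CP = 1 - 0.99^30 ~= 0.26. The
# commented-out additive variant sum(CPi) = 0.30 is an upper bound on CP and
# is close to it only when the individual CP_i are small.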