def __init__(self): self.abs_path = os.path.dirname(os.path.abspath(__file__)) cmd = "rm -rf " + self.abs_path + "/tmp" popen = subprocess.Popen(cmd, cwd=self.abs_path, shell=True) popen.wait() """ Reading the config """ warnings.filterwarnings("ignore") self.init_serializer() config = self.serializer.read_config("config_hrf.yaml", path=self.abs_path) self.set_params(config) if self.seed < 0: """ Generate a random seed that will be stored """ self.seed = np.random.randint(0, 42949672) np.random.seed(self.seed) logging_level = logging.WARN if config['verbose']: logging_level = logging.DEBUG logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_level) np.set_printoptions(precision=16) dir = self.abs_path + "/stats/hrf" tmp_dir = self.abs_path + "/tmp/hrf" self.utils = Utils() if not self.init_robot(self.robot_file): logging.error("HRF: Couldn't initialize robot") return if not self.setup_scene(self.environment_file, self.robot): return self.clear_stats(dir) logging.info("Start up simulator") sim = Simulator() plan_adjuster = PlanAdjuster() path_evaluator = PathEvaluator() path_planner = PathPlanningInterface() if self.show_viewer_simulation: self.robot.setupViewer(self.robot_file, self.environment_file) logging.info("HRF: Generating goal states...") problem_feasible = False while not problem_feasible: print "Creating random scene..." #self.create_random_obstacles(20) goal_states = get_goal_states( "hrf", self.abs_path, self.serializer, self.obstacles, self.robot, self.max_velocity, self.delta_t, self.start_state, self.goal_position, self.goal_radius, self.planning_algortihm, self.path_timeout, self.num_generated_goal_states, self.continuous_collision, self.environment_file, self.num_cores) if len(goal_states) == 0: logging.error( "HRF: Couldn't generate any goal states. 
Problem seems to be infeasible" ) else: problem_feasible = True '''obst = v_obstacle() obst[:] = self.obstacles self.robot.addObstacles(obst)''' logging.info("HRF: Generated " + str(len(goal_states)) + " goal states") sim.setup_reward_function(self.discount_factor, self.step_penalty, self.illegal_move_penalty, self.exit_reward) path_planner.setup(self.robot, self.obstacles, self.max_velocity, self.delta_t, self.use_linear_path, self.planning_algortihm, self.path_timeout, self.continuous_collision, self.num_cores) if self.dynamic_problem: path_planner.setup_dynamic_problem( self.robot_file, self.environment_file, self.simulation_step_size, self.num_control_samples, self.min_control_duration, self.max_control_duration, self.add_intermediate_states, self.rrt_goal_bias, self.control_sampler) A, H, B, V, W, C, D, M_base, N_base = self.problem_setup( self.delta_t, self.robot_dof) time_to_generate_paths = 0.0 if check_positive_definite([C, D]): m_covs = None if self.inc_covariance == "process": m_covs = np.linspace(self.min_process_covariance, self.max_process_covariance, self.covariance_steps) elif self.inc_covariance == "observation": m_covs = np.linspace(self.min_observation_covariance, self.max_observation_covariance, self.covariance_steps) for j in xrange(len(m_covs)): M = None N = None if self.inc_covariance == "process": """ The process noise covariance matrix """ M = self.calc_covariance_value(self.robot, m_covs[j], M_base) #M = m_covs[j] * M_base """ The observation error covariance matrix """ N = self.calc_covariance_value(self.robot, self.min_observation_covariance, N_base, covariance_type='observation') elif self.inc_covariance == "observation": M = self.calc_covariance_value(self.robot, self.min_process_covariance, M_base) N = self.calc_covariance_value(self.robot, m_covs[j], N_base, covariance_type='observation') path_planner.setup_path_evaluator( A, B, C, D, H, M, N, V, W, self.robot, self.sample_size, self.obstacles, self.goal_position, self.goal_radius, self.robot_file, self.environment_file) sim.setup_problem(A, B, C, D, H, V, W, M, N, self.robot, self.enforce_control_constraints, self.obstacles, self.goal_position, self.goal_radius, self.max_velocity, self.show_viewer_simulation, self.robot_file, self.environment_file, self.knows_collision) path_evaluator.setup(A, B, C, D, H, M, N, V, W, self.robot, self.sample_size, self.obstacles, self.goal_position, self.goal_radius, self.show_viewer_evaluation, self.robot_file, self.environment_file, self.num_cores) path_evaluator.setup_reward_function(self.step_penalty, self.illegal_move_penalty, self.exit_reward, self.discount_factor) plan_adjuster.setup(self.robot, M, H, W, N, C, D, self.dynamic_problem, self.enforce_control_constraints) plan_adjuster.set_simulation_step_size(self.simulation_step_size) plan_adjuster.set_model_matrices(A, B, V) if not self.dynamic_problem: plan_adjuster.set_max_joint_velocities_linear_problem( np.array( [self.max_velocity for i in xrange(self.robot_dof)])) sim.set_stop_when_colliding(self.replan_when_colliding) if self.dynamic_problem: path_evaluator.setup_dynamic_problem() sim.setup_dynamic_problem(self.simulation_step_size) path_planner.setup_reward_function(self.step_penalty, self.exit_reward, self.illegal_move_penalty, self.discount_factor) mean_number_planning_steps = 0.0 number_of_steps = 0.0 mean_planning_time = 0.0 num_generated_paths_run = 0.0 successful_runs = 0 num_collisions = 0.0 linearization_error = 0.0 final_states = [] rewards_cov = [] for k in xrange(self.num_simulation_runs): print 
"HRF: Run " + str(k + 1) self.serializer.write_line("log.log", tmp_dir, "RUN #" + str(k + 1) + " \n") current_step = 0 x_true = np.array([ self.start_state[m] for m in xrange(len(self.start_state)) ]) x_estimated = np.array([ self.start_state[m] for m in xrange(len(self.start_state)) ]) #x_estimated[0] += 0.2 #x_estimated[0] = 0.4 #x_predicted = self.start_state P_t = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)]) P_ext_t = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)]) deviation_covariance = np.array( [[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)]) estimated_deviation_covariance = np.array( [[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)]) total_reward = 0.0 terminal = False last_x_true = None """ Obtain a nominal path """ print "plan" path_planner.set_start_and_goal(x_estimated, goal_states, self.goal_position, self.goal_radius) t0 = time.time() (xs, us, zs, control_durations, num_generated_paths, objective, state_covariances, deviation_covariances, estimated_deviation_covariances, mean_gen_times, mean_eval_times, total_gen_times, total_eval_times) = path_planner.plan_and_evaluate_paths( 1, 0, current_step, self.evaluation_horizon, P_t, deviation_covariance, estimated_deviation_covariance, 0.0) print "objective " + str(objective) while True: print "current step " + str(current_step) '''if current_step == 7: x_true = np.array([last_x_true[k] for k in xrange(len(last_x_true))]) for k in xrange(len(last_x_true) / 2, len(last_x_true)): last_x_true[k] = 0.0''' """ Predict system state at t+1 using nominal path """ """ Get state matrices """ #As, Bs, Vs, Ms, Hs, Ws, Ns = sim.get_linear_model_matrices(xs, us, control_durations) As, Bs, Vs, Ms, Hs, Ws, Ns = sim.get_linear_model_matrices( [x_estimated], [us[0]], control_durations) """ Predict using EKF """ (x_predicted_temp, P_predicted) = kalman.predict_state( self.robot, x_estimated, us[0], control_durations[0], self.simulation_step_size, As[0], Bs[0], Vs[0], Ms[0], P_ext_t, self.dynamic_problem) """ Make sure x_predicted fulfills the constraints """ if self.enforce_constraints: x_predicted_temp = sim.check_constraints( x_predicted_temp) predicted_collided = True if not sim.is_in_collision([], x_predicted_temp)[0]: x_predicted = x_predicted_temp predicted_collided = False else: print "X_PREDICTED COLLIDES!" 
x_predicted = x_estimated for l in xrange( len(x_predicted) / 2, len(x_predicted)): x_predicted[l] = 0 last_x_true = np.array( [x_true[k] for k in xrange(len(x_true))]) """ Execute path for 1 time step """ (x_true, x_tilde, x_tilde_linear, x_estimated_dash, z, P_t, current_step, total_reward, terminal, estimated_s, estimated_c, history_entries) = sim.simulate_n_steps( xs, us, zs, control_durations, x_true, x_estimated, P_t, total_reward, current_step, 1, 0.0, 0.0, max_num_steps=self.max_num_steps) history_entries[-1].set_best_reward(objective) """ Process history entries """ try: deviation_covariance = deviation_covariances[ len(history_entries) - 1] estimated_deviation_covariance = estimated_deviation_covariances[ len(history_entries) - 1] except: print "what: len(deviation_covariances) " + str( len(deviation_covariances)) print "len(history_entries) " + str( len(history_entries)) print "len(xs) " + str(len(xs)) history_entries[0].set_replanning(True) for l in xrange(len(history_entries)): try: history_entries[l].set_estimated_covariance( state_covariances[l]) except: print "l " + str(l) print "len(state_covariances) " + str( len(state_covariances)) if history_entries[l].collided: num_collisions += 1 linearization_error += history_entries[ l].linearization_error if (current_step == self.max_num_steps) or terminal: history_entries[len(history_entries) - 2].set_best_reward(objective) history_entries[-1].set_best_reward(None) for l in xrange(len(history_entries)): history_entries[l].serialize(tmp_dir, "log.log") final_states.append(history_entries[-1].x_true) if terminal: print "Terminal state reached" successful_runs += 1 break """ Plan new trajectories from predicted state """ path_planner.set_start_and_goal(x_predicted, goal_states, self.goal_position, self.goal_radius) t0 = time.time() paths = path_planner.plan_paths( self.num_paths, 0, planning_timeout=self.timeout, min_num_paths=1) mean_planning_time += time.time() - t0 mean_number_planning_steps += 1.0 num_generated_paths_run += len(paths) """ Filter update """ (x_estimated_temp, P_ext_t) = kalman.filter_update(x_predicted, P_predicted, z, Hs[0], Ws[0], Ns[0]) """ Make sure x_estimated fulfills the constraints """ if self.enforce_constraints: x_estimated_temp = sim.check_constraints( x_estimated_temp) in_collision, colliding_obstacle = sim.is_in_collision( [], x_estimated_temp) if in_collision: history_entries[-1].set_estimate_collided(True) for l in xrange( len(x_estimated) / 2, len(x_estimated)): x_estimated[l] = 0 elif history_entries[-1].collided and self.knows_collision: for l in xrange( len(x_estimated) / 2, len(x_estimated)): x_estimated[l] = 0 else: x_estimated = x_estimated_temp history_entries[-1].set_estimate_collided(False) history_entries[-1].serialize(tmp_dir, "log.log") """ Adjust plan """ t0 = time.time() (xs_adj, us_adj, zs_adj, control_durations_adj, adjusted) = plan_adjuster.adjust_plan( self.robot, (xs, us, zs, control_durations), x_estimated, P_t) if not adjusted: final_states.append(history_entries[-1].x_true) print "current step " + str(current_step) raise ValueError("Raised") break if self.show_viewer_simulation: sim.update_viewer( x_true, x_estimated, z, control_duration=0.03, colliding_obstacle=sim.colliding_obstacle) """ Evaluate the adjusted plan and the planned paths """ #if self.is_terminal(xs_adj[-1]) and not len(xs_adj)==1: if not len(xs_adj) <= 1: """ Only evaluate the adjusted path if it's > 1 """ paths.extend( [[xs_adj, us_adj, zs_adj, control_durations_adj]]) if len(paths) == 0: """ Running out of 
paths """ print "Error: Couldn't generate an evaluate any paths" final_states.append(history_entries[-1].x_true) break (path_index, xs, us, zs, control_durations, objective, state_covariances, deviation_covariances, estimated_deviation_covariances ) = path_evaluator.evaluate_paths(paths, P_t, current_step) mean_planning_time += time.time() - t0 if path_index == None: """ We couldn't evaluate any paths """ final_states.append(history_entries[-1].x_true) break if self.show_viewer_simulation: self.visualize_paths(self.robot, [xs]) rewards_cov.append(total_reward) self.serializer.write_line( "log.log", tmp_dir, "Reward: " + str(total_reward) + " \n") self.serializer.write_line("log.log", tmp_dir, "\n") number_of_steps += current_step mean_planning_time_per_step = mean_planning_time / mean_number_planning_steps mean_planning_time /= self.num_simulation_runs mean_num_generated_paths_step = num_generated_paths_run / mean_number_planning_steps """ Calculate the distance to goal area """ ee_position_distances = [] for state in final_states: joint_angles = v_double() joint_angles[:] = [state[y] for y in xrange(len(state) / 2)] ee_position = v_double() self.robot.getEndEffectorPosition(joint_angles, ee_position) ee_position = np.array( [ee_position[y] for y in xrange(len(ee_position))]) dist = np.linalg.norm( np.array(self.goal_position - ee_position)) if dist < self.goal_radius: dist = 0.0 ee_position_distances.append(dist) logging.info("HRF: Done. total_reward is " + str(total_reward)) try: n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe( np.array(ee_position_distances)) except: print ee_position_distances self.serializer.write_line("log.log", tmp_dir, "################################# \n") self.serializer.write_line( "log.log", tmp_dir, "inc_covariance: " + str(self.inc_covariance) + "\n") if self.inc_covariance == "process": self.serializer.write_line( "log.log", tmp_dir, "Process covariance: " + str(m_covs[j]) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Observation covariance: " + str(self.min_observation_covariance) + " \n") elif self.inc_covariance == "observation": self.serializer.write_line( "log.log", tmp_dir, "Process covariance: " + str(self.min_process_covariance) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Observation covariance: " + str(m_covs[j]) + " \n") mean_linearization_error = linearization_error / number_of_steps number_of_steps /= self.num_simulation_runs self.serializer.write_line( "log.log", tmp_dir, "Mean number of steps: " + str(number_of_steps) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Mean number of generated paths per step: " + str(mean_num_generated_paths_step) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Mean num collisions per run: " + str(float(num_collisions) / float(self.num_simulation_runs)) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Average distance to goal area: " + str(mean_distance_to_goal) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Num successes: " + str(successful_runs) + " \n") print "succ " + str( (100.0 / self.num_simulation_runs) * successful_runs) self.serializer.write_line( "log.log", tmp_dir, "Percentage of successful runs: " + str( (100.0 / self.num_simulation_runs) * successful_runs) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Mean planning time per run: " + str(mean_planning_time) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Mean planning time per planning step: " + str(mean_planning_time_per_step) + " \n") n, 
min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))
print "mean_rewards " + str(mean)
#plt.plot_histogram_from_data(rewards_cov)
#sleep
self.serializer.write_line("log.log", tmp_dir, "Mean rewards: " + str(mean) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Reward variance: " + str(var) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Mean linearisation error: " + str(mean_linearization_error) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Seed: " + str(self.seed) + " \n")
cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_hrf_" + str(m_covs[j]) + ".log"
os.system(cmd)
cmd = "cp " + self.abs_path + "/config_hrf.yaml " + dir
os.system(cmd)
if not os.path.exists(dir + "/environment"):
    os.makedirs(dir + "/environment")
cmd = "cp " + self.abs_path + "/" + str(self.environment_file) + " " + str(dir) + "/environment"
os.system(cmd)
if not os.path.exists(dir + "/model"):
    os.makedirs(dir + "/model")
cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
os.system(cmd)
print "Done."
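# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the HRF loop above
# alternates kalman.predict_state(...) and kalman.filter_update(...).  The
# helper below shows one predict/update cycle of a plain linear Kalman filter
# using numpy only, with the same matrix names (A, B, H, M, N) as the setup
# code; it is a simplified stand-in, not the repository's kalman module.
import numpy as np

def _example_kalman_step(x_est, P, u, z, A, B, H, M, N):
    """One predict/update cycle for x' = A x + B u + w,  z = H x + v."""
    # Predict the state and covariance with the (linearised) process model
    x_pred = A.dot(x_est) + B.dot(u)
    P_pred = A.dot(P).dot(A.T) + M
    # Correct the prediction with the measurement z (observation noise N)
    S = H.dot(P_pred).dot(H.T) + N
    K = P_pred.dot(H.T).dot(np.linalg.inv(S))
    x_new = x_pred + K.dot(z - H.dot(x_pred))
    P_new = (np.eye(P.shape[0]) - K.dot(H)).dot(P_pred)
    return x_new, P_new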
def __init__(self, plot): self.abs_path = os.path.dirname(os.path.abspath(__file__)) """ Reading the config """ warnings.filterwarnings("ignore") self.init_serializer() config = self.serializer.read_config("config_mpc.yaml", path=self.abs_path) self.set_params(config) if self.seed < 0: """ Generate a random seed that will be stored """ self.seed = np.random.randint(0, 4294967295) np.random.seed(self.seed) logging_level = logging.WARN if config['verbose']: logging_level = logging.DEBUG logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_level) np.set_printoptions(precision=16) dir = self.abs_path + "/stats/mpc" tmp_dir = self.abs_path + "/tmp/mpc" self.utils = Utils() if not self.init_robot(self.robot_file): logging.error("MPC: Couldn't initialize robot") return if not self.setup_scene(self.environment_file, self.robot): return #self.run_viewer(self.robot_file, self.environment_file) self.clear_stats(dir) logging.info("Start up simulator") sim = Simulator() path_evaluator = PathEvaluator() path_planner = PathPlanningInterface() logging.info("MPC: Generating goal states...") goal_states = get_goal_states("mpc", self.abs_path, self.serializer, self.obstacles, self.robot, self.max_velocity, self.delta_t, self.start_state, self.goal_position, self.goal_radius, self.planning_algortihm, self.path_timeout, self.num_generated_goal_states, self.continuous_collision, self.environment_file, self.num_cores) if len(goal_states) == 0: logging.error("MPC: Couldn't generate any goal states. Problem seems to be infeasible") return logging.info("MPC: Generated " + str(len(goal_states)) + " goal states") sim.setup_reward_function(self.discount_factor, self.step_penalty, self.illegal_move_penalty, self.exit_reward) path_planner.setup(self.robot, self.obstacles, self.max_velocity, self.delta_t, self.use_linear_path, self.planning_algortihm, self.path_timeout, self.continuous_collision, self.num_cores) if self.dynamic_problem: path_planner.setup_dynamic_problem(self.robot_file, self.environment_file, self.simulation_step_size, self.num_control_samples, self.min_control_duration, self.max_control_duration, self.add_intermediate_states, self.rrt_goal_bias, self.control_sampler) A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(self.delta_t, self.robot_dof) time_to_generate_paths = 0.0 if check_positive_definite([C, D]): m_covs = None if self.inc_covariance == "process": m_covs = np.linspace(self.min_process_covariance, self.max_process_covariance, self.covariance_steps) elif self.inc_covariance == "observation": m_covs = np.linspace(self.min_observation_covariance, self.max_observation_covariance, self.covariance_steps) for j in xrange(len(m_covs)): M = None N = None if self.inc_covariance == "process": """ The process noise covariance matrix """ M = self.calc_covariance_value(self.robot, m_covs[j], M_base) #M = m_covs[j] * M_base """ The observation error covariance matrix """ N = self.calc_covariance_value(self.robot, self.min_observation_covariance, N_base, covariance_type='observation') elif self.inc_covariance == "observation": M = self.calc_covariance_value(self.robot, self.min_process_covariance, M_base) N = self.calc_covariance_value(self.robot, m_covs[j], N_base, covariance_type='observation') P_t = np.array([[0.0 for k in xrange(2 * self.robot_dof)] for l in xrange(2 * self.robot_dof)]) path_planner.setup_path_evaluator(A, B, C, D, H, M, N, V, W, self.robot, self.sample_size, self.obstacles, self.goal_position, self.goal_radius, self.robot_file, self.environment_file) 
sim.setup_problem(A, B, C, D, H, V, W, M, N, self.robot, self.enforce_control_constraints, self.obstacles, self.goal_position, self.goal_radius, self.max_velocity, self.show_viewer_simulation, self.robot_file, self.environment_file) sim.set_stop_when_colliding(self.replan_when_colliding) if self.dynamic_problem: path_evaluator.setup_dynamic_problem() sim.setup_dynamic_problem(self.simulation_step_size) path_planner.setup_reward_function(self.step_penalty, self.exit_reward, self.illegal_move_penalty, self.discount_factor) mean_number_planning_steps = 0.0 number_of_steps = 0.0 mean_planning_time = 0.0 num_generated_paths_run = 0.0 successful_runs = 0 num_collisions = 0.0 final_states= [] rewards_cov = [] for k in xrange(self.num_simulation_runs): print "MPC: Run " + str(k + 1) self.serializer.write_line("log.log", tmp_dir, "RUN #" + str(k + 1) + " \n") current_step = 0 x_true = self.start_state x_estimate = self.start_state x_tilde_linear = np.array([0.0 for i in xrange(2 * self.robot_dof)]) P_t = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)]) deviation_covariance = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)]) estimated_deviation_covariance = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)]) total_reward = 0.0 terminal = False while current_step < self.max_num_steps and not terminal: path_planner.set_start_and_goal(x_estimate, goal_states, self.goal_position, self.goal_radius) t0 = time.time() (xs, us, zs, control_durations, num_generated_paths, best_val, state_covariances, deviation_covariances, estimated_deviation_covariances, mean_gen_times, mean_eval_times, total_gen_times, total_eval_times) = path_planner.plan_and_evaluate_paths(self.num_paths, 0, current_step, self.evaluation_horizon, P_t, deviation_covariance, estimated_deviation_covariance, self.timeout) mean_planning_time += time.time() - t0 mean_number_planning_steps += 1.0 num_generated_paths_run += num_generated_paths if len(xs) == 0: logging.error("MPC: Couldn't find any paths from start state" + str(x_estimate) + " to goal states") total_reward = np.array([-self.illegal_move_penalty])[0] current_step += 1 break x_tilde = np.array([0.0 for i in xrange(2 * self.robot_dof)]) n_steps = self.num_execution_steps if n_steps > len(xs) - 1: n_steps = len(xs) - 1 if current_step + n_steps > self.max_num_steps: n_steps = self.max_num_steps - current_step (x_true, x_tilde, x_tilde_linear, x_estimate, P_t, current_step, total_reward, terminal, estimated_s, estimated_c, history_entries) = sim.simulate_n_steps(xs, us, zs, control_durations, x_true, x_tilde, x_tilde_linear, x_estimate, P_t, total_reward, current_step, n_steps, 0.0, 0.0, max_num_steps=self.max_num_steps) #print "len(hist) " + str(len(history_entries)) #print "len(deviation_covariances) " + str(len(deviation_covariances)) #print "len(estimated_deviation_covariances) " + str(len(estimated_deviation_covariances)) deviation_covariance = deviation_covariances[len(history_entries) - 1] estimated_deviation_covariance = estimated_deviation_covariances[len(history_entries) - 1] if (current_step == self.max_num_steps) or terminal: final_states.append(history_entries[-1].x_true) print "len " + str(len(history_entries)) print "t " + str(history_entries[-1].t) if terminal: successful_runs += 1 history_entries[0].set_replanning(True) for l in xrange(len(history_entries)): history_entries[l].set_estimated_covariance(state_covariances[l]) 
history_entries[l].serialize(tmp_dir, "log.log") if history_entries[l].collided: num_collisions += 1 collided = True #logging.warn("MPC: Execution finished. True state is " + str(x_true)) #logging.warn("MPC: Estimated state is " + str(x_estimate)) logging.warn("MPC: Executed " + str(current_step) + " steps") logging.warn("MPC: terminal " + str(terminal)) if terminal: print "MPC: Final state: " + str(x_true) rewards_cov.append(total_reward) self.serializer.write_line("log.log", tmp_dir, "Reward: " + str(total_reward) + " \n") self.serializer.write_line("log.log", tmp_dir, "\n") number_of_steps += current_step mean_planning_time_per_step = mean_planning_time / mean_number_planning_steps mean_planning_time /= self.num_simulation_runs """ Calculate the distance to goal area """ ee_position_distances = [] for state in final_states: joint_angles = v_double() joint_angles[:] = [state[y] for y in xrange(len(state) / 2)] ee_position = v_double() self.robot.getEndEffectorPosition(joint_angles, ee_position) ee_position = np.array([ee_position[y] for y in xrange(len(ee_position))]) dist = np.linalg.norm(np.array(self.goal_position - ee_position)) if dist < self.goal_radius: dist = 0.0 ee_position_distances.append(dist) logging.info("MPC: Done. total_reward is " + str(total_reward)) try: n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(np.array(ee_position_distances)) except: print ee_position_distances sleep self.serializer.write_line("log.log", tmp_dir, "################################# \n") self.serializer.write_line("log.log", tmp_dir, "inc_covariance: " + str(self.inc_covariance) + "\n") if self.inc_covariance == "process": self.serializer.write_line("log.log", tmp_dir, "Process covariance: " + str(m_covs[j]) + " \n") self.serializer.write_line("log.log", tmp_dir, "Observation covariance: " + str(self.min_observation_covariance) + " \n") elif self.inc_covariance == "observation": self.serializer.write_line("log.log", tmp_dir, "Process covariance: " + str(self.min_process_covariance) + " \n") self.serializer.write_line("log.log", tmp_dir, "Observation covariance: " + str(m_covs[j]) + " \n") number_of_steps /= self.num_simulation_runs self.serializer.write_line("log.log", tmp_dir, "Mean number of steps: " + str(number_of_steps) + " \n") self.serializer.write_line("log.log", tmp_dir, "Mean num collisions per run: " + str(float(num_collisions) / float(self.num_simulation_runs)) + " \n") self.serializer.write_line("log.log", tmp_dir, "Average distance to goal area: " + str(mean_distance_to_goal) + " \n") self.serializer.write_line("log.log", tmp_dir, "Num successes: " + str(successful_runs) + " \n") print "succ " + str((100.0 / self.num_simulation_runs) * successful_runs) self.serializer.write_line("log.log", tmp_dir, "Percentage of successful runs: " + str((100.0 / self.num_simulation_runs) * successful_runs) + " \n") self.serializer.write_line("log.log", tmp_dir, "Mean planning time per run: " + str(mean_planning_time) + " \n") self.serializer.write_line("log.log", tmp_dir, "Mean planning time per planning step: " + str(mean_planning_time_per_step) + " \n") n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov)) print "mean_rewards " + str(mean) #plt.plot_histogram_from_data(rewards_cov) #sleep self.serializer.write_line("log.log", tmp_dir, "Mean rewards: " + str(mean) + " \n") self.serializer.write_line("log.log", tmp_dir, "Reward variance: " + str(var) + " \n") self.serializer.write_line("log.log", tmp_dir, "Reward standard deviation: " + 
str(np.sqrt(var)) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Seed: " + str(self.seed) + " \n")
cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_mpc_" + str(m_covs[j]) + ".log"
os.system(cmd)
cmd = "cp " + self.abs_path + "/config_mpc.yaml " + dir
os.system(cmd)
if not os.path.exists(dir + "/environment"):
    os.makedirs(dir + "/environment")
cmd = "cp " + self.abs_path + "/" + str(self.environment_file) + " " + str(dir) + "/environment"
os.system(cmd)
if not os.path.exists(dir + "/model"):
    os.makedirs(dir + "/model")
cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
os.system(cmd)
print "Done."
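# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the MPC run above is
# a receding-horizon loop -- plan from the current state estimate, execute a
# few steps of the best plan, then replan.  plan_fn and execute_fn below are
# hypothetical placeholders standing in for PathPlanningInterface and
# Simulator; this is a structural sketch, not the actual API.
def _example_receding_horizon(plan_fn, execute_fn, x_estimate,
                              max_num_steps, num_execution_steps):
    current_step = 0
    terminal = False
    while current_step < max_num_steps and not terminal:
        plan = plan_fn(x_estimate)            # replan from the latest estimate
        if not plan:
            break                             # no feasible plan was found
        # Never execute past the end of the plan or past the step budget
        n_steps = min(num_execution_steps, len(plan) - 1,
                      max_num_steps - current_step)
        if n_steps <= 0:
            break
        x_estimate, terminal = execute_fn(plan, n_steps)
        current_step += n_steps
    return x_estimate, current_step, terminal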
def __init__(self, show_scene, deserialize, append_paths): self.abs_path = os.path.dirname(os.path.abspath(__file__)) """ Reading the config """ warnings.filterwarnings("ignore") self.init_serializer() config = self.serializer.read_config("config_lqg.yaml", path=self.abs_path) self.set_params(config) if self.seed < 0: """ Generate a random seed that will be stored """ self.seed = np.random.randint(0, 4294967) np.random.seed(self.seed) logging_level = logging.WARN if config['verbose']: logging_level = logging.DEBUG logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_level) np.set_printoptions(precision=16) dir = self.abs_path + "/stats/lqg" tmp_dir = self.abs_path + "/tmp/lqg" self.utils = Utils() if not self.init_robot(self.robot_file): logging.error("LQG: Couldn't initialize robot") return if not self.setup_scene(self.environment_file, self.robot): return #self.show_state_distribution(urdf_model_file, environment_file) #self.check_continuous_collide(self.robot_file, self.environment_file) if show_scene: self.test_sensor(self.robot_file, self.environment_file, self.abs_path + "/model/sensor.xml") self.run_viewer(self.robot_file, self.environment_file) self.clear_stats(dir) logging.info("Start up simulator") sim = Simulator() path_evaluator = PathEvaluator() path_planner = PathPlanningInterface() logging.info("LQG: Generating goal states...") goal_states = get_goal_states("lqg", self.abs_path, self.serializer, self.obstacles, self.robot, self.max_velocity, self.delta_t, self.start_state, self.goal_position, self.goal_radius, self.planning_algortihm, self.path_timeout, self.num_generated_goal_states, self.continuous_collision, self.environment_file, self.num_cores) if len(goal_states) == 0: logging.error("LQG: Couldn't generate any goal states. Problem seems to be infeasible") return logging.info("LQG: Generated " + str(len(goal_states)) + " goal states") sim.setup_reward_function(self.discount_factor, self.step_penalty, self.illegal_move_penalty, self.exit_reward) path_planner.setup(self.robot, self.obstacles, self.max_velocity, self.delta_t, self.use_linear_path, self.planning_algortihm, self.path_timeout, self.continuous_collision, self.num_cores) if self.dynamic_problem: path_planner.setup_dynamic_problem(self.robot_file, self.environment_file, self.simulation_step_size, self.num_control_samples, self.min_control_duration, self.max_control_duration, self.add_intermediate_states, self.rrt_goal_bias, self.control_sampler) path_planner.set_start_and_goal(self.start_state, goal_states, self.goal_position, self.goal_radius) A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(self.delta_t, self.robot_dof) time_to_generate_paths = 0.0 if check_positive_definite([C, D]): m_covs = None if self.inc_covariance == "process": m_covs = np.linspace(self.min_process_covariance, self.max_process_covariance, self.covariance_steps) elif self.inc_covariance == "observation": m_covs = np.linspace(self.min_observation_covariance, self.max_observation_covariance, self.covariance_steps) emds = [] mean_planning_times = [] time_to_generate_paths = 0.0 paths = [] if ((not append_paths) and deserialize): paths = self.serializer.deserialize_paths(self.abs_path + "/paths.txt", self.robot_dof) #paths = [paths[26]] if len(paths) == 0: print "LQG: Generating " + str(self.num_paths) + " paths from the inital state to the goal position..." 
t0 = time.time() paths = path_planner.plan_paths(self.num_paths, 0) if len(paths) == 0: logging.error("LQG: Couldn't create any paths within the given time.") return time_to_generate_paths = time.time() - t0 print "LQG: Time to generate paths: " + str(time_to_generate_paths) + " seconds" if self.plot_paths: self.serializer.save_paths(paths, "paths.yaml", self.overwrite_paths_file, path=dir) if deserialize or append_paths: self.serializer.serialize_paths(self.abs_path + "/paths.txt", paths, append_paths, self.robot_dof) paths = self.serializer.deserialize_paths(self.abs_path + "/paths.txt", self.robot_dof) """ Determine average path length """ avg_path_length = self.get_avg_path_length(paths) self.serializer.save_avg_path_lengths(avg_path_length, path=dir) cart_coords = [] best_paths = [] all_rewards = [] successes = [] for j in xrange(len(m_covs)): print "LQG: Evaluating " + str(len(paths)) + " paths for covariance value " + str(m_covs[j]) + "..." M = None N = None if self.inc_covariance == "process": """ The process noise covariance matrix """ M = self.calc_covariance_value(self.robot, m_covs[j], M_base) #M = m_covs[j] * M_base """ The observation error covariance matrix """ N = self.calc_covariance_value(self.robot, self.min_observation_covariance, N_base, covariance_type='observation') elif self.inc_covariance == "observation": M = self.calc_covariance_value(self.robot, self.min_process_covariance, M_base) N = self.calc_covariance_value(self.robot, m_covs[j], N_base, covariance_type='observation') P_t = np.array([[0.0 for k in xrange(2 * self.robot_dof)] for l in xrange(2 * self.robot_dof)]) path_evaluator.setup(A, B, C, D, H, M, N, V, W, self.robot, self.sample_size, self.obstacles, self.goal_position, self.goal_radius, self.show_viewer_evaluation, self.robot_file, self.environment_file, self.num_cores) if self.dynamic_problem: path_evaluator.setup_dynamic_problem() path_evaluator.setup_reward_function(self.step_penalty, self.illegal_move_penalty, self.exit_reward, self.discount_factor) t0 = time.time() (path_index, xs, us, zs, control_durations, objective, state_covariances, deviation_covariances, estimated_deviation_covariances) = path_evaluator.evaluate_paths(paths, P_t, 0) te = time.time() - t0 print "LQG: Time to evaluate " + str(len(paths)) + " paths: " + str(te) + "s" mean_planning_time = time_to_generate_paths + te print "LQG: Best objective value: " + str(objective) print "LQG: Length of best path: " + str(len(xs)) print "LQG: Best path has index " + str(path_index) best_paths.append([[xs[i] for i in xrange(len(xs))], [us[i] for i in xrange(len(us))], [zs[i] for i in xrange(len(zs))], [control_durations[i] for i in xrange(len(control_durations))]]) sim.setup_problem(A, B, C, D, H, V, W, M, N, self.robot, self.enforce_control_constraints, self.obstacles, self.goal_position, self.goal_radius, self.max_velocity, self.show_viewer_simulation, self.robot_file, self.environment_file) if self.dynamic_problem: sim.setup_dynamic_problem(self.simulation_step_size) successes = 0 num_collisions = 0 rewards_cov = [] final_states= [] num_steps = 0 collided_num = 0 print "LQG: Running " + str(self.num_simulation_runs) + " simulations..." 
for k in xrange(self.num_simulation_runs): self.serializer.write_line("log.log", tmp_dir, "RUN #" + str(k + 1) + " \n") print "simulation run: " + str(k) (x_true, x_tilde, x_tilde_linear, x_estimate, P_t, current_step, total_reward, terminal, estimated_s, estimated_c, history_entries) = sim.simulate_n_steps(xs, us, zs, control_durations, xs[0], np.array([0.0 for i in xrange(2 * self.robot_dof)]), np.array([0.0 for i in xrange(2 * self.robot_dof)]), xs[0], np.array([[0.0 for k in xrange(2 * self.robot_dof)] for l in xrange(2 * self.robot_dof)]), 0.0, 0, len(xs) - 1, deviation_covariances, estimated_deviation_covariances) if terminal: successes += 1 rewards_cov.append(total_reward) #n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov)) collided = False for l in xrange(len(history_entries)): history_entries[l].set_estimated_covariance(state_covariances[l]) history_entries[l].serialize(tmp_dir, "log.log") if history_entries[l].collided: num_collisions += 1 collided = True if collided: collided_num += 1 num_steps += history_entries[-1].t final_states.append(history_entries[-1].x_true) self.serializer.write_line("log.log", tmp_dir, "Reward: " + str(total_reward) + " \n") self.serializer.write_line("log.log", tmp_dir, "\n") """ Calculate the distance to goal area """ ee_position_distances = [] for state in final_states: joint_angles = v_double() joint_angles[:] = [state[y] for y in xrange(len(state) / 2)] ee_position = v_double() self.robot.getEndEffectorPosition(joint_angles, ee_position) ee_position = np.array([ee_position[y] for y in xrange(len(ee_position))]) dist = np.linalg.norm(np.array(self.goal_position - ee_position)) if dist < self.goal_radius: dist = 0.0 ee_position_distances.append(dist) n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(np.array(ee_position_distances)) self.serializer.write_line("log.log", tmp_dir, "################################# \n") self.serializer.write_line("log.log", tmp_dir, "inc_covariance: " + str(self.inc_covariance) + "\n") if self.inc_covariance == "process": self.serializer.write_line("log.log", tmp_dir, "Process covariance: " + str(m_covs[j]) + " \n") self.serializer.write_line("log.log", tmp_dir, "Observation covariance: " + str(self.min_observation_covariance) + " \n") elif self.inc_covariance == "observation": self.serializer.write_line("log.log", tmp_dir, "Process covariance: " + str(self.min_process_covariance) + " \n") self.serializer.write_line("log.log", tmp_dir, "Observation covariance: " + str(m_covs[j]) + " \n") num_steps /= self.num_simulation_runs self.serializer.write_line("log.log", tmp_dir, "Mean number of steps: " + str(num_steps) + " \n") self.serializer.write_line("log.log", tmp_dir, "Objective value of best path: " + str(objective) + " \n") self.serializer.write_line("log.log", tmp_dir, "Mean num collisions per run: " + str(float(num_collisions) / float(self.num_simulation_runs)) + " \n") print "collision_prob " + str(collided_num / float(self.num_simulation_runs)) print "total num collisions " + str(num_collisions) print "mean num collisions " + str(float(num_collisions) / float(self.num_simulation_runs)) self.serializer.write_line("log.log", tmp_dir, "Length best path: " + str(len(xs)) + " \n") self.serializer.write_line("log.log", tmp_dir, "Index of best path: " + str(path_index) + " \n") self.serializer.write_line("log.log", tmp_dir, "Average distance to goal area: " + str(mean_distance_to_goal) + " \n") self.serializer.write_line("log.log", tmp_dir, "Num successes: " + 
str(successes) + " \n")
print "succ " + str((100.0 / self.num_simulation_runs) * successes)
self.serializer.write_line("log.log", tmp_dir, "Percentage of successful runs: " + str((100.0 / self.num_simulation_runs) * successes) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Mean planning time: " + str(mean_planning_time) + " \n")
n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))
print "mean_rewards " + str(mean)
#plt.plot_histogram_from_data(rewards_cov)
#sleep
self.serializer.write_line("log.log", tmp_dir, "Mean rewards: " + str(mean) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Reward variance: " + str(var) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Seed: " + str(self.seed) + " \n")
cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_lqg_" + str(m_covs[j]) + ".log"
os.system(cmd)
if self.plot_paths:
    self.serializer.save_paths(best_paths, 'best_paths.yaml', True, path=dir)
#self.serializer.save_stats(stats, path=dir)
cmd = "cp " + self.abs_path + "/config_lqg.yaml " + dir
os.system(cmd)
if not os.path.exists(dir + "/environment"):
    os.makedirs(dir + "/environment")
cmd = "cp " + self.abs_path + "/" + str(self.environment_file) + " " + str(dir) + "/environment"
os.system(cmd)
if not os.path.exists(dir + "/model"):
    os.makedirs(dir + "/model")
cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
os.system(cmd)
print "LQG: Time to generate paths: " + str(time_to_generate_paths) + " seconds"
print "Done"
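# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the per-covariance
# statistics written to log.log above come from scipy.stats.describe, whose
# result unpacks into (nobs, minmax, mean, variance, skewness, kurtosis).
# The hypothetical helper below shows that pattern in isolation.
import numpy as np
import scipy.stats

def _example_summarize_rewards(rewards_cov):
    n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))
    return {"num_runs": n,
            "mean_reward": mean,
            "reward_variance": var,
            "reward_std": np.sqrt(var)}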
def __init__(self, show_scene, deserialize, append_paths): self.abs_path = os.path.dirname(os.path.abspath(__file__)) cmd = "rm -rf " + self.abs_path + "/tmp" popen = subprocess.Popen(cmd, cwd=self.abs_path, shell=True) popen.wait() """ Reading the config """ warnings.filterwarnings("ignore") self.init_serializer() config = self.serializer.read_config("config_lqg.yaml", path=self.abs_path) self.set_params(config) if self.seed < 0: """ Generate a random seed that will be stored """ self.seed = np.random.randint(0, 42949672) np.random.seed(self.seed) logging_level = logging.WARN if config['verbose']: logging_level = logging.DEBUG logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_level) np.set_printoptions(precision=16) dir = self.abs_path + "/stats/lqg" tmp_dir = self.abs_path + "/tmp/lqg" self.utils = Utils() if not self.init_robot(self.robot_file): logging.error("LQG: Couldn't initialize robot") return if not self.setup_scene(self.environment_file, self.robot): return #self.show_state_distribution(urdf_model_file, environment_file) #self.check_continuous_collide(self.robot_file, self.environment_file) if show_scene: self.run_viewer(self.robot_file, self.environment_file) self.clear_stats(dir) logging.info("Start up simulator") sim = Simulator() path_evaluator = PathEvaluator() path_planner = PathPlanningInterface() logging.info("LQG: Generating goal states...") goal_states = get_goal_states( "lqg", self.abs_path, self.serializer, self.obstacles, self.robot, self.max_velocity, self.delta_t, self.start_state, self.goal_position, self.goal_radius, self.planning_algortihm, self.path_timeout, self.num_generated_goal_states, self.continuous_collision, self.environment_file, self.num_cores) if len(goal_states) == 0: logging.error( "LQG: Couldn't generate any goal states. Problem seems to be infeasible" ) return logging.info("LQG: Generated " + str(len(goal_states)) + " goal states") sim.setup_reward_function(self.discount_factor, self.step_penalty, self.illegal_move_penalty, self.exit_reward) path_planner.setup(self.robot, self.obstacles, self.max_velocity, self.delta_t, self.use_linear_path, self.planning_algortihm, self.path_timeout, self.continuous_collision, self.num_cores) if self.dynamic_problem: path_planner.setup_dynamic_problem( self.robot_file, self.environment_file, self.simulation_step_size, self.num_control_samples, self.min_control_duration, self.max_control_duration, self.add_intermediate_states, self.rrt_goal_bias, self.control_sampler) path_planner.set_start_and_goal(self.start_state, goal_states, self.goal_position, self.goal_radius) A, H, B, V, W, C, D, M_base, N_base = self.problem_setup( self.delta_t, self.robot_dof) time_to_generate_paths = 0.0 if check_positive_definite([C, D]): m_covs = None if self.inc_covariance == "process": m_covs = np.linspace(self.min_process_covariance, self.max_process_covariance, self.covariance_steps) elif self.inc_covariance == "observation": m_covs = np.linspace(self.min_observation_covariance, self.max_observation_covariance, self.covariance_steps) emds = [] mean_planning_times = [] time_to_generate_paths = 0.0 paths = [] if ((not append_paths) and deserialize): paths = self.serializer.deserialize_paths( self.abs_path + "/paths.txt", self.robot_dof) #paths = [paths[26]] if len(paths) == 0: print "LQG: Generating " + str( self.num_paths ) + " paths from the inital state to the goal position..." 
t0 = time.time() paths = path_planner.plan_paths(self.num_paths, 0) if len(paths) == 0: logging.error( "LQG: Couldn't create any paths within the given time." ) return time_to_generate_paths = time.time() - t0 print "LQG: Time to generate paths: " + str( time_to_generate_paths) + " seconds" if self.plot_paths: self.serializer.save_paths(paths, "paths.yaml", self.overwrite_paths_file, path=dir) if deserialize or append_paths: self.serializer.serialize_paths( self.abs_path + "/paths.txt", paths, append_paths, self.robot_dof) paths = self.serializer.deserialize_paths( self.abs_path + "/paths.txt", self.robot_dof) """ Determine average path length """ avg_path_length = self.get_avg_path_length(paths) self.serializer.save_avg_path_lengths(avg_path_length, path=dir) cart_coords = [] best_paths = [] all_rewards = [] successes = [] for j in xrange(len(m_covs)): print "LQG: Evaluating " + str( len(paths)) + " paths for covariance value " + str( m_covs[j]) + "..." M = None N = None if self.inc_covariance == "process": """ The process noise covariance matrix """ M = self.calc_covariance_value(self.robot, m_covs[j], M_base) #M = m_covs[j] * M_base """ The observation error covariance matrix """ N = self.calc_covariance_value( self.robot, self.min_observation_covariance, N_base, covariance_type='observation') elif self.inc_covariance == "observation": M = self.calc_covariance_value(self.robot, self.min_process_covariance, M_base) N = self.calc_covariance_value( self.robot, m_covs[j], N_base, covariance_type='observation') P_t = np.array([[0.0 for k in xrange(2 * self.robot_dof)] for l in xrange(2 * self.robot_dof)]) path_evaluator.setup(A, B, C, D, H, M, N, V, W, self.robot, self.sample_size, self.obstacles, self.goal_position, self.goal_radius, self.show_viewer_evaluation, self.robot_file, self.environment_file, self.num_cores) if self.dynamic_problem: path_evaluator.setup_dynamic_problem() path_evaluator.setup_reward_function(self.step_penalty, self.illegal_move_penalty, self.exit_reward, self.discount_factor) t0 = time.time() (path_index, xs, us, zs, control_durations, objective, state_covariances, deviation_covariances, estimated_deviation_covariances ) = path_evaluator.evaluate_paths(paths, P_t, 0) te = time.time() - t0 print "LQG: Time to evaluate " + str( len(paths)) + " paths: " + str(te) + "s" mean_planning_time = time_to_generate_paths + te print "LQG: Best objective value: " + str(objective) print "LQG: Length of best path: " + str(len(xs)) print "LQG: Best path has index " + str(path_index) best_paths.append([[xs[i] for i in xrange(len(xs))], [us[i] for i in xrange(len(us))], [zs[i] for i in xrange(len(zs))], [ control_durations[i] for i in xrange(len(control_durations)) ]]) sim.setup_problem(A, B, C, D, H, V, W, M, N, self.robot, self.enforce_control_constraints, self.obstacles, self.goal_position, self.goal_radius, self.max_velocity, self.show_viewer_simulation, self.robot_file, self.environment_file, self.collision_aware) if self.dynamic_problem: sim.setup_dynamic_problem(self.simulation_step_size) successes = 0 num_collisions = 0 rewards_cov = [] final_states = [] num_steps = 0 collided_num = 0 print "LQG: Running " + str( self.num_simulation_runs) + " simulations..." 
for k in xrange(self.num_simulation_runs): self.serializer.write_line("log.log", tmp_dir, "RUN #" + str(k + 1) + " \n") print "simulation run: " + str(k) (x_true, x_tilde, x_tilde_linear, x_estimate, z, P_t, current_step, total_reward, terminal, estimated_s, estimated_c, history_entries) = sim.simulate_n_steps( xs, us, zs, control_durations, xs[0], xs[0], np.array([[0.0 for k in xrange(2 * self.robot_dof)] for l in xrange(2 * self.robot_dof)]), 0.0, 0, len(xs) - 1, deviation_covariances, estimated_deviation_covariances) if terminal: print "TERMINAL" successes += 1 rewards_cov.append(total_reward) #n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov)) collided = False for l in xrange(len(history_entries)): history_entries[l].set_estimated_covariance( state_covariances[l]) history_entries[l].serialize(tmp_dir, "log.log") if history_entries[l].collided: num_collisions += 1 collided = True if collided: collided_num += 1 num_steps += history_entries[-1].t final_states.append(history_entries[-1].x_true) self.serializer.write_line( "log.log", tmp_dir, "Reward: " + str(total_reward) + " \n") self.serializer.write_line("log.log", tmp_dir, "\n") """ Calculate the distance to goal area """ ee_position_distances = [] for state in final_states: joint_angles = v_double() joint_angles[:] = [ state[y] for y in xrange(len(state) / 2) ] ee_position = v_double() self.robot.getEndEffectorPosition(joint_angles, ee_position) ee_position = np.array( [ee_position[y] for y in xrange(len(ee_position))]) dist = np.linalg.norm( np.array(self.goal_position - ee_position)) if dist < self.goal_radius: dist = 0.0 ee_position_distances.append(dist) n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe( np.array(ee_position_distances)) self.serializer.write_line( "log.log", tmp_dir, "################################# \n") self.serializer.write_line( "log.log", tmp_dir, "inc_covariance: " + str(self.inc_covariance) + "\n") if self.inc_covariance == "process": self.serializer.write_line( "log.log", tmp_dir, "Process covariance: " + str(m_covs[j]) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Observation covariance: " + str(self.min_observation_covariance) + " \n") elif self.inc_covariance == "observation": self.serializer.write_line( "log.log", tmp_dir, "Process covariance: " + str(self.min_process_covariance) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Observation covariance: " + str(m_covs[j]) + " \n") num_steps /= self.num_simulation_runs self.serializer.write_line( "log.log", tmp_dir, "Mean number of steps: " + str(num_steps) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Objective value of best path: " + str(objective) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Mean num collisions per run: " + str( float(num_collisions) / float(self.num_simulation_runs)) + " \n") print "collision_prob " + str( collided_num / float(self.num_simulation_runs)) print "total num collisions " + str(num_collisions) print "mean num collisions " + str( float(num_collisions) / float(self.num_simulation_runs)) self.serializer.write_line( "log.log", tmp_dir, "Length best path: " + str(len(xs)) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Index of best path: " + str(path_index) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Average distance to goal area: " + str(mean_distance_to_goal) + " \n") self.serializer.write_line( "log.log", tmp_dir, "Num successes: " + str(successes) + " \n") print "succ " + str( (100.0 / 
self.num_simulation_runs) * successes)
self.serializer.write_line("log.log", tmp_dir, "Percentage of successful runs: " + str((100.0 / self.num_simulation_runs) * successes) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Mean planning time: " + str(mean_planning_time) + " \n")
n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))
print "mean_rewards " + str(mean)
#plt.plot_histogram_from_data(rewards_cov)
#sleep
self.serializer.write_line("log.log", tmp_dir, "Mean rewards: " + str(mean) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Reward variance: " + str(var) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
self.serializer.write_line("log.log", tmp_dir, "Seed: " + str(self.seed) + " \n")
cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_lqg_" + str(m_covs[j]) + ".log"
os.system(cmd)
if self.plot_paths:
    self.serializer.save_paths(best_paths, 'best_paths.yaml', True, path=dir)
#self.serializer.save_stats(stats, path=dir)
cmd = "cp " + self.abs_path + "/config_lqg.yaml " + dir
os.system(cmd)
if not os.path.exists(dir + "/environment"):
    os.makedirs(dir + "/environment")
cmd = "cp " + self.abs_path + "/" + str(self.environment_file) + " " + str(dir) + "/environment"
os.system(cmd)
if not os.path.exists(dir + "/model"):
    os.makedirs(dir + "/model")
cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
os.system(cmd)
print "LQG: Time to generate paths: " + str(time_to_generate_paths) + " seconds"
print "Done"
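# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the covariance sweep
# above only runs when check_positive_definite([C, D]) holds.  A common way
# to test positive definiteness is to attempt a Cholesky factorisation; the
# stand-in below mirrors that idea and is an assumption, not the repository's
# own check_positive_definite implementation.
import numpy as np

def _example_check_positive_definite(matrices):
    for matrix in matrices:
        try:
            np.linalg.cholesky(np.asarray(matrix))
        except np.linalg.LinAlgError:
            return False
    return True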
def __init__(self, plot): self.abs_path = os.path.dirname(os.path.abspath(__file__)) """ Reading the config """ warnings.filterwarnings("ignore") self.init_serializer() config = self.serializer.read_config("config_mpc.yaml", path=self.abs_path) self.set_params(config) if self.seed < 0: """ Generate a random seed that will be stored """ self.seed = np.random.randint(0, 42949672) np.random.seed(self.seed) logging_level = logging.WARN if config['verbose']: logging_level = logging.DEBUG logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_level) np.set_printoptions(precision=16) dir = self.abs_path + "/stats/mpc" tmp_dir = self.abs_path + "/tmp/mpc" self.utils = Utils() if not self.init_robot(self.robot_file): logging.error("MPC: Couldn't initialize robot") return if not self.setup_scene(self.environment_file, self.robot): return #self.run_viewer(self.robot_file, self.environment_file) self.clear_stats(dir) logging.info("Start up simulator") sim = Simulator() path_evaluator = PathEvaluator() path_planner = PathPlanningInterface() logging.info("MPC: Generating goal states...") goal_states = get_goal_states( "mpc", self.abs_path, self.serializer, self.obstacles, self.robot, self.max_velocity, self.delta_t, self.start_state, self.goal_position, self.goal_radius, self.planning_algortihm, self.path_timeout, self.num_generated_goal_states, self.continuous_collision, self.environment_file, self.num_cores) if len(goal_states) == 0: logging.error( "MPC: Couldn't generate any goal states. Problem seems to be infeasible" ) return logging.info("MPC: Generated " + str(len(goal_states)) + " goal states") sim.setup_reward_function(self.discount_factor, self.step_penalty, self.illegal_move_penalty, self.exit_reward) path_planner.setup(self.robot, self.obstacles, self.max_velocity, self.delta_t, self.use_linear_path, self.planning_algortihm, self.path_timeout, self.continuous_collision, self.num_cores) if self.dynamic_problem: path_planner.setup_dynamic_problem( self.robot_file, self.environment_file, self.simulation_step_size, self.num_control_samples, self.min_control_duration, self.max_control_duration, self.add_intermediate_states, self.rrt_goal_bias, self.control_sampler) A, H, B, V, W, C, D, M_base, N_base = self.problem_setup( self.delta_t, self.robot_dof) time_to_generate_paths = 0.0 if check_positive_definite([C, D]): m_covs = None if self.inc_covariance == "process": m_covs = np.linspace(self.min_process_covariance, self.max_process_covariance, self.covariance_steps) elif self.inc_covariance == "observation": m_covs = np.linspace(self.min_observation_covariance, self.max_observation_covariance, self.covariance_steps) for j in xrange(len(m_covs)): M = None N = None if self.inc_covariance == "process": """ The process noise covariance matrix """ M = self.calc_covariance_value(self.robot, m_covs[j], M_base) #M = m_covs[j] * M_base """ The observation error covariance matrix """ N = self.calc_covariance_value(self.robot, self.min_observation_covariance, N_base, covariance_type='observation') elif self.inc_covariance == "observation": M = self.calc_covariance_value(self.robot, self.min_process_covariance, M_base) N = self.calc_covariance_value(self.robot, m_covs[j], N_base, covariance_type='observation') P_t = np.array([[0.0 for k in xrange(2 * self.robot_dof)] for l in xrange(2 * self.robot_dof)]) path_planner.setup_path_evaluator( A, B, C, D, H, M, N, V, W, self.robot, self.sample_size, self.obstacles, self.goal_position, self.goal_radius, self.robot_file, self.environment_file) 
sim.setup_problem(A, B, C, D, H, V, W, M, N, self.robot, self.enforce_control_constraints, self.obstacles, self.goal_position, self.goal_radius, self.max_velocity, self.show_viewer_simulation, self.robot_file, self.environment_file) sim.set_stop_when_colliding(self.replan_when_colliding) if self.dynamic_problem: path_evaluator.setup_dynamic_problem() sim.setup_dynamic_problem(self.simulation_step_size) path_planner.setup_reward_function(self.step_penalty, self.exit_reward, self.illegal_move_penalty, self.discount_factor) mean_number_planning_steps = 0.0 number_of_steps = 0.0 mean_planning_time = 0.0 num_generated_paths_run = 0.0 successful_runs = 0 num_collisions = 0.0 linearization_error = 0.0 final_states = [] rewards_cov = [] for k in xrange(self.num_simulation_runs): print "MPC: Run " + str(k + 1) self.serializer.write_line("log.log", tmp_dir, "RUN #" + str(k + 1) + " \n") current_step = 0 x_true = self.start_state x_estimate = self.start_state x_tilde_linear = np.array( [0.0 for i in xrange(2 * self.robot_dof)]) P_t = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)]) deviation_covariance = np.array( [[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)]) estimated_deviation_covariance = np.array( [[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)]) total_reward = 0.0 terminal = False while current_step < self.max_num_steps and not terminal: path_planner.set_start_and_goal(x_estimate, goal_states, self.goal_position, self.goal_radius) t0 = time.time() (xs, us, zs, control_durations, num_generated_paths, best_val, state_covariances, deviation_covariances, estimated_deviation_covariances, mean_gen_times, mean_eval_times, total_gen_times, total_eval_times) = path_planner.plan_and_evaluate_paths( self.num_paths, 0, current_step, self.evaluation_horizon, P_t, deviation_covariance, estimated_deviation_covariance, self.timeout) mean_planning_time += time.time() - t0 mean_number_planning_steps += 1.0 num_generated_paths_run += num_generated_paths if len(xs) == 0: logging.error( "MPC: Couldn't find any paths from start state" + str(x_estimate) + " to goal states") final_states.append(x_true) #total_reward = np.array([-self.illegal_move_penalty])[0] current_step += 1 break x_tilde = np.array( [0.0 for i in xrange(2 * self.robot_dof)]) n_steps = self.num_execution_steps if n_steps > len(xs) - 1: n_steps = len(xs) - 1 if current_step + n_steps > self.max_num_steps: n_steps = self.max_num_steps - current_step (x_true, x_tilde, x_tilde_linear, x_estimate, P_t, current_step, total_reward, terminal, estimated_s, estimated_c, history_entries) = sim.simulate_n_steps( xs, us, zs, control_durations, x_true, x_tilde, x_tilde_linear, x_estimate, P_t, total_reward, current_step, n_steps, 0.0, 0.0, max_num_steps=self.max_num_steps) #print "len(hist) " + str(len(history_entries)) #print "len(deviation_covariances) " + str(len(deviation_covariances)) #print "len(estimated_deviation_covariances) " + str(len(estimated_deviation_covariances)) try: deviation_covariance = deviation_covariances[ len(history_entries) - 1] estimated_deviation_covariance = estimated_deviation_covariances[ len(history_entries) - 1] except: print "what: len(deviation_covariances) " + str( len(deviation_covariances)) print "len(history_entries) " + str( len(history_entries)) print "len(xs) " + str(len(xs)) if (current_step == self.max_num_steps) or terminal: final_states.append(history_entries[-1].x_true) if terminal: successful_runs += 1 
                    history_entries[0].set_replanning(True)
                    for l in xrange(len(history_entries)):
                        try:
                            history_entries[l].set_estimated_covariance(state_covariances[l])
                        except:
                            print "l " + str(l)
                            print "len(state_covariances) " + str(len(state_covariances))
                        history_entries[l].serialize(tmp_dir, "log.log")
                        if history_entries[l].collided:
                            num_collisions += 1
                        linearization_error += history_entries[l].linearization_error
                #logging.warn("MPC: Execution finished. True state is " + str(x_true))
                #logging.warn("MPC: Estimated state is " + str(x_estimate))
                logging.warn("MPC: Executed " + str(current_step) + " steps")
                logging.warn("MPC: terminal " + str(terminal))
                if terminal:
                    logging.info("MPC: Final state: " + str(x_true))
                rewards_cov.append(total_reward)
                self.serializer.write_line("log.log", tmp_dir, "Reward: " + str(total_reward) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "\n")
                number_of_steps += current_step

            mean_planning_time_per_step = mean_planning_time / mean_number_planning_steps
            mean_planning_time /= self.num_simulation_runs

            """ Calculate the distance to goal area """
            ee_position_distances = []
            for state in final_states:
                joint_angles = v_double()
                joint_angles[:] = [state[y] for y in xrange(len(state) / 2)]
                ee_position = v_double()
                self.robot.getEndEffectorPosition(joint_angles, ee_position)
                ee_position = np.array([ee_position[y] for y in xrange(len(ee_position))])
                dist = np.linalg.norm(np.array(self.goal_position - ee_position))
                if dist < self.goal_radius:
                    dist = 0.0
                ee_position_distances.append(dist)
            logging.info("MPC: Done. total_reward is " + str(total_reward))
            try:
                n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(np.array(ee_position_distances))
            except:
                print ee_position_distances

            self.serializer.write_line("log.log", tmp_dir, "################################# \n")
            self.serializer.write_line("log.log", tmp_dir, "inc_covariance: " + str(self.inc_covariance) + "\n")
            if self.inc_covariance == "process":
                self.serializer.write_line("log.log", tmp_dir, "Process covariance: " + str(m_covs[j]) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "Observation covariance: " + str(self.min_observation_covariance) + " \n")
            elif self.inc_covariance == "observation":
                self.serializer.write_line("log.log", tmp_dir, "Process covariance: " + str(self.min_process_covariance) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "Observation covariance: " + str(m_covs[j]) + " \n")
            mean_linearization_error = linearization_error / number_of_steps
            number_of_steps /= self.num_simulation_runs
            self.serializer.write_line("log.log", tmp_dir, "Mean number of steps: " + str(number_of_steps) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean num collisions per run: " + str(float(num_collisions) / float(self.num_simulation_runs)) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Average distance to goal area: " + str(mean_distance_to_goal) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Num successes: " + str(successful_runs) + " \n")
            print "succ " + str((100.0 / self.num_simulation_runs) * successful_runs)
            self.serializer.write_line("log.log", tmp_dir, "Percentage of successful runs: " + str((100.0 / self.num_simulation_runs) * successful_runs) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean planning time per run: " + str(mean_planning_time) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean planning time per planning step: " + str(mean_planning_time_per_step) + " \n")
            n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))
            print "mean_rewards " + str(mean)
            #plt.plot_histogram_from_data(rewards_cov)
            #sleep
            self.serializer.write_line("log.log", tmp_dir, "Mean rewards: " + str(mean) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Reward variance: " + str(var) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean linearisation error: " + str(mean_linearization_error) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Seed: " + str(self.seed) + " \n")
            cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_mpc_" + str(m_covs[j]) + ".log"
            os.system(cmd)
            cmd = "cp " + self.abs_path + "/config_mpc.yaml " + dir
            os.system(cmd)
            if not os.path.exists(dir + "/environment"):
                os.makedirs(dir + "/environment")
            cmd = "cp " + self.abs_path + "/" + str(self.environment_file) + " " + str(dir) + "/environment"
            os.system(cmd)
            if not os.path.exists(dir + "/model"):
                os.makedirs(dir + "/model")
            cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
            os.system(cmd)
    print "Done."
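# The guard check_positive_definite([C, D]) above only lets the covariance sweep run when the
# cost matrices C and D are positive definite. The helper itself is not shown in this file; the
# snippet below is a minimal, hypothetical sketch of such a test, assuming a Cholesky-based
# check (numpy's Cholesky factorization fails exactly when a symmetric matrix is not positive
# definite). The actual helper imported by these scripts may be implemented differently.
import numpy as np

def _is_positive_definite(matrix):
    # Cholesky succeeds iff the symmetric matrix is positive definite.
    try:
        np.linalg.cholesky(np.asarray(matrix))
        return True
    except np.linalg.LinAlgError:
        return False

def check_positive_definite_sketch(matrices):
    # All matrices (e.g. the cost matrices C and D) must pass the test.
    return all(_is_positive_definite(m) for m in matrices)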
def __init__(self): self.abs_path = os.path.dirname(os.path.abspath(__file__)) cmd = "rm -rf " + self.abs_path + "/tmp" popen = subprocess.Popen(cmd, cwd=self.abs_path, shell=True) popen.wait() """ Reading the config """ warnings.filterwarnings("ignore") self.init_serializer() config = self.serializer.read_config("config_hrf.yaml", path=self.abs_path) self.set_params(config) if self.seed < 0: """ Generate a random seed that will be stored """ self.seed = np.random.randint(0, 42949672) np.random.seed(self.seed) logging_level = logging.WARN if config['verbose']: logging_level = logging.DEBUG logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_level) np.set_printoptions(precision=16) dir = self.abs_path + "/stats/hrf" tmp_dir = self.abs_path + "/tmp/hrf" self.utils = Utils() if not self.init_robot(self.robot_file): logging.error("HRF: Couldn't initialize robot") return if not self.setup_scene(self.environment_file, self.robot): return self.clear_stats(dir) logging.info("Start up simulator") sim = Simulator() plan_adjuster = PlanAdjuster() path_evaluator = PathEvaluator() path_planner = PathPlanningInterface() if self.show_viewer_simulation: self.robot.setupViewer(self.robot_file, self.environment_file) logging.info("HRF: Generating goal states...") problem_feasible = False while not problem_feasible: print "Creating random scene..." #self.create_random_obstacles(20) goal_states = get_goal_states("hrf", self.abs_path, self.serializer, self.obstacles, self.robot, self.max_velocity, self.delta_t, self.start_state, self.goal_position, self.goal_radius, self.planning_algortihm, self.path_timeout, self.num_generated_goal_states, self.continuous_collision, self.environment_file, self.num_cores) if len(goal_states) == 0: logging.error("HRF: Couldn't generate any goal states. 
Problem seems to be infeasible") else: problem_feasible = True '''obst = v_obstacle() obst[:] = self.obstacles self.robot.addObstacles(obst)''' logging.info("HRF: Generated " + str(len(goal_states)) + " goal states") sim.setup_reward_function(self.discount_factor, self.step_penalty, self.illegal_move_penalty, self.exit_reward) path_planner.setup(self.robot, self.obstacles, self.max_velocity, self.delta_t, self.use_linear_path, self.planning_algortihm, self.path_timeout, self.continuous_collision, self.num_cores) if self.dynamic_problem: path_planner.setup_dynamic_problem(self.robot_file, self.environment_file, self.simulation_step_size, self.num_control_samples, self.min_control_duration, self.max_control_duration, self.add_intermediate_states, self.rrt_goal_bias, self.control_sampler) A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(self.delta_t, self.robot_dof) time_to_generate_paths = 0.0 if check_positive_definite([C, D]): m_covs = None if self.inc_covariance == "process": m_covs = np.linspace(self.min_process_covariance, self.max_process_covariance, self.covariance_steps) elif self.inc_covariance == "observation": m_covs = np.linspace(self.min_observation_covariance, self.max_observation_covariance, self.covariance_steps) for j in xrange(len(m_covs)): M = None N = None if self.inc_covariance == "process": """ The process noise covariance matrix """ M = self.calc_covariance_value(self.robot, m_covs[j], M_base) #M = m_covs[j] * M_base """ The observation error covariance matrix """ N = self.calc_covariance_value(self.robot, self.min_observation_covariance, N_base, covariance_type='observation') elif self.inc_covariance == "observation": M = self.calc_covariance_value(self.robot, self.min_process_covariance, M_base) N = self.calc_covariance_value(self.robot, m_covs[j], N_base, covariance_type='observation') path_planner.setup_path_evaluator(A, B, C, D, H, M, N, V, W, self.robot, self.sample_size, self.obstacles, self.goal_position, self.goal_radius, self.robot_file, self.environment_file) sim.setup_problem(A, B, C, D, H, V, W, M, N, self.robot, self.enforce_control_constraints, self.obstacles, self.goal_position, self.goal_radius, self.max_velocity, self.show_viewer_simulation, self.robot_file, self.environment_file, self.knows_collision) path_evaluator.setup(A, B, C, D, H, M, N, V, W, self.robot, self.sample_size, self.obstacles, self.goal_position, self.goal_radius, self.show_viewer_evaluation, self.robot_file, self.environment_file, self.num_cores) path_evaluator.setup_reward_function(self.step_penalty, self.illegal_move_penalty, self.exit_reward, self.discount_factor) plan_adjuster.setup(self.robot, M, H, W, N, C, D, self.dynamic_problem, self.enforce_control_constraints) plan_adjuster.set_simulation_step_size(self.simulation_step_size) plan_adjuster.set_model_matrices(A, B, V) if not self.dynamic_problem: plan_adjuster.set_max_joint_velocities_linear_problem(np.array([self.max_velocity for i in xrange(self.robot_dof)])) sim.set_stop_when_colliding(self.replan_when_colliding) if self.dynamic_problem: path_evaluator.setup_dynamic_problem() sim.setup_dynamic_problem(self.simulation_step_size) path_planner.setup_reward_function(self.step_penalty, self.exit_reward, self.illegal_move_penalty, self.discount_factor) mean_number_planning_steps = 0.0 number_of_steps = 0.0 mean_planning_time = 0.0 num_generated_paths_run = 0.0 successful_runs = 0 num_collisions = 0.0 linearization_error = 0.0 final_states= [] rewards_cov = [] for k in xrange(self.num_simulation_runs): print "HRF: Run " 
                    + str(k + 1)
                self.serializer.write_line("log.log", tmp_dir, "RUN #" + str(k + 1) + " \n")
                current_step = 0
                x_true = np.array([self.start_state[m] for m in xrange(len(self.start_state))])
                x_estimated = np.array([self.start_state[m] for m in xrange(len(self.start_state))])
                #x_estimated[0] += 0.2
                #x_estimated[0] = 0.4
                #x_predicted = self.start_state
                P_t = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)])
                P_ext_t = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)])
                deviation_covariance = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)])
                estimated_deviation_covariance = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)])
                total_reward = 0.0
                terminal = False
                last_x_true = None

                """ Obtain a nominal path """
                print "plan"
                path_planner.set_start_and_goal(x_estimated, goal_states, self.goal_position, self.goal_radius)
                t0 = time.time()
                (xs, us, zs, control_durations, num_generated_paths, objective, state_covariances,
                 deviation_covariances, estimated_deviation_covariances,
                 mean_gen_times, mean_eval_times, total_gen_times,
                 total_eval_times) = path_planner.plan_and_evaluate_paths(1, 0, current_step, self.evaluation_horizon, P_t,
                                                                          deviation_covariance, estimated_deviation_covariance, 0.0)
                print "objective " + str(objective)
                while True:
                    print "current step " + str(current_step)
                    '''if current_step == 7:
                        x_true = np.array([last_x_true[k] for k in xrange(len(last_x_true))])
                        for k in xrange(len(last_x_true) / 2, len(last_x_true)):
                            last_x_true[k] = 0.0'''

                    """ Predict system state at t+1 using nominal path """
                    """ Get state matrices """
                    #As, Bs, Vs, Ms, Hs, Ws, Ns = sim.get_linear_model_matrices(xs, us, control_durations)
                    As, Bs, Vs, Ms, Hs, Ws, Ns = sim.get_linear_model_matrices([x_estimated], [us[0]], control_durations)

                    """ Predict using EKF """
                    (x_predicted_temp, P_predicted) = kalman.predict_state(self.robot, x_estimated, us[0], control_durations[0],
                                                                           self.simulation_step_size, As[0], Bs[0], Vs[0], Ms[0],
                                                                           P_ext_t, self.dynamic_problem)

                    """ Make sure x_predicted fulfills the constraints """
                    if self.enforce_constraints:
                        x_predicted_temp = sim.check_constraints(x_predicted_temp)
                    predicted_collided = True
                    if not sim.is_in_collision([], x_predicted_temp)[0]:
                        x_predicted = x_predicted_temp
                        predicted_collided = False
                    else:
                        print "X_PREDICTED COLLIDES!"
                        x_predicted = x_estimated
                        for l in xrange(len(x_predicted) / 2, len(x_predicted)):
                            x_predicted[l] = 0
                    last_x_true = np.array([x_true[k] for k in xrange(len(x_true))])

                    """ Execute path for 1 time step """
                    (x_true, x_tilde, x_tilde_linear, x_estimated_dash, z, P_t, current_step, total_reward,
                     terminal, estimated_s, estimated_c,
                     history_entries) = sim.simulate_n_steps(xs, us, zs, control_durations, x_true, x_estimated, P_t,
                                                             total_reward, current_step, 1, 0.0, 0.0,
                                                             max_num_steps=self.max_num_steps)
                    history_entries[-1].set_best_reward(objective)

                    """ Process history entries """
                    try:
                        deviation_covariance = deviation_covariances[len(history_entries) - 1]
                        estimated_deviation_covariance = estimated_deviation_covariances[len(history_entries) - 1]
                    except:
                        print "what: len(deviation_covariances) " + str(len(deviation_covariances))
                        print "len(history_entries) " + str(len(history_entries))
                        print "len(xs) " + str(len(xs))
                    history_entries[0].set_replanning(True)
                    for l in xrange(len(history_entries)):
                        try:
                            history_entries[l].set_estimated_covariance(state_covariances[l])
                        except:
                            print "l " + str(l)
                            print "len(state_covariances) " + str(len(state_covariances))
                        if history_entries[l].collided:
                            num_collisions += 1
                        linearization_error += history_entries[l].linearization_error
                    if (current_step == self.max_num_steps) or terminal:
                        history_entries[len(history_entries) - 2].set_best_reward(objective)
                        history_entries[-1].set_best_reward(None)
                        for l in xrange(len(history_entries)):
                            history_entries[l].serialize(tmp_dir, "log.log")
                        final_states.append(history_entries[-1].x_true)
                        if terminal:
                            print "Terminal state reached"
                            successful_runs += 1
                        break

                    """ Plan new trajectories from predicted state """
                    path_planner.set_start_and_goal(x_predicted, goal_states, self.goal_position, self.goal_radius)
                    t0 = time.time()
                    paths = path_planner.plan_paths(self.num_paths, 0, planning_timeout=self.timeout, min_num_paths=1)
                    mean_planning_time += time.time() - t0
                    mean_number_planning_steps += 1.0
                    num_generated_paths_run += len(paths)

                    """ Filter update """
                    (x_estimated_temp, P_ext_t) = kalman.filter_update(x_predicted, P_predicted, z, Hs[0], Ws[0], Ns[0])

                    """ Make sure x_estimated fulfills the constraints """
                    if self.enforce_constraints:
                        x_estimated_temp = sim.check_constraints(x_estimated_temp)
                    in_collision, colliding_obstacle = sim.is_in_collision([], x_estimated_temp)
                    if in_collision:
                        history_entries[-1].set_estimate_collided(True)
                        for l in xrange(len(x_estimated) / 2, len(x_estimated)):
                            x_estimated[l] = 0
                    elif history_entries[-1].collided and self.knows_collision:
                        for l in xrange(len(x_estimated) / 2, len(x_estimated)):
                            x_estimated[l] = 0
                    else:
                        x_estimated = x_estimated_temp
                        history_entries[-1].set_estimate_collided(False)
                    history_entries[-1].serialize(tmp_dir, "log.log")

                    """ Adjust plan """
                    t0 = time.time()
                    (xs_adj, us_adj, zs_adj, control_durations_adj,
                     adjusted) = plan_adjuster.adjust_plan(self.robot, (xs, us, zs, control_durations), x_estimated, P_t)
                    if not adjusted:
                        final_states.append(history_entries[-1].x_true)
                        print "current step " + str(current_step)
                        raise ValueError("Raised")
                        break
                    if self.show_viewer_simulation:
                        sim.update_viewer(x_true, x_estimated, z, control_duration=0.03, colliding_obstacle=sim.colliding_obstacle)

                    """ Evaluate the adjusted plan and the planned paths """
                    #if self.is_terminal(xs_adj[-1]) and not len(xs_adj)==1:
                    if not len(xs_adj) <= 1:
                        """ Only evaluate the adjusted path if it's > 1 """
                        paths.extend([[xs_adj, us_adj, zs_adj, control_durations_adj]])
                    if len(paths) == 0:
                        """ Running out of paths """
                        print "Error: Couldn't generate or evaluate any paths"
                        final_states.append(history_entries[-1].x_true)
                        break
                    (path_index, xs, us, zs, control_durations, objective, state_covariances,
                     deviation_covariances,
                     estimated_deviation_covariances) = path_evaluator.evaluate_paths(paths, P_t, current_step)
                    mean_planning_time += time.time() - t0
                    if path_index == None:
                        """ We couldn't evaluate any paths """
                        final_states.append(history_entries[-1].x_true)
                        break
                    if self.show_viewer_simulation:
                        self.visualize_paths(self.robot, [xs])
                rewards_cov.append(total_reward)
                self.serializer.write_line("log.log", tmp_dir, "Reward: " + str(total_reward) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "\n")
                number_of_steps += current_step

            mean_planning_time_per_step = mean_planning_time / mean_number_planning_steps
            mean_planning_time /= self.num_simulation_runs
            mean_num_generated_paths_step = num_generated_paths_run / mean_number_planning_steps

            """ Calculate the distance to goal area """
            ee_position_distances = []
            for state in final_states:
                joint_angles = v_double()
                joint_angles[:] = [state[y] for y in xrange(len(state) / 2)]
                ee_position = v_double()
                self.robot.getEndEffectorPosition(joint_angles, ee_position)
                ee_position = np.array([ee_position[y] for y in xrange(len(ee_position))])
                dist = np.linalg.norm(np.array(self.goal_position - ee_position))
                if dist < self.goal_radius:
                    dist = 0.0
                ee_position_distances.append(dist)
            logging.info("HRF: Done. total_reward is " + str(total_reward))
            try:
                n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(np.array(ee_position_distances))
            except:
                print ee_position_distances

            self.serializer.write_line("log.log", tmp_dir, "################################# \n")
            self.serializer.write_line("log.log", tmp_dir, "inc_covariance: " + str(self.inc_covariance) + "\n")
            if self.inc_covariance == "process":
                self.serializer.write_line("log.log", tmp_dir, "Process covariance: " + str(m_covs[j]) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "Observation covariance: " + str(self.min_observation_covariance) + " \n")
            elif self.inc_covariance == "observation":
                self.serializer.write_line("log.log", tmp_dir, "Process covariance: " + str(self.min_process_covariance) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "Observation covariance: " + str(m_covs[j]) + " \n")
            mean_linearization_error = linearization_error / number_of_steps
            number_of_steps /= self.num_simulation_runs
            self.serializer.write_line("log.log", tmp_dir, "Mean number of steps: " + str(number_of_steps) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean number of generated paths per step: " + str(mean_num_generated_paths_step) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean num collisions per run: " + str(float(num_collisions) / float(self.num_simulation_runs)) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Average distance to goal area: " + str(mean_distance_to_goal) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Num successes: " + str(successful_runs) + " \n")
            print "succ " + str((100.0 / self.num_simulation_runs) * successful_runs)
            self.serializer.write_line("log.log", tmp_dir, "Percentage of successful runs: " + str((100.0 / self.num_simulation_runs) * successful_runs) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean planning time per run: " + str(mean_planning_time) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean planning time per planning step: " + str(mean_planning_time_per_step) + " \n")
            n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))
            print "mean_rewards " + str(mean)
            #plt.plot_histogram_from_data(rewards_cov)
            #sleep
            self.serializer.write_line("log.log", tmp_dir, "Mean rewards: " + str(mean) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Reward variance: " + str(var) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean linearisation error: " + str(mean_linearization_error) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Seed: " + str(self.seed) + " \n")
            cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_hrf_" + str(m_covs[j]) + ".log"
            os.system(cmd)
            cmd = "cp " + self.abs_path + "/config_hrf.yaml " + dir
            os.system(cmd)
            if not os.path.exists(dir + "/environment"):
                os.makedirs(dir + "/environment")
            cmd = "cp " + self.abs_path + "/" + str(self.environment_file) + " " + str(dir) + "/environment"
            os.system(cmd)
            if not os.path.exists(dir + "/model"):
                os.makedirs(dir + "/model")
            cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
            os.system(cmd)
    print "Done."
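# The HRF loop above alternates kalman.predict_state and kalman.filter_update around one
# executed step: predict the belief along the nominal plan, execute a single control, then
# fold the new observation z back into the estimate. The snippet below is a self-contained
# sketch of that predict/update cycle for a linear(ized) model
#     x_{t+1} = A x_t + B u_t + V m_t,   z_t = H x_t + W n_t,   m ~ N(0, M), n ~ N(0, N).
# The function names, argument order and the exact linearization used by the actual kalman
# module are not shown in this file, so everything below is an illustrative assumption.
import numpy as np

def ekf_predict_sketch(x, u, A, B, V, M, P):
    # Propagate the mean through the (linearized) dynamics and grow the covariance.
    x_pred = A.dot(x) + B.dot(u)
    P_pred = A.dot(P).dot(A.T) + V.dot(M).dot(V.T)
    return x_pred, P_pred

def ekf_update_sketch(x_pred, P_pred, z, H, W, N):
    # Correct the predicted belief with the measurement z.
    S = H.dot(P_pred).dot(H.T) + W.dot(N).dot(W.T)   # innovation covariance
    K = P_pred.dot(H.T).dot(np.linalg.inv(S))        # Kalman gain
    x_est = x_pred + K.dot(z - H.dot(x_pred))
    P_est = (np.eye(P_pred.shape[0]) - K.dot(H)).dot(P_pred)
    return x_est, P_est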