Example #1
File: HRF.py Project: hebinbing/LQG
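The listing assumes the following imports. The standard-library, numpy and scipy imports are evident from the code; the project-local module paths are assumptions and depend on the repository layout:

import os
import subprocess
import time
import logging
import warnings

import numpy as np
import scipy.stats

# Project-local dependencies (import paths assumed): Simulator, PlanAdjuster,
# PathEvaluator, PathPlanningInterface, Serializer, Utils, Robot, kalman,
# get_goal_states, check_positive_definite, and the v_double / v2_double /
# v_string / v_obstacle containers.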
class HRF:
    def __init__(self):
        self.abs_path = os.path.dirname(os.path.abspath(__file__))
        cmd = "rm -rf " + self.abs_path + "/tmp"
        popen = subprocess.Popen(cmd, cwd=self.abs_path, shell=True)
        popen.wait()
        """ Reading the config """
        warnings.filterwarnings("ignore")
        self.init_serializer()
        config = self.serializer.read_config("config_hrf.yaml",
                                             path=self.abs_path)
        self.set_params(config)
        if self.seed < 0:
            """
            Generate a random seed that will be stored
            """
            self.seed = np.random.randint(0, 4294967295)
        np.random.seed(self.seed)

        logging_level = logging.WARN
        if config['verbose']:
            logging_level = logging.DEBUG
        logging.basicConfig(format='%(levelname)s: %(message)s',
                            level=logging_level)
        np.set_printoptions(precision=16)
        dir = self.abs_path + "/stats/hrf"
        tmp_dir = self.abs_path + "/tmp/hrf"
        self.utils = Utils()
        if not self.init_robot(self.robot_file):
            logging.error("HRF: Couldn't initialize robot")
            return
        if not self.setup_scene(self.environment_file, self.robot):
            return

        self.clear_stats(dir)
        logging.info("Start up simulator")
        sim = Simulator()
        plan_adjuster = PlanAdjuster()
        path_evaluator = PathEvaluator()
        path_planner = PathPlanningInterface()
        if self.show_viewer_simulation:
            self.robot.setupViewer(self.robot_file, self.environment_file)

        logging.info("HRF: Generating goal states...")
        problem_feasible = False
        while not problem_feasible:
            print "Creating random scene..."
            #self.create_random_obstacles(20)
            goal_states = get_goal_states(
                "hrf", self.abs_path, self.serializer, self.obstacles,
                self.robot, self.max_velocity, self.delta_t, self.start_state,
                self.goal_position, self.goal_radius, self.planning_algortihm,
                self.path_timeout, self.num_generated_goal_states,
                self.continuous_collision, self.environment_file,
                self.num_cores)

            if len(goal_states) == 0:
                logging.error(
                    "HRF: Couldn't generate any goal states. Problem seems to be infeasible"
                )
            else:
                problem_feasible = True
        '''obst = v_obstacle()
        obst[:] = self.obstacles
        self.robot.addObstacles(obst)'''
        logging.info("HRF: Generated " + str(len(goal_states)) +
                     " goal states")
        sim.setup_reward_function(self.discount_factor, self.step_penalty,
                                  self.illegal_move_penalty, self.exit_reward)
        path_planner.setup(self.robot, self.obstacles, self.max_velocity,
                           self.delta_t, self.use_linear_path,
                           self.planning_algortihm, self.path_timeout,
                           self.continuous_collision, self.num_cores)
        if self.dynamic_problem:
            path_planner.setup_dynamic_problem(
                self.robot_file, self.environment_file,
                self.simulation_step_size, self.num_control_samples,
                self.min_control_duration, self.max_control_duration,
                self.add_intermediate_states, self.rrt_goal_bias,
                self.control_sampler)

        A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(
            self.delta_t, self.robot_dof)
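        # A, B, V describe the (linearized) process model, H and W the
        # observation model, C and D the quadratic path/control deviation
        # cost weights, and M_base, N_base the unit process/observation
        # noise covariances that are scaled per sweep value below.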
        time_to_generate_paths = 0.0
        m_covs = None
        if check_positive_definite([C, D]):
            if self.inc_covariance == "process":
                m_covs = np.linspace(self.min_process_covariance,
                                     self.max_process_covariance,
                                     self.covariance_steps)
            elif self.inc_covariance == "observation":
                m_covs = np.linspace(self.min_observation_covariance,
                                     self.max_observation_covariance,
                                     self.covariance_steps)
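        # Sweep the noise magnitude over covariance_steps values between the
        # configured minimum and maximum; each value yields one full batch of
        # simulation runs below.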
        if m_covs is None:
            logging.error("HRF: No covariance values to sweep over (check "
                          "that C, D are positive definite and that "
                          "inc_covariance is valid)")
            return
        for j in xrange(len(m_covs)):
            M = None
            N = None
            if self.inc_covariance == "process":
                """ The process noise covariance matrix """
                M = self.calc_covariance_value(self.robot, m_covs[j], M_base)
                #M = m_covs[j] * M_base
                """ The observation error covariance matrix """
                N = self.calc_covariance_value(self.robot,
                                               self.min_observation_covariance,
                                               N_base,
                                               covariance_type='observation')
            elif self.inc_covariance == "observation":
                M = self.calc_covariance_value(self.robot,
                                               self.min_process_covariance,
                                               M_base)
                N = self.calc_covariance_value(self.robot,
                                               m_covs[j],
                                               N_base,
                                               covariance_type='observation')

            path_planner.setup_path_evaluator(
                A, B, C, D, H, M, N, V, W, self.robot, self.sample_size,
                self.obstacles, self.goal_position, self.goal_radius,
                self.robot_file, self.environment_file)
            sim.setup_problem(A, B, C, D, H, V, W, M, N, self.robot,
                              self.enforce_control_constraints, self.obstacles,
                              self.goal_position, self.goal_radius,
                              self.max_velocity, self.show_viewer_simulation,
                              self.robot_file, self.environment_file,
                              self.knows_collision)
            path_evaluator.setup(A, B, C, D, H, M, N, V, W, self.robot,
                                 self.sample_size, self.obstacles,
                                 self.goal_position, self.goal_radius,
                                 self.show_viewer_evaluation, self.robot_file,
                                 self.environment_file, self.num_cores)
            path_evaluator.setup_reward_function(self.step_penalty,
                                                 self.illegal_move_penalty,
                                                 self.exit_reward,
                                                 self.discount_factor)
            plan_adjuster.setup(self.robot, M, H, W, N, C, D,
                                self.dynamic_problem,
                                self.enforce_control_constraints)
            plan_adjuster.set_simulation_step_size(self.simulation_step_size)
            plan_adjuster.set_model_matrices(A, B, V)
            if not self.dynamic_problem:
                plan_adjuster.set_max_joint_velocities_linear_problem(
                    np.array(
                        [self.max_velocity for i in xrange(self.robot_dof)]))
            sim.set_stop_when_colliding(self.replan_when_colliding)
            if self.dynamic_problem:
                path_evaluator.setup_dynamic_problem()
                sim.setup_dynamic_problem(self.simulation_step_size)
            path_planner.setup_reward_function(self.step_penalty,
                                               self.exit_reward,
                                               self.illegal_move_penalty,
                                               self.discount_factor)
            mean_number_planning_steps = 0.0
            number_of_steps = 0.0
            mean_planning_time = 0.0
            num_generated_paths_run = 0.0
            successful_runs = 0
            num_collisions = 0.0
            linearization_error = 0.0
            final_states = []
            rewards_cov = []
            for k in xrange(self.num_simulation_runs):
                print "HRF: Run " + str(k + 1)
                self.serializer.write_line("log.log", tmp_dir,
                                           "RUN #" + str(k + 1) + " \n")
                current_step = 0
                x_true = np.array(self.start_state)
                x_estimated = np.array(self.start_state)
                #x_estimated[0] += 0.2
                #x_estimated[0] = 0.4
                #x_predicted = self.start_state
                P_t = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
                P_ext_t = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
                deviation_covariance = np.zeros(
                    (2 * self.robot_dof, 2 * self.robot_dof))
                estimated_deviation_covariance = np.zeros(
                    (2 * self.robot_dof, 2 * self.robot_dof))
                total_reward = 0.0
                terminal = False
                last_x_true = None
                """
                Obtain a nominal path
                """
                print "plan"
                path_planner.set_start_and_goal(x_estimated, goal_states,
                                                self.goal_position,
                                                self.goal_radius)
                t0 = time.time()
                (xs, us, zs, control_durations, num_generated_paths, objective,
                 state_covariances, deviation_covariances,
                 estimated_deviation_covariances, mean_gen_times,
                 mean_eval_times, total_gen_times,
                 total_eval_times) = path_planner.plan_and_evaluate_paths(
                     1, 0, current_step, self.evaluation_horizon, P_t,
                     deviation_covariance, estimated_deviation_covariance, 0.0)
                print "objective " + str(objective)

                while True:
                    print "current step " + str(current_step)
                    '''if current_step == 7:
                        x_true = np.array([last_x_true[k] for k in xrange(len(last_x_true))])
                        for k in xrange(len(last_x_true) / 2, len(last_x_true)):
                            last_x_true[k] = 0.0'''
                    """
                    Predict system state at t+1 using nominal path
                    """
                    """ Get state matrices """
                    #As, Bs, Vs, Ms, Hs, Ws, Ns = sim.get_linear_model_matrices(xs, us, control_durations)
                    As, Bs, Vs, Ms, Hs, Ws, Ns = sim.get_linear_model_matrices(
                        [x_estimated], [us[0]], control_durations)
                    """ Predict using EKF """
                    (x_predicted_temp, P_predicted) = kalman.predict_state(
                        self.robot, x_estimated, us[0], control_durations[0],
                        self.simulation_step_size, As[0], Bs[0], Vs[0], Ms[0],
                        P_ext_t, self.dynamic_problem)
                    """ Make sure x_predicted fulfills the constraints """
                    if self.enforce_constraints:
                        x_predicted_temp = sim.check_constraints(
                            x_predicted_temp)
                    predicted_collided = True
                    if not sim.is_in_collision([], x_predicted_temp)[0]:
                        x_predicted = x_predicted_temp
                        predicted_collided = False
                    else:
                        print "X_PREDICTED COLLIDES!"
                        # Fall back to the current estimate (a copy, so the
                        # estimate itself isn't mutated) and zero out its
                        # velocity components
                        x_predicted = np.array(x_estimated)
                        for l in xrange(
                                len(x_predicted) / 2, len(x_predicted)):
                            x_predicted[l] = 0

                    last_x_true = np.array(x_true)
                    """
                    Execute path for 1 time step
                    """
                    (x_true, x_tilde, x_tilde_linear, x_estimated_dash, z, P_t,
                     current_step, total_reward, terminal, estimated_s,
                     estimated_c, history_entries) = sim.simulate_n_steps(
                         xs,
                         us,
                         zs,
                         control_durations,
                         x_true,
                         x_estimated,
                         P_t,
                         total_reward,
                         current_step,
                         1,
                         0.0,
                         0.0,
                         max_num_steps=self.max_num_steps)

                    history_entries[-1].set_best_reward(objective)
                    """
                    Process history entries
                    """
                    try:
                        deviation_covariance = deviation_covariances[
                            len(history_entries) - 1]
                        estimated_deviation_covariance = estimated_deviation_covariances[
                            len(history_entries) - 1]
                    except IndexError:
                        print "IndexError: len(deviation_covariances) " + str(
                            len(deviation_covariances))
                        print "len(history_entries) " + str(
                            len(history_entries))
                        print "len(xs) " + str(len(xs))

                    history_entries[0].set_replanning(True)
                    for l in xrange(len(history_entries)):
                        try:
                            history_entries[l].set_estimated_covariance(
                                state_covariances[l])
                        except IndexError:
                            print "l " + str(l)
                            print "len(state_covariances) " + str(
                                len(state_covariances))

                        if history_entries[l].collided:
                            num_collisions += 1
                        linearization_error += history_entries[
                            l].linearization_error
                    if (current_step == self.max_num_steps) or terminal:
                        history_entries[-2].set_best_reward(objective)
                        history_entries[-1].set_best_reward(None)
                        for l in xrange(len(history_entries)):
                            history_entries[l].serialize(tmp_dir, "log.log")
                        final_states.append(history_entries[-1].x_true)
                        if terminal:
                            print "Terminal state reached"
                            successful_runs += 1
                        break
                    """
                    Plan new trajectories from predicted state
                    """
                    path_planner.set_start_and_goal(x_predicted, goal_states,
                                                    self.goal_position,
                                                    self.goal_radius)
                    t0 = time.time()
                    paths = path_planner.plan_paths(
                        self.num_paths,
                        0,
                        planning_timeout=self.timeout,
                        min_num_paths=1)
                    mean_planning_time += time.time() - t0
                    mean_number_planning_steps += 1.0
                    num_generated_paths_run += len(paths)
                    """
                    Filter update
                    """
                    (x_estimated_temp,
                     P_ext_t) = kalman.filter_update(x_predicted, P_predicted,
                                                     z, Hs[0], Ws[0], Ns[0])
                    """ Make sure x_estimated fulfills the constraints """
                    if self.enforce_constraints:
                        x_estimated_temp = sim.check_constraints(
                            x_estimated_temp)
                    in_collision, colliding_obstacle = sim.is_in_collision(
                        [], x_estimated_temp)
                    if in_collision:
                        history_entries[-1].set_estimate_collided(True)
                        for l in xrange(
                                len(x_estimated) / 2, len(x_estimated)):
                            x_estimated[l] = 0
                    elif history_entries[-1].collided and self.knows_collision:
                        for l in xrange(
                                len(x_estimated) / 2, len(x_estimated)):
                            x_estimated[l] = 0
                    else:
                        x_estimated = x_estimated_temp
                        history_entries[-1].set_estimate_collided(False)

                    history_entries[-1].serialize(tmp_dir, "log.log")
                    """
                    Adjust plan
                    """
                    t0 = time.time()
                    (xs_adj, us_adj, zs_adj, control_durations_adj,
                     adjusted) = plan_adjuster.adjust_plan(
                         self.robot, (xs, us, zs, control_durations),
                         x_estimated, P_t)
                    if not adjusted:
                        final_states.append(history_entries[-1].x_true)
                        print "current step " + str(current_step)
                        break
                    if self.show_viewer_simulation:
                        sim.update_viewer(
                            x_true,
                            x_estimated,
                            z,
                            control_duration=0.03,
                            colliding_obstacle=sim.colliding_obstacle)
                    """
                    Evaluate the adjusted plan and the planned paths
                    """
                    #if self.is_terminal(xs_adj[-1]) and not len(xs_adj)==1:
                    if len(xs_adj) > 1:
                        """
                        Only evaluate the adjusted path if it contains more
                        than one state
                        """
                        paths.append(
                            [xs_adj, us_adj, zs_adj, control_durations_adj])
                    if len(paths) == 0:
                        """
                        We ran out of paths
                        """
                        print "Error: Couldn't generate and evaluate any paths"
                        final_states.append(history_entries[-1].x_true)
                        break

                    (path_index, xs, us, zs, control_durations, objective,
                     state_covariances, deviation_covariances,
                     estimated_deviation_covariances
                     ) = path_evaluator.evaluate_paths(paths, P_t,
                                                       current_step)
                    mean_planning_time += time.time() - t0
                    if path_index is None:
                        """
                        We couldn't evaluate any paths
                        """
                        final_states.append(history_entries[-1].x_true)
                        break

                    if self.show_viewer_simulation:
                        self.visualize_paths(self.robot, [xs])

                rewards_cov.append(total_reward)
                self.serializer.write_line(
                    "log.log", tmp_dir, "Reward: " + str(total_reward) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "\n")
                number_of_steps += current_step
            mean_planning_time_per_step = mean_planning_time / mean_number_planning_steps
            mean_planning_time /= self.num_simulation_runs
            mean_num_generated_paths_step = num_generated_paths_run / mean_number_planning_steps
            """ Calculate the distance to goal area
            """
            ee_position_distances = []
            for state in final_states:
                joint_angles = v_double()
                joint_angles[:] = [state[y] for y in xrange(len(state) / 2)]
                ee_position = v_double()
                self.robot.getEndEffectorPosition(joint_angles, ee_position)
                ee_position = np.array(
                    [ee_position[y] for y in xrange(len(ee_position))])
                dist = np.linalg.norm(
                    np.array(self.goal_position - ee_position))
                if dist < self.goal_radius:
                    dist = 0.0
                ee_position_distances.append(dist)
            logging.info("HRF: Done. total_reward is " + str(total_reward))
            try:
                n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(
                    np.array(ee_position_distances))
            except Exception:
                print ee_position_distances
                raise

            self.serializer.write_line("log.log", tmp_dir,
                                       "################################# \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "inc_covariance: " + str(self.inc_covariance) + "\n")
            if self.inc_covariance == "process":
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Process covariance: " + str(m_covs[j]) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir, "Observation covariance: " +
                    str(self.min_observation_covariance) + " \n")
            elif self.inc_covariance == "observation":
                self.serializer.write_line(
                    "log.log", tmp_dir, "Process covariance: " +
                    str(self.min_process_covariance) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Observation covariance: " + str(m_covs[j]) + " \n")
            mean_linearization_error = linearization_error / number_of_steps
            number_of_steps /= self.num_simulation_runs
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Mean number of steps: " + str(number_of_steps) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Mean number of generated paths per step: " +
                str(mean_num_generated_paths_step) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean num collisions per run: " +
                str(float(num_collisions) / float(self.num_simulation_runs)) +
                " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Average distance to goal area: " +
                str(mean_distance_to_goal) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Num successes: " + str(successful_runs) + " \n")
            print "succ " + str(
                (100.0 / self.num_simulation_runs) * successful_runs)
            self.serializer.write_line(
                "log.log", tmp_dir, "Percentage of successful runs: " + str(
                    (100.0 / self.num_simulation_runs) * successful_runs) +
                " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean planning time per run: " +
                str(mean_planning_time) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean planning time per planning step: " +
                str(mean_planning_time_per_step) + " \n")

            n, min_max, mean, var, skew, kurt = scipy.stats.describe(
                np.array(rewards_cov))
            print "mean_rewards " + str(mean)
            #plt.plot_histogram_from_data(rewards_cov)
            #sleep
            self.serializer.write_line("log.log", tmp_dir,
                                       "Mean rewards: " + str(mean) + " \n")
            self.serializer.write_line("log.log", tmp_dir,
                                       "Reward variance: " + str(var) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean linearisation error: " +
                str(mean_linearization_error) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
            self.serializer.write_line("log.log", tmp_dir,
                                       "Seed: " + str(self.seed) + " \n")
            cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_hrf_" + str(
                m_covs[j]) + ".log"
            os.system(cmd)
        cmd = "cp " + self.abs_path + "/config_hrf.yaml " + dir
        os.system(cmd)

        if not os.path.exists(dir + "/environment"):
            os.makedirs(dir + "/environment")

        cmd = "cp " + self.abs_path + "/" + str(
            self.environment_file) + " " + str(dir) + "/environment"
        os.system(cmd)

        if not os.path.exists(dir + "/model"):
            os.makedirs(dir + "/model")

        cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
        os.system(cmd)
        print "Done."

    def is_terminal(self, x):
        """
        Check if x is terminal
        """
        state = v_double()
        state[:] = [x[j] for j in xrange(len(x) / 2)]
        ee_position_arr = v_double()
        self.robot.getEndEffectorPosition(state, ee_position_arr)
        ee_position = np.array(
            [ee_position_arr[j] for j in xrange(len(ee_position_arr))])
        norm = np.linalg.norm(ee_position - self.goal_position)
        return norm - 0.01 <= self.goal_radius

    def visualize_x(self, robot, x, color=None):
        cjvals = v_double()
        cjvels = v_double()
        cjvals[:] = [x[i] for i in xrange(len(x) / 2)]
        cjvels[:] = [x[i] for i in xrange(len(x) / 2, len(x))]
        particle_joint_values = v2_double()
        robot.updateViewerValues(cjvals, cjvels, particle_joint_values,
                                 particle_joint_values)

    def visualize_paths(self, robot, paths, colors=None):
        robot.removePermanentViewerParticles()
        particle_joint_values = v2_double()
        particle_joint_colors = v2_double()
        pjvs = []
        for k in xrange(len(paths)):
            for p in paths[k]:
                particle = v_double()
                particle[:] = [p[i] for i in xrange(len(p) / 2)]
                pjvs.append(particle)
                if colors is None:
                    color = v_double()
                    color[:] = [0.0, 0.0, 0.0, 0.7]
                    particle_joint_colors.append(color)
                else:
                    c = v_double()
                    c[:] = colors[k]  # one color per path (assumed)
                    particle_joint_colors.append(c)

        particle_joint_values[:] = pjvs
        self.robot.addPermanentViewerParticles(particle_joint_values,
                                               particle_joint_colors)

    def init_serializer(self):
        self.serializer = Serializer()
        self.serializer.create_temp_dir(self.abs_path, "hrf")

    def setup_viewer(self, robot, model_file, env_file):
        robot.setupViewer(model_file, env_file)

    def init_robot(self, urdf_model_file):
        self.robot = Robot(self.abs_path + "/" + urdf_model_file)
        self.robot.enforceConstraints(self.enforce_constraints)
        self.robot.setGravityConstant(self.gravity_constant)
        self.robot.setAccelerationLimit(self.acceleration_limit)
        """ Setup operations """
        self.robot_dof = self.robot.getDOF()
        if len(self.start_state) != 2 * self.robot_dof:
            logging.error(
                "HRF: Start state dimension doesn't fit to the robot state space dimension"
            )
            return False
        return True

    def setup_scene(self, environment_file, robot):
        """ Load the obstacles """
        self.obstacles = self.utils.loadObstaclesXML(self.abs_path + "/" +
                                                     environment_file)
        """ Load the goal area """
        goal_area = v_double()
        self.utils.loadGoalArea(self.abs_path + "/" + environment_file,
                                goal_area)
        if len(goal_area) == 0:
            print "ERROR: Your environment file doesn't define a goal area"
            return False
        self.goal_position = [goal_area[i] for i in xrange(0, 3)]
        self.goal_radius = goal_area[3]
        return True

    def create_random_obstacles(self, n):
        self.obstacles = []
        for i in xrange(n):
            name = "obst_" + str(i)
            x_pos = np.random.uniform(-1.0, 4.0)
            y_pos = np.random.uniform(-1.0, 4.0)
            z_pos = np.random.uniform(3.0, 6.0)

            x_size = 0.25
            y_size = 0.25
            z_size = 0.25

            obst = self.utils.generateObstacle(name, x_pos, y_pos, z_pos,
                                               x_size, y_size, z_size)
            self.obstacles.append(obst[0])

    def clear_stats(self, dir):
        if os.path.isdir(dir):
            cmd = "rm -rf " + dir + "/*"
            os.system(cmd)
        else:
            os.makedirs(dir)

    def calc_covariance_value(self,
                              robot,
                              error,
                              covariance_matrix,
                              covariance_type='process'):
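        """
        'error' is interpreted as a percentage: each diagonal entry is set
        so that the standard deviation equals (error / 100) of the
        corresponding range (torque, position, or velocity range).
        """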
        active_joints = v_string()
        robot.getActiveJoints(active_joints)
        if covariance_type == 'process':
            if self.dynamic_problem:
                torque_limits = v_double()
                robot.getJointTorqueLimits(active_joints, torque_limits)
                torque_limits = [
                    torque_limits[i] for i in xrange(len(torque_limits))
                ]
                for i in xrange(len(torque_limits)):
                    torque_range = 2.0 * torque_limits[i]
                    covariance_matrix[i, i] = np.square(
                        (torque_range / 100.0) * error)
            else:
                for i in xrange(self.robot_dof):
                    covariance_matrix[i, i] = np.square(
                        (self.max_velocity / 100.0) * error)
        else:
            lower_position_limits = v_double()
            upper_position_limits = v_double()
            velocity_limits = v_double()
            robot.getJointLowerPositionLimits(active_joints,
                                              lower_position_limits)
            robot.getJointUpperPositionLimits(active_joints,
                                              upper_position_limits)
            robot.getJointVelocityLimits(active_joints, velocity_limits)
            for i in xrange(self.robot_dof):
                position_range = upper_position_limits[
                    i] - lower_position_limits[i]
                covariance_matrix[i, i] = np.square(
                    (position_range / 100.0) * error)
                velocity_range = 2.0 * velocity_limits[i]
                covariance_matrix[i + self.robot_dof,
                                  i + self.robot_dof] = np.square(
                                      (velocity_range / 100.0) * error)
        return covariance_matrix

    def problem_setup(self, delta_t, num_links):
        A = np.identity(num_links * 2)
        H = np.identity(num_links * 2)
        W = np.identity(num_links * 2)
        C = self.path_deviation_cost * np.identity(num_links * 2)
        '''B = delta_t * np.identity(num_links * 2)                
        V = np.identity(num_links * 2)
        D = self.control_deviation_cost * np.identity(num_links * 2)        
        M_base = np.identity(2 * self.robot_dof)
        N_base = np.identity(2 * self.robot_dof)'''

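        # The state is [positions; velocities]. In the linear problem the
        # controls are interpreted as joint velocities: B integrates them
        # into the position half of the state over delta_t, and V injects
        # the dof-dimensional process noise in the same way.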
        B = delta_t * np.vstack(
            (np.identity(num_links), np.zeros((num_links, num_links))))
        V = np.vstack((np.identity(num_links), np.zeros(
            (num_links, num_links))))
        D = self.control_deviation_cost * np.identity(num_links)
        M_base = np.identity(self.robot_dof)
        N_base = np.identity(2 * self.robot_dof)

        return A, H, B, V, W, C, D, M_base, N_base

    def set_params(self, config):
        self.num_paths = config['num_generated_paths']
        self.use_linear_path = config['use_linear_path']
        self.max_velocity = config['max_velocity']
        self.delta_t = 1.0 / config['control_rate']
        self.start_state = config['start_state']
        self.num_simulation_runs = config['num_simulation_runs']
        self.num_bins = config['num_bins']
        self.min_process_covariance = config['min_process_covariance']
        self.max_process_covariance = config['max_process_covariance']
        self.covariance_steps = config['covariance_steps']
        self.min_observation_covariance = config['min_observation_covariance']
        self.max_observation_covariance = config['max_observation_covariance']
        self.discount_factor = config['discount_factor']
        self.illegal_move_penalty = config['illegal_move_penalty']
        self.step_penalty = config['step_penalty']
        self.exit_reward = config['exit_reward']
        self.stop_when_terminal = config['stop_when_terminal']
        self.enforce_constraints = config['enforce_constraints']
        self.enforce_control_constraints = config[
            'enforce_control_constraints']
        self.sample_size = config['sample_size']
        self.plot_paths = config['plot_paths']
        self.planning_algortihm = config['planning_algorithm']
        self.dynamic_problem = config['dynamic_problem']
        self.simulation_step_size = config['simulation_step_size']
        self.path_timeout = config['path_timeout']
        self.continuous_collision = config['continuous_collision_check']
        self.show_viewer_evaluation = config['show_viewer_evaluation']
        self.show_viewer_simulation = config['show_viewer_simulation']
        self.path_deviation_cost = config['path_deviation_cost']
        self.control_deviation_cost = config['control_deviation_cost']
        self.num_control_samples = config['num_control_samples']
        self.min_control_duration = config['min_control_duration']
        self.max_control_duration = config['max_control_duration']
        self.inc_covariance = config['inc_covariance']
        self.add_intermediate_states = config['add_intermediate_states']
        self.gravity_constant = config['gravity']
        self.num_generated_goal_states = config['num_generated_goal_states']
        self.robot_file = config['robot_file']
        self.environment_file = config['environment_file']
        self.rrt_goal_bias = config['rrt_goal_bias']
        self.control_sampler = config['control_sampler']
        self.max_num_steps = config['max_num_steps']
        self.evaluation_horizon = config['horizon']
        self.timeout = config['timeout']
        self.seed = config['seed']
        self.num_cores = config['num_cores']
        self.replan_when_colliding = config['replan_when_colliding']
        self.acceleration_limit = config['acceleration_limit']
        self.knows_collision = config['knows_collision']
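
Since the whole experiment runs from the constructor, a minimal entry point for this example could look as follows (hypothetical; the original repository may construct and run HRF differently):

if __name__ == "__main__":
    HRF()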
Example #2
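The MPC example assumes the same standard and project-local imports as Example #1 (import paths again assumed); it reads its configuration from config_mpc.yaml.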
class MPC:
    def __init__(self, plot):
        self.abs_path = os.path.dirname(os.path.abspath(__file__))
        """ Reading the config """
        warnings.filterwarnings("ignore")
        self.init_serializer()
        config = self.serializer.read_config("config_mpc.yaml", path=self.abs_path)
        self.set_params(config)
        if self.seed < 0:
            """
            Generate a random seed that will be stored
            """
            self.seed = np.random.randint(0, 4294967295)
        np.random.seed(self.seed)
        
        logging_level = logging.WARN
        if config['verbose']:
            logging_level = logging.DEBUG
        logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_level)        
        np.set_printoptions(precision=16)
        dir = self.abs_path + "/stats/mpc"
        tmp_dir = self.abs_path + "/tmp/mpc"       
        self.utils = Utils()
        if not self.init_robot(self.robot_file):
            logging.error("MPC: Couldn't initialize robot")
            return               
        if not self.setup_scene(self.environment_file, self.robot):
            return   
        #self.run_viewer(self.robot_file, self.environment_file)     
        self.clear_stats(dir)
        logging.info("Start up simulator")
        sim = Simulator()        
        path_evaluator = PathEvaluator()
        path_planner = PathPlanningInterface()
        logging.info("MPC: Generating goal states...")        
        goal_states = get_goal_states("mpc",
                                      self.abs_path,
                                      self.serializer, 
                                      self.obstacles,                                                                           
                                      self.robot,                                    
                                      self.max_velocity,
                                      self.delta_t,
                                      self.start_state,
                                      self.goal_position,
                                      self.goal_radius,
                                      self.planning_algortihm,
                                      self.path_timeout,
                                      self.num_generated_goal_states,
                                      self.continuous_collision,
                                      self.environment_file,
                                      self.num_cores)
        
        if len(goal_states) == 0:
            logging.error("MPC: Couldn't generate any goal states. Problem seems to be infeasible")
            return
        logging.info("MPC: Generated " + str(len(goal_states)) + " goal states")
        sim.setup_reward_function(self.discount_factor, self.step_penalty, self.illegal_move_penalty, self.exit_reward)
        path_planner.setup(self.robot,                         
                           self.obstacles,  
                           self.max_velocity, 
                           self.delta_t, 
                           self.use_linear_path,
                           self.planning_algortihm,
                           self.path_timeout,
                           self.continuous_collision,
                           self.num_cores)
        if self.dynamic_problem:
            path_planner.setup_dynamic_problem(self.robot_file,
                                               self.environment_file,
                                               self.simulation_step_size,
                                               self.num_control_samples,
                                               self.min_control_duration,
                                               self.max_control_duration,
                                               self.add_intermediate_states,
                                               self.rrt_goal_bias,
                                               self.control_sampler)
             
        A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(self.delta_t, self.robot_dof)
        time_to_generate_paths = 0.0
        m_covs = None
        if check_positive_definite([C, D]):
            if self.inc_covariance == "process":
                m_covs = np.linspace(self.min_process_covariance, 
                                     self.max_process_covariance, 
                                     self.covariance_steps)                 
            elif self.inc_covariance == "observation":          
                m_covs = np.linspace(self.min_observation_covariance, 
                                     self.max_observation_covariance,
                                     self.covariance_steps)
        if m_covs is None:
            logging.error("MPC: No covariance values to sweep over (check "
                          "that C, D are positive definite and that "
                          "inc_covariance is valid)")
            return
        for j in xrange(len(m_covs)):
            M = None
            N = None
            if self.inc_covariance == "process":
                """ The process noise covariance matrix """
                M = self.calc_covariance_value(self.robot, m_covs[j], M_base)
                #M = m_covs[j] * M_base
                    
                """ The observation error covariance matrix """
                N = self.calc_covariance_value(self.robot, 
                                               self.min_observation_covariance, 
                                               N_base, 
                                               covariance_type='observation')                    
            elif self.inc_covariance == "observation":
                M = self.calc_covariance_value(self.robot, 
                                               self.min_process_covariance,
                                               M_base)
                N = self.calc_covariance_value(self.robot, 
                                               m_covs[j],
                                               N_base, 
                                               covariance_type='observation')                    
            P_t = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
            
            path_planner.setup_path_evaluator(A, B, C, D, H, M, N, V, W,                                     
                                              self.robot, 
                                              self.sample_size, 
                                              self.obstacles,
                                              self.goal_position,
                                              self.goal_radius,                                              
                                              self.robot_file,
                                              self.environment_file)
            sim.setup_problem(A, B, C, D, H, V, W, M, N,
                              self.robot, 
                              self.enforce_control_constraints,
                              self.obstacles, 
                              self.goal_position, 
                              self.goal_radius,
                              self.max_velocity,                                  
                              self.show_viewer_simulation,
                              self.robot_file,
                              self.environment_file)
            sim.set_stop_when_colliding(self.replan_when_colliding)
            if self.dynamic_problem:
                path_evaluator.setup_dynamic_problem()
                sim.setup_dynamic_problem(self.simulation_step_size)
            path_planner.setup_reward_function(self.step_penalty, self.exit_reward, self.illegal_move_penalty, self.discount_factor)
            mean_number_planning_steps = 0.0
            number_of_steps = 0.0
            mean_planning_time = 0.0
            num_generated_paths_run = 0.0
            successful_runs = 0
            num_collisions = 0.0
            final_states = []
            rewards_cov = []
            for k in xrange(self.num_simulation_runs):
                print "MPC: Run " + str(k + 1)                                
                self.serializer.write_line("log.log", tmp_dir, "RUN #" + str(k + 1) + " \n")
                current_step = 0
                x_true = np.array(self.start_state)
                x_estimate = np.array(self.start_state)
                x_tilde_linear = np.zeros(2 * self.robot_dof)
                P_t = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
                deviation_covariance = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
                estimated_deviation_covariance = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
                total_reward = 0.0
                terminal = False
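                # MPC loop: replan from the current estimate, execute up to
                # num_execution_steps of the best plan, and repeat until the
                # goal is reached or the step budget is exhausted.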
                while current_step < self.max_num_steps and not terminal:
                    path_planner.set_start_and_goal(x_estimate, goal_states, self.goal_position, self.goal_radius)
                    t0 = time.time()
                    (xs, 
                     us, 
                     zs,
                     control_durations, 
                     num_generated_paths, 
                     best_val,
                     state_covariances,
                     deviation_covariances,
                     estimated_deviation_covariances, 
                     mean_gen_times, 
                     mean_eval_times,
                     total_gen_times,
                     total_eval_times) = path_planner.plan_and_evaluate_paths(self.num_paths, 
                                                                              0, 
                                                                              current_step, 
                                                                              self.evaluation_horizon, 
                                                                              P_t,
                                                                              deviation_covariance,
                                                                              estimated_deviation_covariance, 
                                                                              self.timeout)
                    mean_planning_time += time.time() - t0
                    mean_number_planning_steps += 1.0  
                    num_generated_paths_run += num_generated_paths
                    if len(xs) == 0:
                        logging.error("MPC: Couldn't find any paths from start state " +
                                      str(x_estimate) +
                                      " to goal states")
                        total_reward = np.array([-self.illegal_move_penalty])[0]
                        current_step += 1
                        break
                    x_tilde = np.zeros(2 * self.robot_dof)
                    n_steps = self.num_execution_steps
                    
                    if n_steps > len(xs) - 1:
                        n_steps = len(xs) - 1
                    if current_step + n_steps > self.max_num_steps:
                        n_steps = self.max_num_steps - current_step
                    (x_true, 
                     x_tilde,
                     x_tilde_linear, 
                     x_estimate, 
                     P_t, 
                     current_step, 
                     total_reward,
                     terminal,
                     estimated_s,
                     estimated_c,
                     history_entries) = sim.simulate_n_steps(xs, us, zs,
                                                             control_durations,
                                                             x_true,                                                              
                                                             x_tilde,
                                                             x_tilde_linear,
                                                             x_estimate,
                                                             P_t,
                                                             total_reward,                                                                 
                                                             current_step,
                                                             n_steps,
                                                             0.0,
                                                             0.0,
                                                             max_num_steps=self.max_num_steps)
                    #print "len(hist) " + str(len(history_entries))
                    #print "len(deviation_covariances) " + str(len(deviation_covariances))
                    #print "len(estimated_deviation_covariances) " + str(len(estimated_deviation_covariances))
                    deviation_covariance = deviation_covariances[len(history_entries) - 1]
                    estimated_deviation_covariance = estimated_deviation_covariances[len(history_entries) - 1]
                    
                    if (current_step == self.max_num_steps) or terminal:
                        final_states.append(history_entries[-1].x_true)
                        print "len " + str(len(history_entries))
                        print "t " + str(history_entries[-1].t)
                        if terminal:
                            successful_runs += 1
                        
                    history_entries[0].set_replanning(True)                        
                    for l in xrange(len(history_entries)):
                        history_entries[l].set_estimated_covariance(state_covariances[l])                        
                        history_entries[l].serialize(tmp_dir, "log.log")
                        if history_entries[l].collided:                            
                            num_collisions += 1
                            collided = True
                    #logging.warn("MPC: Execution finished. True state is " + str(x_true))
                    #logging.warn("MPC: Estimated state is " + str(x_estimate))
                    logging.warn("MPC: Executed " + str(current_step) + " steps") 
                    logging.warn("MPC: terminal " + str(terminal))
                    if terminal:
                        print "MPC: Final state: " + str(x_true)
                rewards_cov.append(total_reward)
                self.serializer.write_line("log.log", 
                                           tmp_dir,
                                           "Reward: " + str(total_reward) + " \n") 
                self.serializer.write_line("log.log", 
                                           tmp_dir,
                                           "\n")
                number_of_steps += current_step
            mean_planning_time_per_step = mean_planning_time / mean_number_planning_steps 
            mean_planning_time /= self.num_simulation_runs
                                
            """ Calculate the distance to goal area
            """  
            ee_position_distances = [] 
            for state in final_states:
                joint_angles = v_double()
                joint_angles[:] = [state[y] for y in xrange(len(state) / 2)]
                ee_position = v_double()
                self.robot.getEndEffectorPosition(joint_angles, ee_position)
                ee_position = np.array([ee_position[y] for y in xrange(len(ee_position))])
                dist = np.linalg.norm(np.array(self.goal_position - ee_position))
                if dist < self.goal_radius:
                    dist = 0.0
                ee_position_distances.append(dist)
            logging.info("MPC: Done. total_reward is " + str(total_reward))
            try:
                n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(np.array(ee_position_distances))
            except Exception:
                print ee_position_distances
                raise
                
            self.serializer.write_line("log.log", tmp_dir, "################################# \n")
            self.serializer.write_line("log.log",
                                       tmp_dir,
                                       "inc_covariance: " + str(self.inc_covariance) + "\n")
            if self.inc_covariance == "process":                  
                self.serializer.write_line("log.log",
                                           tmp_dir,
                                           "Process covariance: " + str(m_covs[j]) + " \n")
                self.serializer.write_line("log.log",
                                           tmp_dir,
                                           "Observation covariance: " + str(self.min_observation_covariance) + " \n")
            elif self.inc_covariance == "observation":                    
                self.serializer.write_line("log.log",
                                           tmp_dir,
                                           "Process covariance: " + str(self.min_process_covariance) + " \n")
                self.serializer.write_line("log.log",
                                           tmp_dir,
                                           "Observation covariance: " + str(m_covs[j]) + " \n")
            number_of_steps /= self.num_simulation_runs
            self.serializer.write_line("log.log", tmp_dir, "Mean number of steps: " + str(number_of_steps) + " \n")                            
            self.serializer.write_line("log.log", tmp_dir, "Mean num collisions per run: " + str(float(num_collisions) / float(self.num_simulation_runs)) + " \n")
            self.serializer.write_line("log.log", 
                                       tmp_dir, 
                                       "Average distance to goal area: " + str(mean_distance_to_goal) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Num successes: " + str(successful_runs) + " \n")
            print "succ " + str((100.0 / self.num_simulation_runs) * successful_runs)
            self.serializer.write_line("log.log", tmp_dir, "Percentage of successful runs: " + str((100.0 / self.num_simulation_runs) * successful_runs) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean planning time per run: " + str(mean_planning_time) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean planning time per planning step: " + str(mean_planning_time_per_step) + " \n")
            
            n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))
            print "mean_rewards " + str(mean)
            #plt.plot_histogram_from_data(rewards_cov)
            #sleep
            self.serializer.write_line("log.log", tmp_dir, "Mean rewards: " + str(mean) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Reward variance: " + str(var) + " \n")
            self.serializer.write_line("log.log", 
                                       tmp_dir, 
                                       "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Seed: " + str(self.seed) + " \n")
            cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_mpc_" + str(m_covs[j]) + ".log"
            os.system(cmd)
        
        cmd = "cp " + self.abs_path + "/config_mpc.yaml " + dir           
        os.system(cmd)
        
        if not os.path.exists(dir + "/environment"):
            os.makedirs(dir + "/environment") 
            
        cmd = "cp " + self.abs_path + "/" + str(self.environment_file) + " " + str(dir) + "/environment"
        os.system(cmd) 
            
        if not os.path.exists(dir + "/model"):
            os.makedirs(dir + "/model")
                
        cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
        os.system(cmd)
        print "Done."
        
                
    def init_robot(self, urdf_model_file):
        self.robot = Robot(self.abs_path + "/" + urdf_model_file)
        self.robot.enforceConstraints(self.enforce_constraints)
        self.robot.setGravityConstant(self.gravity_constant)
        self.robot.setAccelerationLimit(self.acceleration_limit)
        """ Setup operations """
        self.robot_dof = self.robot.getDOF()        
        if len(self.start_state) != 2 * self.robot_dof:
            logging.error("MPC: Start state dimension doesn't fit to the robot state space dimension")
            return False
        return True
    
    def run_viewer(self, model_file, env_file):
        show_viewer = True
        rot = v_double()
        trans = v_double()
        rot[:] = [-1.0, 0.0, 0.0, 0.0]
        trans[:] = [0.0, 0.0, 3.0]
        if show_viewer:
            self.robot.setViewerBackgroundColor(0.6, 0.8, 0.6)
            self.robot.setViewerSize(1280, 768)
            self.robot.setupViewer(model_file, env_file)
        fx = 0.0
        fy = 0.0
        fz = 0.0
        f_roll = 0.0
        f_pitch = 0.0
        f_yaw = 0.0         
        x = [0.0 for i in xrange(2 * self.robot_dof)]
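        # Hard-coded 6-dimensional test state (overrides the zero vector above)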
        x = [0.8110760662014179,
             -0.2416850600576381,
             1.5922944779127346,
             5.1412306972285471,
             -10.0,
             2.5101115097604483]
                
        integration_times = [] 
        collision_check_times1 = []
        collision_check_times2 = []     
        
        y = 0
        while True:                    
            #u_in = [3.0, 1.5, 0.0, 0.0, 0.0, 0.0]
            u_in = [0.0 for i in xrange(self.robot_dof)]
            #u_in[0] = 150.0
            #u_in[1] = 70.0
            current_state = v_double()            
            current_state[:] = x            
            control = v_double()            
            control[:] = u_in
                       
            control_error = v_double()
            ce = [0.0 for i in xrange(self.robot_dof)]
            control_error[:] = ce
            result = v_double()            
            
            t0 = time.time()                    
            self.robot.propagate(current_state,
                                 control,
                                 control_error,
                                 self.simulation_step_size,
                                 0.03,
                                 result)
            t = time.time() - t0
            integration_times.append(t)
            if y == 10000:
                t_sum = sum(integration_times)
                t_mean = t_sum / len(integration_times)
                print "mean integration times: " + str(t_mean)
                t_sum = sum(collision_check_times1)
                t_mean = t_sum / len(collision_check_times1)
                print "mean collision check times old " + str(t_mean)
                t_mean = sum(collision_check_times2) / len(collision_check_times2)
                print "mean collision check times new " + str(t_mean)
                return  # stop after printing the timing statistics
            
            ja_start = v_double()            
            ja_start[:] = [current_state[i] for i in xrange(len(current_state) / 2)]                     
            collision_objects_start = self.robot.createRobotCollisionObjects(ja_start)
            joint_angles = v_double()
            joint_angles[:] = [result[i] for i in xrange(len(result) / 2)]
            collision_objects_goal = self.robot.createRobotCollisionObjects(joint_angles)
            #print "ee_velocity " + str([ee_velocity[i] for i in xrange(len(ee_velocity))])
            # The block this timer originally measured is commented out below,
            # so t2 effectively records an empty interval.
            t_coll_check1 = time.time()
            t2 = time.time() - t_coll_check1
            collision_check_times2.append(t2)
            
            '''
            Get the end effector position
            '''
            #ee_position = v_double()            
            #self.robot.getEndEffectorPosition(joint_angles, ee_position)                      
            #ee_collision_objects = self.robot.createEndEffectorCollisionObject(joint_angles)
            in_collision = False
            t_c = time.time()                      
            for o in self.obstacles:                
                if o.inCollisionDiscrete(collision_objects_goal):                                                            
                    in_collision = True
                    break                              
                '''for i in xrange(len(collision_objects_start)):                        
                    if o.inCollisionContinuous([collision_objects_start[i], collision_objects_goal[i]]):
                        in_collision = True
                        break'''               
            if in_collision:
                print "COLLISION"

            t = time.time() - t_c            
            collision_check_times1.append(t)          
                                
                                                                          
            x = np.array([result[i] for i in xrange(len(result))])
            cjvals = v_double()
            cjvels = v_double()
            cjvals_arr = [x[i] for i in xrange(len(x) / 2)]
            cjvels_arr = [x[i] for i in xrange(len(x) / 2, len(x))]
            cjvals[:] = cjvals_arr
            cjvels[:] = cjvels_arr
            particle_joint_values = v2_double()
            if show_viewer:
                self.robot.updateViewerValues(cjvals, 
                                              cjvels,
                                              particle_joint_values,
                                              particle_joint_values)
            time.sleep(0.03) 
            
            y += 1
            print y
    
    def setup_scene(self,                    
                    environment_file,
                    robot):
        """ Load the obstacles """         
        self.obstacles = self.utils.loadObstaclesXML(self.abs_path + "/" + environment_file)      
        
        """ Load the goal area """
        goal_area = v_double()
        self.utils.loadGoalArea(self.abs_path + "/" + environment_file, goal_area)
        if len(goal_area) == 0:
            print "ERROR: Your environment file doesn't define a goal area"
            return False
        self.goal_position = [goal_area[i] for i in xrange(0, 3)]
        self.goal_radius = goal_area[3]
        return True
                
    def init_serializer(self):        
        self.serializer = Serializer()
        self.serializer.create_temp_dir(self.abs_path, "mpc") 
                
    def clear_stats(self, dir):
        if os.path.isdir(dir):
            cmd = "rm -rf " + dir + "/*"            
            os.system(cmd)
        else:
            os.makedirs(dir)
                
    def get_average_distance_to_goal_area(self, goal_position, goal_radius, cartesian_coords):        
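        """
        Average Euclidean distance of the given Cartesian coordinates to
        the goal position; distances inside the goal radius count as zero.
        """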
        avg_dist = 0.0
        goal_pos = np.array(goal_position)        
        for i in xrange(len(cartesian_coords)):            
            cart = np.array(cartesian_coords[i])            
            dist = np.linalg.norm(goal_pos - cart)            
            if dist < goal_radius:
                dist = 0.0
            avg_dist += dist
        if avg_dist == 0.0:
            return avg_dist        
        return np.asscalar(avg_dist) / len(cartesian_coords)
            
    def calc_covariance_value(self, robot, error, covariance_matrix, covariance_type='process'):
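        """
        Scale the diagonal of covariance_matrix so that each standard
        deviation is `error` percent of the corresponding range (torque,
        position or velocity range, depending on covariance_type).
        E.g. a torque limit of 70 and error = 2.0 give
        range = 140, sigma = (140 / 100) * 2.0 = 2.8, variance = 7.84.
        """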
        active_joints = v_string()
        robot.getActiveJoints(active_joints)        
        if covariance_type == 'process':
            if self.dynamic_problem:            
                torque_limits = v_double()
                robot.getJointTorqueLimits(active_joints, torque_limits)
                torque_limits = [torque_limits[i] for i in xrange(len(torque_limits))]
                for i in xrange(len(torque_limits)):
                    torque_range = 2.0 * torque_limits[i]
                    covariance_matrix[i, i] = np.square((torque_range / 100.0) * error)
            else:
                for i in xrange(self.robot_dof):
                    covariance_matrix[i, i] = np.square((self.max_velocity / 100.0) * error)
        else:
            lower_position_limits = v_double()
            upper_position_limits = v_double()
            velocity_limits = v_double()            
            robot.getJointLowerPositionLimits(active_joints, lower_position_limits)
            robot.getJointUpperPositionLimits(active_joints, upper_position_limits)
            robot.getJointVelocityLimits(active_joints, velocity_limits)            
            for i in xrange(self.robot_dof):
                position_range = upper_position_limits[i] - lower_position_limits[i]
                covariance_matrix[i, i] = np.square((position_range / 100.0) * error)            
                velocity_range = 2.0 * velocity_limits[i]
                covariance_matrix[i + self.robot_dof, i + self.robot_dof] = np.square((velocity_range / 100.0) * error)          
        return covariance_matrix            
        
            
    def problem_setup(self, delta_t, num_links):
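        """
        Build the matrices of the linear model used for planning and control.
        Reading the code: A is the state transition, B maps controls into the
        position half of the doubled state, V does the same for process noise,
        H and W define the observation model, and C and D weight path and
        control deviations.
        """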
        A = np.identity(num_links * 2)
        H = np.identity(num_links * 2)
        W = np.identity(num_links * 2)
        C = self.path_deviation_cost * np.identity(num_links * 2)
        
        '''B = delta_t * np.identity(num_links * 2)                
        V = np.identity(num_links * 2)
        D = self.control_deviation_cost * np.identity(num_links * 2)        
        M_base = np.identity(2 * self.robot_dof)
        N_base = np.identity(2 * self.robot_dof)'''
        
        B = delta_t * np.vstack((np.identity(num_links),
                                 np.zeros((num_links, num_links))))
        V = np.vstack((np.identity(num_links),
                       np.zeros((num_links, num_links))))
        D = self.control_deviation_cost * np.identity(num_links)
        M_base = np.identity(self.robot_dof)
        N_base = np.identity(2 * self.robot_dof)
        
        
        return A, H, B, V, W, C, D, M_base, N_base
        
    def set_params(self, config):
        self.num_paths = config['num_generated_paths']
        self.use_linear_path = config['use_linear_path']        
        self.max_velocity = config['max_velocity']
        self.delta_t = 1.0 / config['control_rate']
        self.start_state = config['start_state']        
        self.num_simulation_runs = config['num_simulation_runs']
        self.num_bins = config['num_bins']
        self.min_process_covariance = config['min_process_covariance']
        self.max_process_covariance = config['max_process_covariance']
        self.covariance_steps = config['covariance_steps']
        self.min_observation_covariance = config['min_observation_covariance']
        self.max_observation_covariance = config['max_observation_covariance']       
        self.discount_factor = config['discount_factor']
        self.illegal_move_penalty = config['illegal_move_penalty']
        self.step_penalty = config['step_penalty']
        self.exit_reward = config['exit_reward']
        self.stop_when_terminal = config['stop_when_terminal']        
        self.enforce_constraints = config['enforce_constraints']
        self.enforce_control_constraints = config['enforce_control_constraints']
        self.sample_size = config['sample_size']
        self.plot_paths = config['plot_paths']
        self.planning_algortihm = config['planning_algorithm']
        self.dynamic_problem = config['dynamic_problem'] 
        self.simulation_step_size = config['simulation_step_size']        
        self.path_timeout = config['path_timeout'] 
        self.continuous_collision = config['continuous_collision_check']
        self.show_viewer_evaluation = config['show_viewer_evaluation']
        self.show_viewer_simulation = config['show_viewer_simulation']   
        self.path_deviation_cost = config['path_deviation_cost'] 
        self.control_deviation_cost = config['control_deviation_cost']
        self.num_control_samples = config['num_control_samples'] 
        self.min_control_duration = config['min_control_duration']
        self.max_control_duration = config['max_control_duration']   
        self.inc_covariance = config['inc_covariance'] 
        self.add_intermediate_states = config['add_intermediate_states']
        self.gravity_constant = config['gravity']
        self.num_generated_goal_states = config['num_generated_goal_states']
        self.robot_file = config['robot_file']
        self.environment_file = config['environment_file']
        self.rrt_goal_bias = config['rrt_goal_bias']
        self.control_sampler = config['control_sampler']
        self.max_num_steps = config['max_num_steps']
        self.evaluation_horizon = config['horizon']
        self.timeout = config['timeout']
        self.seed = config['seed']
        self.num_cores = config['num_cores']
        self.replan_when_colliding = config['replan_when_colliding']
        self.acceleration_limit = config['acceleration_limit']
        
        
        """
        The number of steps a path is executed before a new path is calculated
        """
        self.num_execution_steps = config['num_execution_steps']
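
The covariance scaling used in calc_covariance_value above treats the error parameter as a percentage of each dimension's range. A minimal standalone sketch of that scaling (function name and values here are illustrative, not part of the original project):

import numpy as np

def scaled_process_covariance(torque_limits, error_percent):
    """Diagonal covariance with sigma_i = (2 * limit_i / 100) * error_percent."""
    cov = np.zeros((len(torque_limits), len(torque_limits)))
    for i, limit in enumerate(torque_limits):
        torque_range = 2.0 * limit  # symmetric limits span [-limit, +limit]
        cov[i, i] = np.square((torque_range / 100.0) * error_percent)
    return cov

print scaled_process_covariance([70.0, 70.0], 2.0)
# diagonal entries: ((140.0 / 100.0) * 2.0)**2 = 7.84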
Example 3
class LQG:
    def __init__(self, show_scene, deserialize, append_paths):
        self.abs_path = os.path.dirname(os.path.abspath(__file__))
        """ Reading the config """
        warnings.filterwarnings("ignore")
        self.init_serializer()
        config = self.serializer.read_config("config_lqg.yaml", path=self.abs_path)
        self.set_params(config)
        if self.seed < 0:
            """
            Generate a random seed that will be stored
            """
            self.seed = np.random.randint(0, 4294967)
        np.random.seed(self.seed)  
        
        logging_level = logging.WARN
        if config['verbose']:
            logging_level = logging.DEBUG
        logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_level)        
        np.set_printoptions(precision=16)
        dir = self.abs_path + "/stats/lqg"
        tmp_dir = self.abs_path + "/tmp/lqg"        
        self.utils = Utils()
        if not self.init_robot(self.robot_file):
            logging.error("LQG: Couldn't initialize robot")
            return               
        if not self.setup_scene(self.environment_file, self.robot):
            return
        #self.show_state_distribution(urdf_model_file, environment_file)
        #self.check_continuous_collide(self.robot_file, self.environment_file)
        if show_scene:
            self.test_sensor(self.robot_file, self.environment_file, self.abs_path + "/model/sensor.xml")            
            self.run_viewer(self.robot_file, self.environment_file)
        self.clear_stats(dir)
        logging.info("Start up simulator")
        sim = Simulator()
        path_evaluator = PathEvaluator()
        path_planner = PathPlanningInterface()
        
                
        logging.info("LQG: Generating goal states...")        
        goal_states = get_goal_states("lqg",
                                      self.abs_path,
                                      self.serializer, 
                                      self.obstacles,                                                                           
                                      self.robot,                                    
                                      self.max_velocity,
                                      self.delta_t,
                                      self.start_state,
                                      self.goal_position,
                                      self.goal_radius,
                                      self.planning_algortihm,
                                      self.path_timeout,
                                      self.num_generated_goal_states,
                                      self.continuous_collision,
                                      self.environment_file,
                                      self.num_cores)  
        if len(goal_states) == 0:
            logging.error("LQG: Couldn't generate any goal states. Problem seems to be infeasible")
            return
        logging.info("LQG: Generated " + str(len(goal_states)) + " goal states")         
        sim.setup_reward_function(self.discount_factor, self.step_penalty, self.illegal_move_penalty, self.exit_reward)  
        path_planner.setup(self.robot,                           
                           self.obstacles,  
                           self.max_velocity, 
                           self.delta_t, 
                           self.use_linear_path,
                           self.planning_algortihm,
                           self.path_timeout,
                           self.continuous_collision,
                           self.num_cores)
        
        if self.dynamic_problem:
            path_planner.setup_dynamic_problem(self.robot_file,
                                               self.environment_file,
                                               self.simulation_step_size,
                                               self.num_control_samples,
                                               self.min_control_duration,
                                               self.max_control_duration,
                                               self.add_intermediate_states,
                                               self.rrt_goal_bias,
                                               self.control_sampler)       
        path_planner.set_start_and_goal(self.start_state, goal_states, self.goal_position, self.goal_radius)         
        A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(self.delta_t, self.robot_dof)
        time_to_generate_paths = 0.0
        if check_positive_definite([C, D]):
            m_covs = None
            if self.inc_covariance == "process":
                m_covs = np.linspace(self.min_process_covariance, 
                                     self.max_process_covariance, 
                                     self.covariance_steps)                 
            elif self.inc_covariance == "observation":          
                m_covs = np.linspace(self.min_observation_covariance, 
                                     self.max_observation_covariance,
                                     self.covariance_steps)            
            emds = []
            mean_planning_times = []
            time_to_generate_paths = 0.0            
            paths = []
            if ((not append_paths) and deserialize):
                paths = self.serializer.deserialize_paths(self.abs_path + "/paths.txt", self.robot_dof)
                #paths = [paths[26]]
            if len(paths) == 0:
                print "LQG: Generating " + str(self.num_paths) + " paths from the inital state to the goal position..."
                t0 = time.time()
                paths = path_planner.plan_paths(self.num_paths, 0)
                if len(paths) == 0:
                    logging.error("LQG: Couldn't create any paths within the given time.")
                    return
                time_to_generate_paths = time.time() - t0 
                print "LQG: Time to generate paths: " + str(time_to_generate_paths) + " seconds"
                if self.plot_paths:                
                    self.serializer.save_paths(paths, "paths.yaml", self.overwrite_paths_file, path=dir)                
                if deserialize or append_paths:                    
                    self.serializer.serialize_paths(self.abs_path + "/paths.txt", paths, append_paths, self.robot_dof)                    
                    paths = self.serializer.deserialize_paths(self.abs_path + "/paths.txt", self.robot_dof)
            
            """ Determine average path length """
            avg_path_length = self.get_avg_path_length(paths)            
            self.serializer.save_avg_path_lengths(avg_path_length, path=dir)                                       
            cart_coords = []  
            best_paths = []
            all_rewards = []
            successes = []            
                                           
            for j in xrange(len(m_covs)):
                print "LQG: Evaluating " + str(len(paths)) + " paths for covariance value " + str(m_covs[j]) + "..."
                M = None
                N = None
                if self.inc_covariance == "process":
                    """ The process noise covariance matrix """
                    M = self.calc_covariance_value(self.robot, m_covs[j], M_base)
                    #M = m_covs[j] * M_base
                    
                    """ The observation error covariance matrix """
                    N = self.calc_covariance_value(self.robot, 
                                                   self.min_observation_covariance, 
                                                   N_base, 
                                                   covariance_type='observation')                    
                elif self.inc_covariance == "observation":
                    M = self.calc_covariance_value(self.robot, 
                                                   self.min_process_covariance,
                                                   M_base)
                    N = self.calc_covariance_value(self.robot, 
                                                   m_covs[j],
                                                   N_base, 
                                                   covariance_type='observation')                    
                P_t = np.array([[0.0 for k in xrange(2 * self.robot_dof)] for l in xrange(2 * self.robot_dof)]) 
                path_evaluator.setup(A, B, C, D, H, M, N, V, W,                                     
                                     self.robot, 
                                     self.sample_size, 
                                     self.obstacles,
                                     self.goal_position,
                                     self.goal_radius,
                                     self.show_viewer_evaluation,
                                     self.robot_file,
                                     self.environment_file,
                                     self.num_cores)
                if self.dynamic_problem:
                    path_evaluator.setup_dynamic_problem()
                path_evaluator.setup_reward_function(self.step_penalty, self.illegal_move_penalty, self.exit_reward, self.discount_factor)
                t0 = time.time()
                 
                (path_index,
                 xs, 
                 us, 
                 zs, 
                 control_durations, 
                 objective, 
                 state_covariances,
                 deviation_covariances,
                 estimated_deviation_covariances) = path_evaluator.evaluate_paths(paths, P_t, 0)
                 
                te = time.time() - t0
                print "LQG: Time to evaluate " + str(len(paths)) + " paths: " + str(te) + "s"
                mean_planning_time = time_to_generate_paths + te
                print "LQG: Best objective value: " + str(objective)
                print "LQG: Length of best path: " + str(len(xs))
                print "LQG: Best path has index " + str(path_index)  
                best_paths.append([[xs[i] for i in xrange(len(xs))], 
                                   [us[i] for i in xrange(len(us))],
                                   [zs[i] for i in xrange(len(zs))],
                                   [control_durations[i] for i in xrange(len(control_durations))]])
                
                sim.setup_problem(A, B, C, D, H, V, W, M, N,
                                  self.robot, 
                                  self.enforce_control_constraints,
                                  self.obstacles, 
                                  self.goal_position, 
                                  self.goal_radius,
                                  self.max_velocity,                                  
                                  self.show_viewer_simulation,
                                  self.robot_file,
                                  self.environment_file)
                if self.dynamic_problem:
                    sim.setup_dynamic_problem(self.simulation_step_size)                
                successes = 0
                num_collisions = 0 
                rewards_cov = []
                final_states = []
                num_steps = 0
                collided_num = 0
                print "LQG: Running " + str(self.num_simulation_runs) + " simulations..."              
                for k in xrange(self.num_simulation_runs):
                    self.serializer.write_line("log.log", tmp_dir, "RUN #" + str(k + 1) + " \n")
                    print "simulation run: " + str(k)
                    (x_true, 
                     x_tilde,
                     x_tilde_linear, 
                     x_estimate, 
                     P_t, 
                     current_step, 
                     total_reward,
                     terminal,
                     estimated_s,
                     estimated_c,                     
                     history_entries) = sim.simulate_n_steps(xs, us, zs,                                                             
                                                             control_durations,
                                                             xs[0],
                                                             np.array([0.0 for i in xrange(2 * self.robot_dof)]),
                                                             np.array([0.0 for i in xrange(2 * self.robot_dof)]),
                                                             xs[0],
                                                             np.array([[0.0 for k in xrange(2 * self.robot_dof)] for l in xrange(2 * self.robot_dof)]),
                                                             0.0,                                                           
                                                             0,
                                                             len(xs) - 1,
                                                             deviation_covariances,
                                                             estimated_deviation_covariances)
                    if terminal:
                        successes += 1
                    rewards_cov.append(total_reward)
                    #n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))                    
                    collided = False
                    for l in xrange(len(history_entries)):
                        history_entries[l].set_estimated_covariance(state_covariances[l])                        
                        history_entries[l].serialize(tmp_dir, "log.log")
                        if history_entries[l].collided:                            
                            num_collisions += 1
                            collided = True                        
                    if collided:
                        collided_num += 1
                    num_steps += history_entries[-1].t
                    final_states.append(history_entries[-1].x_true)                                         
                    self.serializer.write_line("log.log", tmp_dir, "Reward: " + str(total_reward) + " \n") 
                    self.serializer.write_line("log.log", tmp_dir, "\n")
                    
                """ Calculate the distance to goal area
                """  
                ee_position_distances = [] 
                for state in final_states:
                    joint_angles = v_double()
                    joint_angles[:] = [state[y] for y in xrange(len(state) / 2)]
                    ee_position = v_double()
                    self.robot.getEndEffectorPosition(joint_angles, ee_position)
                    ee_position = np.array([ee_position[y] for y in xrange(len(ee_position))])
                    dist = np.linalg.norm(np.array(self.goal_position - ee_position))
                    if dist < self.goal_radius:
                        dist = 0.0
                    ee_position_distances.append(dist)
                n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(np.array(ee_position_distances))                         
                
                self.serializer.write_line("log.log", tmp_dir, "################################# \n")
                self.serializer.write_line("log.log",
                                           tmp_dir,
                                           "inc_covariance: " + str(self.inc_covariance) + "\n")
                if self.inc_covariance == "process":                  
                    self.serializer.write_line("log.log",
                                               tmp_dir,
                                               "Process covariance: " + str(m_covs[j]) + " \n")
                    self.serializer.write_line("log.log",
                                               tmp_dir,
                                               "Observation covariance: " + str(self.min_observation_covariance) + " \n")
                    
                elif self.inc_covariance == "observation":                    
                    self.serializer.write_line("log.log",
                                               tmp_dir,
                                               "Process covariance: " + str(self.min_process_covariance) + " \n")
                    self.serializer.write_line("log.log",
                                               tmp_dir,
                                               "Observation covariance: " + str(m_covs[j]) + " \n")
                    
                num_steps = float(num_steps) / self.num_simulation_runs
                self.serializer.write_line("log.log", tmp_dir, "Mean number of steps: " + str(num_steps) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "Objective value of best path: " + str(objective) + " \n")                
                self.serializer.write_line("log.log", tmp_dir, "Mean num collisions per run: " + str(float(num_collisions) / float(self.num_simulation_runs)) + " \n")
                print "collision_prob " + str(collided_num / float(self.num_simulation_runs))
                print "total num collisions " + str(num_collisions)    
                print "mean num collisions " + str(float(num_collisions) / float(self.num_simulation_runs))
                
                self.serializer.write_line("log.log", tmp_dir, "Length best path: " + str(len(xs)) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "Index of best path: " + str(path_index) + " \n")
                self.serializer.write_line("log.log", 
                                           tmp_dir, 
                                           "Average distance to goal area: " + str(mean_distance_to_goal) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "Num successes: " + str(successes) + " \n")
                print "succ " + str((100.0 / self.num_simulation_runs) * successes)
                self.serializer.write_line("log.log", tmp_dir, "Percentage of successful runs: " + str((100.0 / self.num_simulation_runs) * successes) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "Mean planning time: " + str(mean_planning_time) + " \n")
                
                n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))
                print "mean_rewards " + str(mean)
                #plt.plot_histogram_from_data(rewards_cov)
                #sleep
                self.serializer.write_line("log.log", tmp_dir, "Mean rewards: " + str(mean) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "Reward variance: " + str(var) + " \n")
                self.serializer.write_line("log.log", 
                                      tmp_dir, 
                                      "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "Seed: " + str(self.seed) + " \n")
                cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_lqg_" + str(m_covs[j]) + ".log"
                os.system(cmd)
                
            
            if self.plot_paths:
                self.serializer.save_paths(best_paths, 'best_paths.yaml', True, path=dir)                      
            #self.serializer.save_stats(stats, path=dir)
            
            cmd = "cp " + self.abs_path + "/config_lqg.yaml " + dir           
            os.system(cmd)
            
            if not os.path.exists(dir + "/environment"):
                os.makedirs(dir + "/environment") 
            
            cmd = "cp " + self.abs_path + "/" + str(self.environment_file) + " " + str(dir) + "/environment"
            os.system(cmd) 
            
            if not os.path.exists(dir + "/model"):
                os.makedirs(dir + "/model")
                
            cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
            os.system(cmd)
        print "LQG: Time to generate paths: " + str(time_to_generate_paths) + " seconds"                   
        print "Done"
        
    def calc_covariance_value(self, robot, error, covariance_matrix, covariance_type='process'):
        active_joints = v_string()
        robot.getActiveJoints(active_joints)        
        if covariance_type == 'process':
            if self.dynamic_problem:            
                torque_limits = v_double()
                robot.getJointTorqueLimits(active_joints, torque_limits)
                torque_limits = [torque_limits[i] for i in xrange(len(torque_limits))]
                for i in xrange(len(torque_limits)):
                    torque_range = 2.0 * torque_limits[i]
                    covariance_matrix[i, i] = np.square((torque_range / 100.0) * error)
            else:
                for i in xrange(self.robot_dof):
                    covariance_matrix[i, i] = np.square((self.max_velocity / 100.0) * error)
        else:
            lower_position_limits = v_double()
            upper_position_limits = v_double()
            velocity_limits = v_double()            
            robot.getJointLowerPositionLimits(active_joints, lower_position_limits)
            robot.getJointUpperPositionLimits(active_joints, upper_position_limits)
            robot.getJointVelocityLimits(active_joints, velocity_limits)            
            for i in xrange(self.robot_dof):
                position_range = upper_position_limits[i] - lower_position_limits[i]
                covariance_matrix[i, i] = np.square((position_range / 100.0) * error)            
                velocity_range = 2.0 * velocity_limits[i]
                covariance_matrix[i + self.robot_dof, i + self.robot_dof] = np.square((velocity_range / 100.0) * error)          
        return covariance_matrix
        
        
    def init_robot(self, urdf_model_file):
        self.robot = Robot(self.abs_path + "/" + urdf_model_file)
        self.robot.enforceConstraints(self.enforce_constraints)
        self.robot.setGravityConstant(self.gravity_constant)
        self.robot.setAccelerationLimit(self.acceleration_limit)
        """ Setup operations """
        self.robot_dof = self.robot.getDOF()        
        if len(self.start_state) != 2 * self.robot_dof:
            logging.error("LQG: Start state dimension doesn't fit to the robot state space dimension")
            return False
        return True
        
    """
    Analyzing functions (will be removed later)
    =====================================================================
    """
           
           
    def sample_control_error(self, M):
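        """ Draw a zero-mean control error vector with covariance M. """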
        mu = np.array([0.0 for i in xrange(2 * self.robot_dof)])
        return np.random.multivariate_normal(mu, M)
    
    def show_state_distribution(self, model_file, env_file):
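        """
        Propagate a fixed start state under a constant control with sampled
        control errors and plot the distribution of the resulting state
        components 3 to 5 in 3D.
        """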
        self.robot.setupViewer(model_file, env_file)       
        M = 30.0 * np.identity(2 * self.robot_dof)
        active_joints = v_string()
        self.robot.getActiveJoints(active_joints)
        self.robot_dof = len(active_joints)
        
        x = [0.0, -np.pi / 2.0, 0.0, 0.0, 0.0, 0.0]  # initial state; must be set before the first loop iteration
        states = []
        for z in xrange(2000):
            u = [70.0, 70.0, 70.0, 0.0, 0.0, 0.0]
            current_state = v_double()
            current_state[:] = x
            control = v_double()
            control[:] = u            
            control_error = v_double()
            ce = self.sample_control_error(M)
            #ce = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
            control_error[:] = ce
            x = None
            for k in xrange(1):
                result = v_double()
                self.robot.propagate(current_state,
                                     control,
                                     control_error,
                                     self.simulation_step_size,
                                     self.delta_t,
                                     result)                               
                x = [result[i] for i in xrange(len(result))]
                
                current_state[:] = x
                print x
            states.append(np.array(x[3:6]))
            x = [0.0, -np.pi / 2.0, 0.0, 0.0, 0.0, 0.0]
            
            cjvals = v_double()
            cjvels = v_double()
            cjvals_arr = [x[i] for i in xrange(len(x) / 2)]
            cjvels_arr = [x[i] for i in xrange(len(x) / 2, len(x))]
            cjvals[:] = cjvals_arr
            cjvels[:] = cjvels_arr
            particle_joint_values = v2_double()
            particle_joint_colors = v2_double()  # currently unused
            self.robot.updateViewerValues(cjvals, 
                                          cjvels,
                                          particle_joint_values,
                                          particle_joint_values)
        from plot import plot_3d_points
        mins = []
        maxs = []
        
        x_min = min([states[i][0] for i in xrange(len(states))])
        x_max = max([states[i][0] for i in xrange(len(states))])
        y_min = min([states[i][1] for i in xrange(len(states))])
        y_max = max([states[i][1] for i in xrange(len(states))])
        z_min = min([states[i][2] for i in xrange(len(states))])
        z_max = max([states[i][2] for i in xrange(len(states))])
        
        scale = [-0.2, 0.2]
        plot_3d_points(np.array(states), 
                       x_scale = [x_min, x_max], 
                       y_scale = [y_min, y_max], 
                       z_scale=  [z_min, z_max])
        return  # stop after showing the plot
        
    def check_continuous_collide(self, model_file, env_file):
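        """
        Benchmark continuous collision checks between two fixed states x1
        and x2; prints the average check time after 10000 iterations.
        """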
        self.robot.setViewerBackgroundColor(0.6, 0.8, 0.6)
        self.robot.setViewerSize(1280, 768)
        self.robot.setupViewer(model_file, env_file)        
        x1 = [0.0115186,
              0.1404386,
              -0.17235397,
              -0.22907283,
              0.88922755,
              1.35271925,
              -3.38231963,
              -1.51419397]
        x2 = [0.04028825,
              0.18442375,
              -0.25677913,
              -0.33145854,
              0.81281939,
              1.51040733,
              -2.37524124,
              -3.43838968]
        #x1 = [np.pi - np.pi / 4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        #x1 = [np.pi / 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        #x2 = [np.pi / 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        
        coll_t = 0.0
        k = 0
        while True:
            k += 1            
            cjvals = v_double()
            cjvels = v_double()
            cjvals_arr = [x1[i] for i in xrange(len(x1) / 2)]
            cjvels_arr = [x1[i] for i in xrange(len(x1) / 2, len(x1))]
            cjvals[:] = cjvals_arr
            cjvels[:] = cjvels_arr
            particle_joint_values = v2_double()
            
            ja_start = v_double()
            ja_goal = v_double()
            ja_start[:] = [x1[i] for i in xrange(len(x1) / 2)]
            ja_goal[:] = [x2[i] for i in xrange(len(x2) / 2)]
            collision_objects_start = self.robot.createRobotCollisionObjects(ja_start)
            collision_objects_goal = self.robot.createRobotCollisionObjects(ja_goal)
            in_collision_discrete = False
            in_collision_continuous = False             
            t0 = time.time()
            breaking = False      
            for o in self.obstacles:                
                #in_collision_discrete_start = o.inCollisionDiscrete(collision_objects_start)
                #in_collision_discrete_goal = o.inCollisionDiscrete(collision_objects_goal)
                for i in xrange(len(collision_objects_start)):
                    in_collision_continuous = o.inCollisionContinuous([collision_objects_start[i], collision_objects_goal[i]])
                    if in_collision_continuous:                        
                        break
                if in_collision_continuous:
                    break                              
                '''if in_collision_discrete_start:
                    print "start collides discrete"
                    in_collision_discrete = True
                    break
                if in_collision_discrete_goal:
                    print "goal collides discrete"
                    in_collision_discrete = True
                    break
                if in_collision_continuous:
                    print "collides continuous"
                    break
            print "in collision discrete " + str(in_collision_discrete)'''
            print "in collision continuous " + str(in_collision_continuous)
            coll_t += time.time() - t0
            if k == 10000:
                print coll_t / k
                return  # stop after reporting the average check time
            self.robot.updateViewerValues(cjvals, 
                                          cjvels,
                                          particle_joint_values,
                                          particle_joint_values)
            #time.sleep(10)
            time.sleep(0.03)
             
    def test_sensor(self, model_file, env_file, sensor_file):
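        """
        Propagate a zero state under zero controls and update the sensor
        transform with the resulting joint angles on every step.
        """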
        show_viewer = True
        if show_viewer:
            self.robot.setViewerBackgroundColor(0.6, 0.8, 0.6)
            self.robot.setViewerSize(1280, 768)
            self.robot.setupViewer(model_file, env_file) 
            #self.robot.addSensor(sensor_file)
        x = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        u = [0.0, 0.0, 0.0]
        u_err = [0.0, 0.0, 0.0]
        
        current_state = v_double()
        control = v_double()
        control[:] = u
        
        control_error = v_double()
        control_error[:] = u_err
        
        result = v_double()
        angles = v_double()
        current_state[:] = x  # fill the state before setting the initial sensor transform
        self.robot.setSensorTransform(current_state)
        while True:
            current_state[:] = x
            self.robot.propagate(current_state,
                                 control,
                                 control_error,
                                 self.simulation_step_size,
                                 0.03,
                                 result)            
            angles[:] = [result[i] for i in xrange(len(result) / 2)]
            self.robot.setSensorTransform(angles) 
            x = np.array([result[i] for i in xrange(len(result))])
            cjvals = v_double()
            cjvels = v_double()
            cjvals_arr = [x[i] for i in xrange(len(x) / 2)]
            cjvels_arr = [x[i] for i in xrange(len(x) / 2, len(x))]
            cjvals[:] = cjvals_arr
            cjvels[:] = cjvels_arr
            particle_joint_values = v2_double()
            if show_viewer:                
                self.robot.updateViewerValues(cjvals, 
                                              cjvels,
                                              particle_joint_values,
                                              particle_joint_values)
            time.sleep(0.03)
            
            print "hello"   
        
    def run_viewer(self, model_file, env_file):        
        show_viewer = True
        rot = v_double()
        trans = v_double()
        rot[:] = [-1.0, 0.0, 0.0, 0.0]
        trans[:] = [0.0, 0.0, 3.0]        
        if show_viewer:
            self.robot.setViewerBackgroundColor(0.6, 0.8, 0.6)
            self.robot.setViewerSize(1280, 768)
            self.robot.setupViewer(model_file, env_file)
        fx = 0.0
        fy = 0.0
        fz = 0.0
        f_roll = 0.0
        f_pitch = 0.0
        f_yaw = 0.0         
        x = [0.0 for i in xrange(2 * self.robot_dof)]
        '''x = [0.0, 0.0, 0.0, 0.0,
             0.0,
             0.0,
             0.0,
             0.0]'''
        #x = [1.78349, 0.868347, -0.927761]
        x[1] = 0.6 
        x_true = [0.0 for i in xrange(2 * self.robot_dof)]        
        integration_times = [] 
        collision_check_times1 = []
        collision_check_times2 = []     
        current_state = v_double()
        y = 0
        while True:
            u_in = [0.0 for i in xrange(self.robot_dof)]
            u_in[0] = 150.0
                        
            current_state[:] = x            
            control = v_double()            
            control[:] = u_in
            control_error = v_double()
            ce = [0.0 for i in xrange(self.robot_dof)]
            control_error[:] = ce
            result = v_double()
            
            
            ###########################
            ee_jacobian = v2_double()            
            self.robot.getEndEffectorJacobian(current_state, ee_jacobian)
            ###########################
            t0 = time.time()                    
            self.robot.propagate(current_state,
                                 control,
                                 control_error,
                                 self.simulation_step_size,
                                 0.03,
                                 result) 
            #print result[11]          
            t = time.time() - t0
            integration_times.append(t)
            if y == 1000:
                t_sum = sum(integration_times)
                t_mean = t_sum / len(integration_times)
                print "mean integration times: " + str(t_mean)
                t_sum = sum(collision_check_times1)
                t_mean = t_sum / len(collision_check_times1)
                print "mean collision check times old " + str(t_mean)
                t_mean = sum(collision_check_times2) / len(collision_check_times2)
                print "mean collision check times new " + str(t_mean)
                return  # stop after printing the timing statistics
            
            ja_start = v_double()            
            ja_start[:] = [current_state[i] for i in xrange(len(current_state) / 2)]                     
            collision_objects_start = self.robot.createRobotCollisionObjects(ja_start)
            joint_angles = v_double()
            joint_angles[:] = [result[i] for i in xrange(len(result) / 2)]
            collision_objects_goal = self.robot.createRobotCollisionObjects(joint_angles)
            #print "ee_velocity " + str([ee_velocity[i] for i in xrange(len(ee_velocity))])
            # The code that was timed here is commented out, so t2 records
            # an essentially empty interval.
            t_coll_check1 = time.time()
            t2 = time.time() - t_coll_check1
            collision_check_times2.append(t2)
            
            '''
            Get the end effector position
            '''
            #ee_position = v_double()            
            #self.robot.getEndEffectorPosition(joint_angles, ee_position)                      
            #ee_collision_objects = self.robot.createEndEffectorCollisionObject(joint_angles)
            in_collision = False
            t_c = time.time()            
            for o in self.obstacles:                
                if o.inCollisionDiscrete(collision_objects_goal):                                        
                    in_collision = True
                    print "IN COLLISION"
                    break                              
                '''for i in xrange(len(collision_objects_start)):                        
                    if o.inCollisionContinuous([collision_objects_start[i], collision_objects_goal[i]]):
                        in_collision = True
                        break'''               
                if in_collision:
                    break
                    
            t = time.time() - t_c            
            collision_check_times1.append(t)          
                                
                                                                          
            x = np.array([result[i] for i in xrange(len(result))])
            cjvals = v_double()
            cjvels = v_double()
            cjvals_arr = [x[i] for i in xrange(len(x) / 2)]
            cjvels_arr = [x[i] for i in xrange(len(x) / 2, len(x))]
            cjvals[:] = cjvals_arr
            cjvels[:] = cjvels_arr
            particle_joint_values = v2_double()
            if show_viewer:
                self.robot.updateViewerValues(cjvals, 
                                              cjvels,
                                              particle_joint_values,
                                              particle_joint_values)
            time.sleep(0.03) 
            
            y += 1
            print y
            
    """
    ================================================================
    """
        
        
    def setup_scene(self,                    
                    environment_file,
                    robot):
        """ Load the obstacles """         
        self.obstacles = self.utils.loadObstaclesXML(self.abs_path + "/" + environment_file)      
        
        """ Load the goal area """
        goal_area = v_double()
        self.utils.loadGoalArea(self.abs_path + "/" + environment_file, goal_area)
        if len(goal_area) == 0:
            print "ERROR: Your environment file doesn't define a goal area"
            return False
        self.goal_position = [goal_area[i] for i in xrange(0, 3)]
        self.goal_radius = goal_area[3]
        return True
            
    def init_serializer(self):
        self.serializer = Serializer()
        self.serializer.create_temp_dir(self.abs_path, "lqg")        
            
    def clear_stats(self, dir):        
        if os.path.isdir(dir):
            cmd = "rm -rf " + dir + "/*"            
            os.system(cmd)
        else:
            os.makedirs(dir)
            
    def get_average_distance_to_goal_area(self, goal_position, goal_radius, cartesian_coords):        
        avg_dist = 0.0
        goal_pos = np.array(goal_position)        
        for i in xrange(len(cartesian_coords)):            
            cart = np.array(cartesian_coords[i])            
            dist = np.linalg.norm(goal_pos - cart)            
            if dist < goal_radius:
                dist = 0.0
            avg_dist += dist
        if avg_dist == 0.0:
            return avg_dist        
        return np.asscalar(avg_dist) / len(cartesian_coords)          
            
    def problem_setup(self, delta_t, num_links):
        A = np.identity(num_links * 2)
        H = np.identity(num_links * 2)
        W = np.identity(num_links * 2)
        C = self.path_deviation_cost * np.identity(num_links * 2)
        
        '''B = delta_t * np.identity(num_links * 2)                
        V = np.identity(num_links * 2)
        D = self.control_deviation_cost * np.identity(num_links * 2)        
        M_base = np.identity(2 * self.robot_dof)
        N_base = np.identity(2 * self.robot_dof)'''
        
        B = delta_t * np.vstack((np.identity(num_links),
                                 np.zeros((num_links, num_links))))
        V = np.vstack((np.identity(num_links),
                       np.zeros((num_links, num_links))))
        D = self.control_deviation_cost * np.identity(num_links)
        M_base = np.identity(self.robot_dof)
        N_base = np.identity(2 * self.robot_dof)
        
        
        return A, H, B, V, W, C, D, M_base, N_base
            
    def get_avg_path_length(self, paths):
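        """ Average number of states per path. """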
        avg_length = 0.0
        for path in paths:            
            avg_length += len(path[0])
        return avg_length / len(paths)
        
    def set_params(self, config):        
        self.num_paths = config['num_generated_paths']
        self.use_linear_path = config['use_linear_path']        
        self.max_velocity = config['max_velocity']
        self.delta_t = 1.0 / config['control_rate']
        self.start_state = config['start_state']        
        self.num_simulation_runs = config['num_simulation_runs']
        self.num_bins = config['num_bins']
        self.min_process_covariance = config['min_process_covariance']
        self.max_process_covariance = config['max_process_covariance']
        self.covariance_steps = config['covariance_steps']
        self.min_observation_covariance = config['min_observation_covariance']
        self.max_observation_covariance = config['max_observation_covariance']       
        self.discount_factor = config['discount_factor']
        self.illegal_move_penalty = config['illegal_move_penalty']
        self.step_penalty = config['step_penalty']
        self.exit_reward = config['exit_reward']
        self.stop_when_terminal = config['stop_when_terminal']        
        self.enforce_constraints = config['enforce_constraints']
        self.enforce_control_constraints = config['enforce_control_constraints']
        self.sample_size = config['sample_size']
        self.plot_paths = config['plot_paths']
        self.planning_algortihm = config['planning_algorithm']
        self.dynamic_problem = config['dynamic_problem'] 
        self.simulation_step_size = config['simulation_step_size']        
        self.path_timeout = config['path_timeout'] 
        self.continuous_collision = config['continuous_collision_check']
        self.show_viewer_evaluation = config['show_viewer_evaluation']
        self.show_viewer_simulation = config['show_viewer_simulation']   
        self.path_deviation_cost = config['path_deviation_cost'] 
        self.control_deviation_cost = config['control_deviation_cost']
        self.num_control_samples = config['num_control_samples'] 
        self.min_control_duration = config['min_control_duration']
        self.max_control_duration = config['max_control_duration']   
        self.inc_covariance = config['inc_covariance'] 
        self.add_intermediate_states = config['add_intermediate_states']
        self.gravity_constant = config['gravity']
        self.num_generated_goal_states = config['num_generated_goal_states']
        self.robot_file = config['robot_file']        
        self.environment_file = config['environment_file']
        self.rrt_goal_bias = config['rrt_goal_bias']
        self.control_sampler = config['control_sampler']
        self.seed = config['seed']
        self.num_cores = config['num_cores']
        self.acceleration_limit = config['acceleration_limit']
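
In problem_setup, B and V stack an identity block on top of a zero block, so controls and process noise enter only the first (position) half of the doubled state vector. A short sketch under assumed values (num_links = 2, delta_t = 0.1):

import numpy as np

num_links = 2   # assumed DOF, for illustration only
delta_t = 0.1   # assumed control period

B = delta_t * np.vstack((np.identity(num_links),
                         np.zeros((num_links, num_links))))
print B
# [[ 0.1  0. ]
#  [ 0.   0.1]
#  [ 0.   0. ]
#  [ 0.   0. ]]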
Example 4
File: lqg.py Project: hebinbing/LQG
class LQG:
    def __init__(self, show_scene, deserialize, append_paths):
        self.abs_path = os.path.dirname(os.path.abspath(__file__))
        cmd = "rm -rf " + self.abs_path + "/tmp"
        popen = subprocess.Popen(cmd, cwd=self.abs_path, shell=True)
        popen.wait()
        """ Reading the config """
        warnings.filterwarnings("ignore")
        self.init_serializer()
        config = self.serializer.read_config("config_lqg.yaml",
                                             path=self.abs_path)
        self.set_params(config)
        if self.seed < 0:
            """
            Generate a random seed that will be stored
            """
            self.seed = np.random.randint(0, 42949672)
        np.random.seed(self.seed)

        logging_level = logging.WARN
        if config['verbose']:
            logging_level = logging.DEBUG
        logging.basicConfig(format='%(levelname)s: %(message)s',
                            level=logging_level)
        np.set_printoptions(precision=16)
        dir = self.abs_path + "/stats/lqg"
        tmp_dir = self.abs_path + "/tmp/lqg"
        self.utils = Utils()
        if not self.init_robot(self.robot_file):
            logging.error("LQG: Couldn't initialize robot")
            return
        if not self.setup_scene(self.environment_file, self.robot):
            return
        #self.show_state_distribution(urdf_model_file, environment_file)
        #self.check_continuous_collide(self.robot_file, self.environment_file)
        if show_scene:
            self.run_viewer(self.robot_file, self.environment_file)
        self.clear_stats(dir)
        logging.info("Start up simulator")
        sim = Simulator()
        path_evaluator = PathEvaluator()
        path_planner = PathPlanningInterface()

        logging.info("LQG: Generating goal states...")
        goal_states = get_goal_states(
            "lqg", self.abs_path, self.serializer, self.obstacles, self.robot,
            self.max_velocity, self.delta_t, self.start_state,
            self.goal_position, self.goal_radius, self.planning_algortihm,
            self.path_timeout, self.num_generated_goal_states,
            self.continuous_collision, self.environment_file, self.num_cores)
        if len(goal_states) == 0:
            logging.error(
                "LQG: Couldn't generate any goal states. Problem seems to be infeasible"
            )
            return
        logging.info("LQG: Generated " + str(len(goal_states)) +
                     " goal states")
        sim.setup_reward_function(self.discount_factor, self.step_penalty,
                                  self.illegal_move_penalty, self.exit_reward)
        path_planner.setup(self.robot, self.obstacles, self.max_velocity,
                           self.delta_t, self.use_linear_path,
                           self.planning_algortihm, self.path_timeout,
                           self.continuous_collision, self.num_cores)

        if self.dynamic_problem:
            path_planner.setup_dynamic_problem(
                self.robot_file, self.environment_file,
                self.simulation_step_size, self.num_control_samples,
                self.min_control_duration, self.max_control_duration,
                self.add_intermediate_states, self.rrt_goal_bias,
                self.control_sampler)
        path_planner.set_start_and_goal(self.start_state, goal_states,
                                        self.goal_position, self.goal_radius)
        A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(
            self.delta_t, self.robot_dof)
        time_to_generate_paths = 0.0
        if check_positive_definite([C, D]):
            m_covs = None
            if self.inc_covariance == "process":
                m_covs = np.linspace(self.min_process_covariance,
                                     self.max_process_covariance,
                                     self.covariance_steps)
            elif self.inc_covariance == "observation":
                m_covs = np.linspace(self.min_observation_covariance,
                                     self.max_observation_covariance,
                                     self.covariance_steps)
            emds = []
            mean_planning_times = []
            time_to_generate_paths = 0.0
            paths = []
            if ((not append_paths) and deserialize):
                paths = self.serializer.deserialize_paths(
                    self.abs_path + "/paths.txt", self.robot_dof)
                #paths = [paths[26]]
            if len(paths) == 0:
                print "LQG: Generating " + str(
                    self.num_paths
                ) + " paths from the inital state to the goal position..."
                t0 = time.time()
                paths = path_planner.plan_paths(self.num_paths, 0)
                if len(paths) == 0:
                    logging.error(
                        "LQG: Couldn't create any paths within the given time."
                    )
                    return
                time_to_generate_paths = time.time() - t0
                print "LQG: Time to generate paths: " + str(
                    time_to_generate_paths) + " seconds"
                if self.plot_paths:
                    self.serializer.save_paths(paths,
                                               "paths.yaml",
                                               self.overwrite_paths_file,
                                               path=dir)
                if deserialize or append_paths:
                    self.serializer.serialize_paths(
                        self.abs_path + "/paths.txt", paths, append_paths,
                        self.robot_dof)
                    paths = self.serializer.deserialize_paths(
                        self.abs_path + "/paths.txt", self.robot_dof)
            """ Determine average path length """
            avg_path_length = self.get_avg_path_length(paths)
            self.serializer.save_avg_path_lengths(avg_path_length, path=dir)
            cart_coords = []
            best_paths = []
            all_rewards = []
            successes = []

            for j in xrange(len(m_covs)):
                print "LQG: Evaluating " + str(
                    len(paths)) + " paths for covariance value " + str(
                        m_covs[j]) + "..."
                M = None
                N = None
                if self.inc_covariance == "process":
                    """ The process noise covariance matrix """
                    M = self.calc_covariance_value(self.robot, m_covs[j],
                                                   M_base)
                    #M = m_covs[j] * M_base
                    """ The observation error covariance matrix """
                    N = self.calc_covariance_value(
                        self.robot,
                        self.min_observation_covariance,
                        N_base,
                        covariance_type='observation')
                elif self.inc_covariance == "observation":
                    M = self.calc_covariance_value(self.robot,
                                                   self.min_process_covariance,
                                                   M_base)
                    N = self.calc_covariance_value(
                        self.robot,
                        m_covs[j],
                        N_base,
                        covariance_type='observation')
                P_t = np.array([[0.0 for k in xrange(2 * self.robot_dof)]
                                for l in xrange(2 * self.robot_dof)])
                path_evaluator.setup(A, B, C, D, H, M, N, V, W, self.robot,
                                     self.sample_size, self.obstacles,
                                     self.goal_position, self.goal_radius,
                                     self.show_viewer_evaluation,
                                     self.robot_file, self.environment_file,
                                     self.num_cores)
                if self.dynamic_problem:
                    path_evaluator.setup_dynamic_problem()
                path_evaluator.setup_reward_function(self.step_penalty,
                                                     self.illegal_move_penalty,
                                                     self.exit_reward,
                                                     self.discount_factor)
                t0 = time.time()

                (path_index, xs, us, zs, control_durations, objective,
                 state_covariances, deviation_covariances,
                 estimated_deviation_covariances
                 ) = path_evaluator.evaluate_paths(paths, P_t, 0)

                te = time.time() - t0
                print "LQG: Time to evaluate " + str(
                    len(paths)) + " paths: " + str(te) + "s"
                mean_planning_time = time_to_generate_paths + te
                print "LQG: Best objective value: " + str(objective)
                print "LQG: Length of best path: " + str(len(xs))
                print "LQG: Best path has index " + str(path_index)
                best_paths.append([[xs[i] for i in xrange(len(xs))],
                                   [us[i] for i in xrange(len(us))],
                                   [zs[i] for i in xrange(len(zs))],
                                   [
                                       control_durations[i]
                                       for i in xrange(len(control_durations))
                                   ]])

                sim.setup_problem(A, B, C, D, H, V, W, M, N, self.robot,
                                  self.enforce_control_constraints,
                                  self.obstacles, self.goal_position,
                                  self.goal_radius, self.max_velocity,
                                  self.show_viewer_simulation, self.robot_file,
                                  self.environment_file, self.collision_aware)
                if self.dynamic_problem:
                    sim.setup_dynamic_problem(self.simulation_step_size)
                successes = 0
                num_collisions = 0
                rewards_cov = []
                final_states = []
                num_steps = 0
                collided_num = 0
                print "LQG: Running " + str(
                    self.num_simulation_runs) + " simulations..."
                for k in xrange(self.num_simulation_runs):
                    self.serializer.write_line("log.log", tmp_dir,
                                               "RUN #" + str(k + 1) + " \n")
                    print "simulation run: " + str(k)
                    (x_true, x_tilde, x_tilde_linear, x_estimate, z, P_t,
                     current_step, total_reward, terminal, estimated_s,
                     estimated_c, history_entries) = sim.simulate_n_steps(
                         xs, us, zs, control_durations, xs[0], xs[0],
                         np.array([[0.0 for _ in xrange(2 * self.robot_dof)]
                                   for _ in xrange(2 * self.robot_dof)]), 0.0,
                         0,
                         len(xs) - 1, deviation_covariances,
                         estimated_deviation_covariances)
                    if terminal:
                        print "TERMINAL"
                        successes += 1
                    rewards_cov.append(total_reward)
                    #n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))
                    collided = False
                    for l in xrange(len(history_entries)):
                        history_entries[l].set_estimated_covariance(
                            state_covariances[l])
                        history_entries[l].serialize(tmp_dir, "log.log")
                        if history_entries[l].collided:
                            num_collisions += 1
                            collided = True
                    if collided:
                        collided_num += 1
                    num_steps += history_entries[-1].t
                    final_states.append(history_entries[-1].x_true)
                    self.serializer.write_line(
                        "log.log", tmp_dir,
                        "Reward: " + str(total_reward) + " \n")
                    self.serializer.write_line("log.log", tmp_dir, "\n")
                """ Calculate the distance to goal area
                """
                ee_position_distances = []
                for state in final_states:
                    joint_angles = v_double()
                    joint_angles[:] = [
                        state[y] for y in xrange(len(state) / 2)
                    ]
                    ee_position = v_double()
                    self.robot.getEndEffectorPosition(joint_angles,
                                                      ee_position)
                    ee_position = np.array(
                        [ee_position[y] for y in xrange(len(ee_position))])
                    dist = np.linalg.norm(
                        np.array(self.goal_position - ee_position))
                    if dist < self.goal_radius:
                        dist = 0.0
                    ee_position_distances.append(dist)
                n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(
                    np.array(ee_position_distances))

                self.serializer.write_line(
                    "log.log", tmp_dir, "################################# \n")
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "inc_covariance: " + str(self.inc_covariance) + "\n")
                if self.inc_covariance == "process":
                    self.serializer.write_line(
                        "log.log", tmp_dir,
                        "Process covariance: " + str(m_covs[j]) + " \n")
                    self.serializer.write_line(
                        "log.log", tmp_dir, "Observation covariance: " +
                        str(self.min_observation_covariance) + " \n")

                elif self.inc_covariance == "observation":
                    self.serializer.write_line(
                        "log.log", tmp_dir, "Process covariance: " +
                        str(self.min_process_covariance) + " \n")
                    self.serializer.write_line(
                        "log.log", tmp_dir,
                        "Observation covariance: " + str(m_covs[j]) + " \n")

                num_steps /= self.num_simulation_runs
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Mean number of steps: " + str(num_steps) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Objective value of best path: " + str(objective) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir, "Mean num collisions per run: " + str(
                        float(num_collisions) /
                        float(self.num_simulation_runs)) + " \n")
                print "collision_prob " + str(
                    collided_num / float(self.num_simulation_runs))
                print "total num collisions " + str(num_collisions)
                print "mean num collisions " + str(
                    float(num_collisions) / float(self.num_simulation_runs))

                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Length best path: " + str(len(xs)) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Index of best path: " + str(path_index) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir, "Average distance to goal area: " +
                    str(mean_distance_to_goal) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Num successes: " + str(successes) + " \n")
                print "succ " + str(
                    (100.0 / self.num_simulation_runs) * successes)
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Percentage of successful runs: " + str(
                        (100.0 / self.num_simulation_runs) * successes) +
                    " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Mean planning time: " + str(mean_planning_time) + " \n")

                n, min_max, mean, var, skew, kurt = scipy.stats.describe(
                    np.array(rewards_cov))
                print "mean_rewards " + str(mean)
                #plt.plot_histogram_from_data(rewards_cov)
                #sleep
                self.serializer.write_line(
                    "log.log", tmp_dir, "Mean rewards: " + str(mean) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir, "Reward variance: " + str(var) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
                self.serializer.write_line("log.log", tmp_dir,
                                           "Seed: " + str(self.seed) + " \n")
                cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_lqg_" + str(
                    m_covs[j]) + ".log"
                os.system(cmd)

            if self.plot_paths:
                self.serializer.save_paths(best_paths,
                                           'best_paths.yaml',
                                           True,
                                           path=dir)
            #self.serializer.save_stats(stats, path=dir)

            cmd = "cp " + self.abs_path + "/config_lqg.yaml " + dir
            os.system(cmd)

            if not os.path.exists(dir + "/environment"):
                os.makedirs(dir + "/environment")

            cmd = "cp " + self.abs_path + "/" + str(
                self.environment_file) + " " + str(dir) + "/environment"
            os.system(cmd)

            if not os.path.exists(dir + "/model"):
                os.makedirs(dir + "/model")

            cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
            os.system(cmd)
        print "LQG: Time to generate paths: " + str(
            time_to_generate_paths) + " seconds"
        print "Done"

    def calc_covariance_value(self,
                              robot,
                              error,
                              covariance_matrix,
                              covariance_type='process'):
        active_joints = v_string()
        robot.getActiveJoints(active_joints)
        if covariance_type == 'process':
            if self.dynamic_problem:
                torque_limits = v_double()
                robot.getJointTorqueLimits(active_joints, torque_limits)
                torque_limits = [
                    torque_limits[i] for i in xrange(len(torque_limits))
                ]
                for i in xrange(len(torque_limits)):
                    torque_range = 2.0 * torque_limits[i]
                    covariance_matrix[i, i] = np.square(
                        (torque_range / 100.0) * error)
            else:
                for i in xrange(self.robot_dof):
                    covariance_matrix[i, i] = np.square(
                        (self.max_velocity / 100.0) * error)
        else:
            lower_position_limits = v_double()
            upper_position_limits = v_double()
            velocity_limits = v_double()
            robot.getJointLowerPositionLimits(active_joints,
                                              lower_position_limits)
            robot.getJointUpperPositionLimits(active_joints,
                                              upper_position_limits)
            robot.getJointVelocityLimits(active_joints, velocity_limits)
            for i in xrange(self.robot_dof):
                position_range = upper_position_limits[
                    i] - lower_position_limits[i]
                covariance_matrix[i, i] = np.square(
                    (position_range / 100.0) * error)
                velocity_range = 2.0 * velocity_limits[i]
                covariance_matrix[i + self.robot_dof,
                                  i + self.robot_dof] = np.square(
                                      (velocity_range / 100.0) * error)
        return covariance_matrix
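    # (added note) The rule above sets the noise standard deviation to
    # `error` percent of the relevant limit. E.g. with a torque limit of
    # 10.0 (range 20.0) and error = 5.0, the diagonal entry becomes
    # ((20.0 / 100.0) * 5.0) ** 2 = 1.0.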

    def init_robot(self, urdf_model_file):
        self.robot = Robot(self.abs_path + "/" + urdf_model_file)
        self.robot.enforceConstraints(self.enforce_constraints)
        self.robot.setGravityConstant(self.gravity_constant)
        self.robot.setAccelerationLimit(self.acceleration_limit)
        """ Setup operations """
        self.robot_dof = self.robot.getDOF()
        if len(self.start_state) != 2 * self.robot_dof:
            logging.error(
                "LQG: Start state dimension doesn't fit to the robot state space dimension"
            )
            return False
        return True

    """
    Analyzing functions (will be removed later)
    =====================================================================
    """

    def sample_control_error(self, M):
        mu = np.array([0.0 for i in xrange(2 * self.robot_dof)])
        return np.random.multivariate_normal(mu, M)
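    # (added note) Draws a single control-error vector from N(0, M); e.g.
    # M = 0.001 * np.identity(2 * self.robot_dof) gives i.i.d. components
    # with standard deviation sqrt(0.001).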

    def show_state_distribution(self, model_file, env_file):
        self.robot.setupViewer(model_file, env_file)
        M = 30.0 * np.identity(2 * self.robot_dof)
        active_joints = v_string()
        self.robot.getActiveJoints(active_joints)
        self.robot_dof = len(active_joints)

        x = [0.0, -np.pi / 2.0, 0.0, 0.0, 0.0, 0.0]  # initial state (uncommented: x is read below before any assignment)
        states = []
        for z in xrange(2000):
            u = [70.0, 70.0, 70.0, 0.0, 0.0, 0.0]
            current_state = v_double()
            current_state[:] = x
            control = v_double()
            control[:] = u
            control_error = v_double()
            ce = self.sample_control_error(M)
            #ce = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
            control_error[:] = ce
            x = None
            for k in xrange(1):
                result = v_double()
                self.robot.propagate(current_state, control, control_error,
                                     self.simulation_step_size, self.delta_t,
                                     result)
                x = [result[i] for i in xrange(len(result))]

                current_state[:] = x
                print x
            states.append(np.array(x[3:6]))
            x = [0.0, -np.pi / 2.0, 0.0, 0.0, 0.0, 0.0]

            cjvals = v_double()
            cjvels = v_double()
            cjvals_arr = [x[i] for i in xrange(len(x) / 2)]
            cjvels_arr = [x[i] for i in xrange(len(x) / 2, len(x))]
            cjvals[:] = cjvals_arr
            cjvels[:] = cjvels_arr
            particle_joint_values = v2_double()
            particle_joint_colors = v2_double()
            self.robot.updateViewerValues(cjvals, cjvels,
                                          particle_joint_values,
                                          particle_joint_colors)
        from plot import plot_3d_points
        mins = []
        maxs = []

        x_min = min([states[i][0] for i in xrange(len(states))])
        x_max = max([states[i][0] for i in xrange(len(states))])
        y_min = min([states[i][1] for i in xrange(len(states))])
        y_max = max([states[i][1] for i in xrange(len(states))])
        z_min = min([states[i][2] for i in xrange(len(states))])
        z_max = max([states[i][2] for i in xrange(len(states))])

        scale = [-0.2, 0.2]
        plot_3d_points(np.array(states),
                       x_scale=[x_min, x_max],
                       y_scale=[y_min, y_max],
                       z_scale=[z_min, z_max])
        sleep  # undefined name: deliberately raises NameError to halt here

    def check_continuous_collide(self, model_file, env_file):
        self.robot.setViewerBackgroundColor(0.6, 0.8, 0.6)
        self.robot.setViewerSize(1280, 768)
        self.robot.setupViewer(model_file, env_file)
        x1 = [
            0.0115186, 0.1404386, -0.17235397, -0.22907283, 0.88922755,
            1.35271925, -3.38231963, -1.51419397
        ]
        x2 = [
            0.04028825, 0.18442375, -0.25677913, -0.33145854, 0.81281939,
            1.51040733, -2.37524124, -3.43838968
        ]
        #x1 = [np.pi - np.pi / 4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        #x1 = [np.pi / 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        #x2 = [np.pi / 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

        coll_t = 0.0
        k = 0
        while True:
            k += 1
            cjvals = v_double()
            cjvels = v_double()
            cjvals_arr = [x1[i] for i in xrange(len(x1) / 2)]
            cjvels_arr = [x1[i] for i in xrange(len(x1) / 2, len(x1))]
            cjvals[:] = cjvals_arr
            cjvels[:] = cjvels_arr
            particle_joint_values = v2_double()

            ja_start = v_double()
            ja_goal = v_double()
            ja_start[:] = [x1[i] for i in xrange(len(x1) / 2)]
            ja_goal[:] = [x2[i] for i in xrange(len(x2) / 2)]
            collision_objects_start = self.robot.createRobotCollisionObjects(
                ja_start)
            collision_objects_goal = self.robot.createRobotCollisionObjects(
                ja_goal)
            in_collision_discrete = False
            in_collision_continuous = False
            t0 = time.time()
            breaking = False
            for o in self.obstacles:
                #in_collision_discrete_start = o.inCollisionDiscrete(collision_objects_start)
                #in_collision_discrete_goal = o.inCollisionDiscrete(collision_objects_goal)
                for i in xrange(len(collision_objects_start)):
                    in_collision_continuous = o.inCollisionContinuous([
                        collision_objects_start[i], collision_objects_goal[i]
                    ])
                    if in_collision_continuous:
                        break
                if in_collision_continuous:
                    break
                '''if in_collision_discrete_start:                    
                    print "start collides discrete"
                    in_collision_discrete = True
                    break
                if in_collision_discrete_goal:
                    print "goal collides discrete"
                    in_collision_discrete = True
                    break
                if in_collision_continuous:
                    print "collides continuous"
                    break                     
            print "in collision discrete " + str(in_collision_discrete)
            print "in collision continuous " + str(in_collision_continuous)'''
            print "in collision continuous " + str(in_collision_continuous)
            coll_t += time.time() - t0
            if k == 10000:
                print coll_t / k
                sleep  # undefined name: deliberately raises NameError to halt here
            self.robot.updateViewerValues(cjvals, cjvels,
                                          particle_joint_values,
                                          particle_joint_values)
            #time.sleep(10)
            time.sleep(0.03)

    def run_viewer(self, model_file, env_file):
        show_viewer = True
        rot = v_double()
        trans = v_double()
        rot[:] = [-1.0, 0.0, 0.0, 0.0]
        trans[:] = [0.0, 0.0, 3.0]
        if show_viewer:
            self.robot.setViewerBackgroundColor(0.6, 0.8, 0.6)
            self.robot.setViewerSize(1280, 768)
            self.robot.setupViewer(model_file, env_file)
        fx = 0.0
        fy = 0.0
        fz = 0.0
        f_roll = 0.0
        f_pitch = 0.0
        f_yaw = 0.0
        x = self.start_state
        '''x = [0.0 for i in xrange(2 * self.robot_dof)]
        x = [0.0, 0.0, 0.0, 0.0,
             0.0,
             0.0,
             0.0,
             0.0]
        x_true = [0.0 for i in xrange(2 * self.robot_dof)]'''
        integration_times = []
        collision_check_times1 = []
        collision_check_times2 = []

        y = 0
        while True:
            #u_in = [3.0, 1.5, 0.0, 0.0, 0.0, 0.0]
            u_in = [0.0 for i in xrange(self.robot_dof)]

            #u_in[0] = 150.0
            #u_in[1] = 70.0
            current_state = v_double()
            current_state[:] = x
            control = v_double()
            control[:] = u_in

            control_error = v_double()
            ce = [0.0 for i in xrange(self.robot_dof)]
            control_error[:] = ce
            result = v_double()

            t0 = time.time()
            self.robot.propagate(current_state, control, control_error,
                                 self.simulation_step_size, self.delta_t,
                                 result)
            t = time.time() - t0
            integration_times.append(t)
            if y == 10000:
                t_sum = sum(integration_times)
                t_mean = t_sum / len(integration_times)
                print "mean integration times: " + str(t_mean)
                t_sum = sum(collision_check_times1)
                t_mean = t_sum / len(collision_check_times1)
                print "mean collision check times old " + str(t_mean)
                t_mean = sum(collision_check_times2) / len(
                    collision_check_times2)
                print "mean collision check times new " + str(t_mean)
                sleep  # undefined name: deliberately raises NameError to halt here
            print "current state " + str(
                [current_state[i] for i in xrange(len(current_state))])
            ja_start = v_double()
            ja_start[:] = [
                current_state[i] for i in xrange(len(current_state) / 2)
            ]
            collision_objects_start = self.robot.createRobotCollisionObjects(
                ja_start)
            joint_angles = v_double()
            joint_angles[:] = [result[i] for i in xrange(len(result) / 2)]
            collision_objects_goal = self.robot.createRobotCollisionObjects(
                joint_angles)
            #print "ee_velocity " + str([ee_velocity[i] for i in xrange(len(ee_velocity))])
            t_coll_check1 = time.time()
            t2 = time.time() - t_coll_check1  # the collision check that was timed here has been removed; t2 measures ~0
            collision_check_times2.append(t2)
            '''
            Get the end effector position
            '''
            #ee_position = v_double()
            #self.robot.getEndEffectorPosition(joint_angles, ee_position)
            #ee_collision_objects = self.robot.createEndEffectorCollisionObject(joint_angles)
            in_collision = False
            t_c = time.time()
            print "check coll"

            t = time.time() - t_c
            collision_check_times1.append(t)

            x = np.array([result[i] for i in xrange(len(result))])
            cjvals = v_double()
            cjvels = v_double()
            cjvals_arr = [x[i] for i in xrange(len(x) / 2)]
            cjvels_arr = [x[i] for i in xrange(len(x) / 2, len(x))]
            cjvals[:] = cjvals_arr
            cjvels[:] = cjvels_arr
            particle_joint_values = v2_double()
            if show_viewer:
                self.robot.updateViewerValues(cjvals, cjvels,
                                              particle_joint_values,
                                              particle_joint_values)
            time.sleep(self.delta_t)

            y += 1
            print y

    """
    ================================================================
    """

    def setup_scene(self, environment_file, robot):
        """ Load the obstacles """
        self.obstacles = self.utils.loadObstaclesXML(self.abs_path + "/" +
                                                     environment_file)
        """ Load the goal area """
        goal_area = v_double()
        self.utils.loadGoalArea(self.abs_path + "/" + environment_file,
                                goal_area)
        if len(goal_area) == 0:
            print "ERROR: Your environment file doesn't define a goal area"
            return False
        self.goal_position = [goal_area[i] for i in xrange(0, 3)]
        self.goal_radius = goal_area[3]
        return True

    def init_serializer(self):
        self.serializer = Serializer()
        self.serializer.create_temp_dir(self.abs_path, "lqg")

    def clear_stats(self, dir):
        if os.path.isdir(dir):
            cmd = "rm -rf " + dir + "/*"
            os.system(cmd)
        else:
            os.makedirs(dir)

    def get_average_distance_to_goal_area(self, goal_position, goal_radius,
                                          cartesian_coords):
        avg_dist = 0.0
        goal_pos = np.array(goal_position)
        for i in xrange(len(cartesian_coords)):
            cart = np.array(cartesian_coords[i])
            dist = np.linalg.norm(goal_pos - cart)
            if dist < goal_radius:
                dist = 0.0
            avg_dist += dist
        if avg_dist == 0.0:
            return avg_dist
        return np.asscalar(avg_dist) / len(cartesian_coords)
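    # (added note) np.asscalar is deprecated (and removed in newer NumPy
    # releases); avg_dist.item() / len(cartesian_coords) is the modern
    # equivalent.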

    def problem_setup(self, delta_t, num_links):
        A = np.identity(num_links * 2)
        H = np.identity(num_links * 2)
        W = np.identity(num_links * 2)
        C = self.path_deviation_cost * np.identity(num_links * 2)
        '''B = delta_t * np.identity(num_links * 2)                
        V = np.identity(num_links * 2)
        D = self.control_deviation_cost * np.identity(num_links * 2)        
        M_base = np.identity(2 * self.robot_dof)
        N_base = np.identity(2 * self.robot_dof)'''

        B = delta_t * np.vstack(
            (np.identity(num_links), np.zeros((num_links, num_links))))
        V = np.vstack((np.identity(num_links), np.zeros(
            (num_links, num_links))))
        D = self.control_deviation_cost * np.identity(num_links)
        M_base = np.identity(self.robot_dof)
        N_base = np.identity(2 * self.robot_dof)

        return A, H, B, V, W, C, D, M_base, N_base
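    # (added note) The state is [positions; velocities], so A, H, W and C
    # are (2*dof x 2*dof), while B = delta_t * [I; 0] integrates the
    # dof-dimensional control into the position block and V = [I; 0]
    # injects the dof-dimensional process noise the same way; accordingly
    # M_base is (dof x dof), D penalizes only the dof control components,
    # and N_base covers the full observed state.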

    def get_avg_path_length(self, paths):
        avg_length = 0.0
        for path in paths:
            avg_length += len(path[0])
        return avg_length / len(paths)

    def set_params(self, config):
        self.num_paths = config['num_generated_paths']
        self.use_linear_path = config['use_linear_path']
        self.max_velocity = config['max_velocity']
        self.delta_t = 1.0 / config['control_rate']
        self.start_state = config['start_state']
        self.num_simulation_runs = config['num_simulation_runs']
        self.num_bins = config['num_bins']
        self.min_process_covariance = config['min_process_covariance']
        self.max_process_covariance = config['max_process_covariance']
        self.covariance_steps = config['covariance_steps']
        self.min_observation_covariance = config['min_observation_covariance']
        self.max_observation_covariance = config['max_observation_covariance']
        self.discount_factor = config['discount_factor']
        self.illegal_move_penalty = config['illegal_move_penalty']
        self.step_penalty = config['step_penalty']
        self.exit_reward = config['exit_reward']
        self.stop_when_terminal = config['stop_when_terminal']
        self.enforce_constraints = config['enforce_constraints']
        self.enforce_control_constraints = config[
            'enforce_control_constraints']
        self.sample_size = config['sample_size']
        self.plot_paths = config['plot_paths']
        self.planning_algortihm = config['planning_algorithm']
        self.dynamic_problem = config['dynamic_problem']
        self.simulation_step_size = config['simulation_step_size']
        self.path_timeout = config['path_timeout']
        self.continuous_collision = config['continuous_collision_check']
        self.show_viewer_evaluation = config['show_viewer_evaluation']
        self.show_viewer_simulation = config['show_viewer_simulation']
        self.path_deviation_cost = config['path_deviation_cost']
        self.control_deviation_cost = config['control_deviation_cost']
        self.num_control_samples = config['num_control_samples']
        self.min_control_duration = config['min_control_duration']
        self.max_control_duration = config['max_control_duration']
        self.inc_covariance = config['inc_covariance']
        self.add_intermediate_states = config['add_intermediate_states']
        self.gravity_constant = config['gravity']
        self.num_generated_goal_states = config['num_generated_goal_states']
        self.robot_file = config['robot_file']
        self.environment_file = config['environment_file']
        self.rrt_goal_bias = config['rrt_goal_bias']
        self.control_sampler = config['control_sampler']
        self.seed = config['seed']
        self.num_cores = config['num_cores']
        self.acceleration_limit = config['acceleration_limit']
        self.collision_aware = config['knows_collision']
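
Both LQG and MPC gate their setup on check_positive_definite([C, D]), a helper that is not shown in these listings. A plausible minimal reconstruction, assuming the helper simply tests each cost matrix for positive definiteness (here via a Cholesky factorization, which exists exactly for symmetric positive-definite matrices):

import numpy as np

def check_positive_definite(matrices):
    # Hypothetical sketch; the project's actual helper is not included in
    # this listing.
    for m in matrices:
        try:
            np.linalg.cholesky(m)
        except np.linalg.LinAlgError:
            return False
    return True

# For the cost matrices built in problem_setup, any positive
# path_deviation_cost and control_deviation_cost yield True.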
Example 5
File: mpc.py Project: hebinbing/LQG
class MPC:
    def __init__(self, plot):
        self.abs_path = os.path.dirname(os.path.abspath(__file__))
        """ Reading the config """
        warnings.filterwarnings("ignore")
        self.init_serializer()
        config = self.serializer.read_config("config_mpc.yaml",
                                             path=self.abs_path)
        self.set_params(config)
        if self.seed < 0:
            """
            Generate a random seed that will be stored
            """
            self.seed = np.random.randint(0, 42949672)
        np.random.seed(self.seed)

        logging_level = logging.WARN
        if config['verbose']:
            logging_level = logging.DEBUG
        logging.basicConfig(format='%(levelname)s: %(message)s',
                            level=logging_level)
        np.set_printoptions(precision=16)
        dir = self.abs_path + "/stats/mpc"
        tmp_dir = self.abs_path + "/tmp/mpc"
        self.utils = Utils()
        if not self.init_robot(self.robot_file):
            logging.error("MPC: Couldn't initialize robot")
            return
        if not self.setup_scene(self.environment_file, self.robot):
            return
        #self.run_viewer(self.robot_file, self.environment_file)
        self.clear_stats(dir)
        logging.info("Start up simulator")
        sim = Simulator()
        path_evaluator = PathEvaluator()
        path_planner = PathPlanningInterface()
        logging.info("MPC: Generating goal states...")
        goal_states = get_goal_states(
            "mpc", self.abs_path, self.serializer, self.obstacles, self.robot,
            self.max_velocity, self.delta_t, self.start_state,
            self.goal_position, self.goal_radius, self.planning_algortihm,
            self.path_timeout, self.num_generated_goal_states,
            self.continuous_collision, self.environment_file, self.num_cores)

        if len(goal_states) == 0:
            logging.error(
                "MPC: Couldn't generate any goal states. Problem seems to be infeasible"
            )
            return
        logging.info("MPC: Generated " + str(len(goal_states)) +
                     " goal states")
        sim.setup_reward_function(self.discount_factor, self.step_penalty,
                                  self.illegal_move_penalty, self.exit_reward)
        path_planner.setup(self.robot, self.obstacles, self.max_velocity,
                           self.delta_t, self.use_linear_path,
                           self.planning_algortihm, self.path_timeout,
                           self.continuous_collision, self.num_cores)
        if self.dynamic_problem:
            path_planner.setup_dynamic_problem(
                self.robot_file, self.environment_file,
                self.simulation_step_size, self.num_control_samples,
                self.min_control_duration, self.max_control_duration,
                self.add_intermediate_states, self.rrt_goal_bias,
                self.control_sampler)

        A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(
            self.delta_t, self.robot_dof)
        time_to_generate_paths = 0.0
        if check_positive_definite([C, D]):
            m_covs = None
            if self.inc_covariance == "process":
                m_covs = np.linspace(self.min_process_covariance,
                                     self.max_process_covariance,
                                     self.covariance_steps)
            elif self.inc_covariance == "observation":
                m_covs = np.linspace(self.min_observation_covariance,
                                     self.max_observation_covariance,
                                     self.covariance_steps)
        for j in xrange(len(m_covs)):
            M = None
            N = None
            if self.inc_covariance == "process":
                """ The process noise covariance matrix """
                M = self.calc_covariance_value(self.robot, m_covs[j], M_base)
                #M = m_covs[j] * M_base
                """ The observation error covariance matrix """
                N = self.calc_covariance_value(self.robot,
                                               self.min_observation_covariance,
                                               N_base,
                                               covariance_type='observation')
            elif self.inc_covariance == "observation":
                M = self.calc_covariance_value(self.robot,
                                               self.min_process_covariance,
                                               M_base)
                N = self.calc_covariance_value(self.robot,
                                               m_covs[j],
                                               N_base,
                                               covariance_type='observation')
            P_t = np.array([[0.0 for k in xrange(2 * self.robot_dof)]
                            for l in xrange(2 * self.robot_dof)])

            path_planner.setup_path_evaluator(
                A, B, C, D, H, M, N, V, W, self.robot, self.sample_size,
                self.obstacles, self.goal_position, self.goal_radius,
                self.robot_file, self.environment_file)
            sim.setup_problem(A, B, C, D, H, V, W, M, N, self.robot,
                              self.enforce_control_constraints, self.obstacles,
                              self.goal_position, self.goal_radius,
                              self.max_velocity, self.show_viewer_simulation,
                              self.robot_file, self.environment_file)
            sim.set_stop_when_colliding(self.replan_when_colliding)
            if self.dynamic_problem:
                path_evaluator.setup_dynamic_problem()
                sim.setup_dynamic_problem(self.simulation_step_size)
            path_planner.setup_reward_function(self.step_penalty,
                                               self.exit_reward,
                                               self.illegal_move_penalty,
                                               self.discount_factor)
            mean_number_planning_steps = 0.0
            number_of_steps = 0.0
            mean_planning_time = 0.0
            num_generated_paths_run = 0.0
            successful_runs = 0
            num_collisions = 0.0
            linearization_error = 0.0
            final_states = []
            rewards_cov = []
            for k in xrange(self.num_simulation_runs):
                print "MPC: Run " + str(k + 1)
                self.serializer.write_line("log.log", tmp_dir,
                                           "RUN #" + str(k + 1) + " \n")
                current_step = 0
                x_true = self.start_state
                x_estimate = self.start_state
                x_tilde_linear = np.array(
                    [0.0 for i in xrange(2 * self.robot_dof)])
                P_t = np.array([[0.0 for i in xrange(2 * self.robot_dof)]
                                for i in xrange(2 * self.robot_dof)])
                deviation_covariance = np.array(
                    [[0.0 for i in xrange(2 * self.robot_dof)]
                     for i in xrange(2 * self.robot_dof)])
                estimated_deviation_covariance = np.array(
                    [[0.0 for i in xrange(2 * self.robot_dof)]
                     for i in xrange(2 * self.robot_dof)])
                total_reward = 0.0
                terminal = False
                while current_step < self.max_num_steps and not terminal:
                    path_planner.set_start_and_goal(x_estimate, goal_states,
                                                    self.goal_position,
                                                    self.goal_radius)
                    t0 = time.time()
                    (xs, us, zs, control_durations, num_generated_paths,
                     best_val, state_covariances, deviation_covariances,
                     estimated_deviation_covariances, mean_gen_times,
                     mean_eval_times, total_gen_times,
                     total_eval_times) = path_planner.plan_and_evaluate_paths(
                         self.num_paths, 0, current_step,
                         self.evaluation_horizon, P_t, deviation_covariance,
                         estimated_deviation_covariance, self.timeout)
                    mean_planning_time += time.time() - t0
                    mean_number_planning_steps += 1.0
                    num_generated_paths_run += num_generated_paths
                    if len(xs) == 0:
                        logging.error(
                            "MPC: Couldn't find any paths from start state" +
                            str(x_estimate) + " to goal states")
                        final_states.append(x_true)
                        #total_reward = np.array([-self.illegal_move_penalty])[0]
                        current_step += 1
                        break
                    x_tilde = np.array(
                        [0.0 for i in xrange(2 * self.robot_dof)])
                    n_steps = self.num_execution_steps

                    if n_steps > len(xs) - 1:
                        n_steps = len(xs) - 1
                    if current_step + n_steps > self.max_num_steps:
                        n_steps = self.max_num_steps - current_step
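                    # (added note) Receding-horizon step: execute at most
                    # num_execution_steps controls of the freshly planned
                    # path, then replan from the updated state estimate.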
                    (x_true, x_tilde, x_tilde_linear, x_estimate, P_t,
                     current_step, total_reward, terminal, estimated_s,
                     estimated_c, history_entries) = sim.simulate_n_steps(
                         xs,
                         us,
                         zs,
                         control_durations,
                         x_true,
                         x_tilde,
                         x_tilde_linear,
                         x_estimate,
                         P_t,
                         total_reward,
                         current_step,
                         n_steps,
                         0.0,
                         0.0,
                         max_num_steps=self.max_num_steps)
                    #print "len(hist) " + str(len(history_entries))
                    #print "len(deviation_covariances) " + str(len(deviation_covariances))
                    #print "len(estimated_deviation_covariances) " + str(len(estimated_deviation_covariances))
                    try:
                        deviation_covariance = deviation_covariances[
                            len(history_entries) - 1]
                        estimated_deviation_covariance = estimated_deviation_covariances[
                            len(history_entries) - 1]
                    except:
                        print "what: len(deviation_covariances) " + str(
                            len(deviation_covariances))
                        print "len(history_entries) " + str(
                            len(history_entries))
                        print "len(xs) " + str(len(xs))

                    if (current_step == self.max_num_steps) or terminal:
                        final_states.append(history_entries[-1].x_true)
                        if terminal:
                            successful_runs += 1

                    history_entries[0].set_replanning(True)
                    for l in xrange(len(history_entries)):
                        try:
                            history_entries[l].set_estimated_covariance(
                                state_covariances[l])
                        except:
                            print "l " + str(l)
                            print "len(state_covariances) " + str(
                                len(state_covariances))

                        history_entries[l].serialize(tmp_dir, "log.log")
                        if history_entries[l].collided:
                            num_collisions += 1
                        linearization_error += history_entries[
                            l].linearization_error
                    #logging.warn("MPC: Execution finished. True state is " + str(x_true))
                    #logging.warn("MPC: Estimated state is " + str(x_estimate))
                    logging.warn("MPC: Executed " + str(current_step) +
                                 " steps")
                    logging.warn("MPC: terminal " + str(terminal))
                    if terminal:
                        logging.info("MPC: Final state: " + str(x_true))
                rewards_cov.append(total_reward)
                self.serializer.write_line(
                    "log.log", tmp_dir, "Reward: " + str(total_reward) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "\n")
                number_of_steps += current_step
            mean_planning_time_per_step = mean_planning_time / mean_number_planning_steps
            mean_planning_time /= self.num_simulation_runs
            """ Calculate the distance to goal area
            """
            ee_position_distances = []
            for state in final_states:
                joint_angles = v_double()
                joint_angles[:] = [state[y] for y in xrange(len(state) / 2)]
                ee_position = v_double()
                self.robot.getEndEffectorPosition(joint_angles, ee_position)
                ee_position = np.array(
                    [ee_position[y] for y in xrange(len(ee_position))])
                dist = np.linalg.norm(
                    np.array(self.goal_position - ee_position))
                if dist < self.goal_radius:
                    dist = 0.0
                ee_position_distances.append(dist)
            logging.info("MPC: Done. total_reward is " + str(total_reward))
            try:
                n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(
                    np.array(ee_position_distances))
            except:
                print ee_position_distances

            self.serializer.write_line("log.log", tmp_dir,
                                       "################################# \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "inc_covariance: " + str(self.inc_covariance) + "\n")
            if self.inc_covariance == "process":
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Process covariance: " + str(m_covs[j]) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir, "Observation covariance: " +
                    str(self.min_observation_covariance) + " \n")
            elif self.inc_covariance == "observation":
                self.serializer.write_line(
                    "log.log", tmp_dir, "Process covariance: " +
                    str(self.min_process_covariance) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Observation covariance: " + str(m_covs[j]) + " \n")
            mean_linearization_error = linearization_error / number_of_steps
            number_of_steps /= self.num_simulation_runs
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Mean number of steps: " + str(number_of_steps) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean num collisions per run: " +
                str(float(num_collisions) / float(self.num_simulation_runs)) +
                " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Average distance to goal area: " +
                str(mean_distance_to_goal) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Num successes: " + str(successful_runs) + " \n")
            print "succ " + str(
                (100.0 / self.num_simulation_runs) * successful_runs)
            self.serializer.write_line(
                "log.log", tmp_dir, "Percentage of successful runs: " + str(
                    (100.0 / self.num_simulation_runs) * successful_runs) +
                " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean planning time per run: " +
                str(mean_planning_time) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean planning time per planning step: " +
                str(mean_planning_time_per_step) + " \n")

            n, min_max, mean, var, skew, kurt = scipy.stats.describe(
                np.array(rewards_cov))
            print "mean_rewards " + str(mean)
            #plt.plot_histogram_from_data(rewards_cov)
            #sleep
            self.serializer.write_line("log.log", tmp_dir,
                                       "Mean rewards: " + str(mean) + " \n")
            self.serializer.write_line("log.log", tmp_dir,
                                       "Reward variance: " + str(var) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean linearisation error: " +
                str(mean_linearization_error) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
            self.serializer.write_line("log.log", tmp_dir,
                                       "Seed: " + str(self.seed) + " \n")
            cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_mpc_" + str(
                m_covs[j]) + ".log"
            os.system(cmd)

        cmd = "cp " + self.abs_path + "/config_mpc.yaml " + dir
        os.system(cmd)

        if not os.path.exists(dir + "/environment"):
            os.makedirs(dir + "/environment")

        cmd = "cp " + self.abs_path + "/" + str(
            self.environment_file) + " " + str(dir) + "/environment"
        os.system(cmd)

        if not os.path.exists(dir + "/model"):
            os.makedirs(dir + "/model")

        cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
        os.system(cmd)
        print "Done."

    def init_robot(self, urdf_model_file):
        self.robot = Robot(self.abs_path + "/" + urdf_model_file)
        self.robot.enforceConstraints(self.enforce_constraints)
        self.robot.setGravityConstant(self.gravity_constant)
        self.robot.setAccelerationLimit(self.acceleration_limit)
        """ Setup operations """
        self.robot_dof = self.robot.getDOF()
        if len(self.start_state) != 2 * self.robot_dof:
            logging.error(
                "MPC: Start state dimension doesn't fit to the robot state space dimension"
            )
            return False
        return True

    def run_viewer(self, model_file, env_file):
        show_viewer = True
        rot = v_double()
        trans = v_double()
        rot[:] = [-1.0, 0.0, 0.0, 0.0]
        trans[:] = [0.0, 0.0, 3.0]
        if show_viewer:
            self.robot.setViewerBackgroundColor(0.6, 0.8, 0.6)
            self.robot.setViewerSize(1280, 768)
            self.robot.setupViewer(model_file, env_file)
        fx = 0.0
        fy = 0.0
        fz = 0.0
        f_roll = 0.0
        f_pitch = 0.0
        f_yaw = 0.0
        x = [0.0 for i in xrange(2 * self.robot_dof)]
        x = [
            0.8110760662014179, -0.2416850600576381, 1.5922944779127346,
            5.1412306972285471, -10.0, 2.5101115097604483
        ]

        integration_times = []
        collision_check_times1 = []
        collision_check_times2 = []

        y = 0
        while True:
            #u_in = [3.0, 1.5, 0.0, 0.0, 0.0, 0.0]
            u_in = [0.0 for i in xrange(self.robot_dof)]
            #u_in[0] = 150.0
            #u_in[1] = 70.0
            current_state = v_double()
            current_state[:] = x
            control = v_double()
            control[:] = u_in

            control_error = v_double()
            ce = [0.0 for i in xrange(self.robot_dof)]
            control_error[:] = ce
            result = v_double()

            t0 = time.time()
            self.robot.propagate(current_state, control, control_error,
                                 self.simulation_step_size, 0.03, result)
            t = time.time() - t0
            integration_times.append(t)
            if y == 10000:
                t_sum = sum(integration_times)
                t_mean = t_sum / len(integration_times)
                print "mean integration times: " + str(t_mean)
                t_sum = sum(collision_check_times1)
                t_mean = t_sum / len(collision_check_times1)
                print "mean collision check times old " + str(t_mean)
                t_mean = sum(collision_check_times2) / len(
                    collision_check_times2)
                print "mean collision check times new " + str(t_mean)
                sleep  # undefined name: deliberately raises NameError to halt here

            ja_start = v_double()
            ja_start[:] = [
                current_state[i] for i in xrange(len(current_state) / 2)
            ]
            collision_objects_start = self.robot.createRobotCollisionObjects(
                ja_start)
            joint_angles = v_double()
            joint_angles[:] = [result[i] for i in xrange(len(result) / 2)]
            collision_objects_goal = self.robot.createRobotCollisionObjects(
                joint_angles)
            #print "ee_velocity " + str([ee_velocity[i] for i in xrange(len(ee_velocity))])
            t_coll_check1 = time.time()
            t2 = time.time() - t_coll_check1  # the collision check that was timed here has been removed; t2 measures ~0
            collision_check_times2.append(t2)
            '''
            Get the end effector position
            '''
            #ee_position = v_double()
            #self.robot.getEndEffectorPosition(joint_angles, ee_position)
            #ee_collision_objects = self.robot.createEndEffectorCollisionObject(joint_angles)
            in_collision = False
            t_c = time.time()
            for o in self.obstacles:
                if o.inCollisionDiscrete(collision_objects_goal):
                    in_collision = True
                    break
                '''for i in xrange(len(collision_objects_start)):                        
                    if o.inCollisionContinuous([collision_objects_start[i], collision_objects_goal[i]]):
                        in_collision = True
                        break'''
            if in_collision:
                print "COLLISION"

            t = time.time() - t_c
            collision_check_times1.append(t)

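            # Split the propagated state into joint values (first half) and
            # joint velocities (second half) for the viewer update below.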
            x = np.array([result[i] for i in xrange(len(result))])
            cjvals = v_double()
            cjvels = v_double()
            cjvals_arr = [x[i] for i in xrange(len(x) / 2)]
            cjvels_arr = [x[i] for i in xrange(len(x) / 2, len(x))]
            cjvals[:] = cjvals_arr
            cjvels[:] = cjvels_arr
            particle_joint_values = v2_double()
            if show_viewer:
                self.robot.updateViewerValues(cjvals, cjvels,
                                              particle_joint_values,
                                              particle_joint_values)
            time.sleep(0.03)

            y += 1
            print y

    def setup_scene(self, environment_file, robot):
        """ Load the obstacles """
        self.obstacles = self.utils.loadObstaclesXML(self.abs_path + "/" +
                                                     environment_file)
        """ Load the goal area """
        goal_area = v_double()
        self.utils.loadGoalArea(self.abs_path + "/" + environment_file,
                                goal_area)
        if len(goal_area) == 0:
            print "ERROR: Your environment file doesn't define a goal area"
            return False
        self.goal_position = [goal_area[i] for i in xrange(0, 3)]
        self.goal_radius = goal_area[3]
        return True

    def init_serializer(self):
        self.serializer = Serializer()
        self.serializer.create_temp_dir(self.abs_path, "mpc")

    def clear_stats(self, dir):
        if os.path.isdir(dir):
            cmd = "rm -rf " + dir + "/*"
            os.system(cmd)
        else:
            os.makedirs(dir)

    def get_average_distance_to_goal_area(self, goal_position, goal_radius,
                                          cartesian_coords):
        avg_dist = 0.0
        goal_pos = np.array(goal_position)
        for i in xrange(len(cartesian_coords)):
            cart = np.array(cartesian_coords[i])
            dist = np.linalg.norm(goal_pos - cart)
            if dist < goal_radius:
                dist = 0.0
            avg_dist += dist
        if avg_dist == 0.0:
            return avg_dist
        return float(avg_dist) / len(cartesian_coords)

    def calc_covariance_value(self,
                              robot,
                              error,
                              covariance_matrix,
                              covariance_type='process'):
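        # 'error' is interpreted as a percentage of the relevant physical range:
        # e.g. a torque range of 20.0 and error = 2.0 yield a diagonal entry of
        # ((20.0 / 100.0) * 2.0)**2 = 0.16.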
        active_joints = v_string()
        robot.getActiveJoints(active_joints)
        if covariance_type == 'process':
            if self.dynamic_problem:
                torque_limits = v_double()
                robot.getJointTorqueLimits(active_joints, torque_limits)
                torque_limits = [
                    torque_limits[i] for i in xrange(len(torque_limits))
                ]
                for i in xrange(len(torque_limits)):
                    torque_range = 2.0 * torque_limits[i]
                    covariance_matrix[i, i] = np.square(
                        (torque_range / 100.0) * error)
            else:
                for i in xrange(self.robot_dof):
                    covariance_matrix[i, i] = np.square(
                        (self.max_velocity / 100.0) * error)
        else:
            lower_position_limits = v_double()
            upper_position_limits = v_double()
            velocity_limits = v_double()
            robot.getJointLowerPositionLimits(active_joints,
                                              lower_position_limits)
            robot.getJointUpperPositionLimits(active_joints,
                                              upper_position_limits)
            robot.getJointVelocityLimits(active_joints, velocity_limits)
            for i in xrange(self.robot_dof):
                position_range = upper_position_limits[
                    i] - lower_position_limits[i]
                covariance_matrix[i, i] = np.square(
                    (position_range / 100.0) * error)
                velocity_range = 2.0 * velocity_limits[i]
                covariance_matrix[i + self.robot_dof,
                                  i + self.robot_dof] = np.square(
                                      (velocity_range / 100.0) * error)
        return covariance_matrix

    def problem_setup(self, delta_t, num_links):
        A = np.identity(num_links * 2)
        H = np.identity(num_links * 2)
        W = np.identity(num_links * 2)
        C = self.path_deviation_cost * np.identity(num_links * 2)
        '''B = delta_t * np.identity(num_links * 2)                
        V = np.identity(num_links * 2)
        D = self.control_deviation_cost * np.identity(num_links * 2)        
        M_base = np.identity(2 * self.robot_dof)
        N_base = np.identity(2 * self.robot_dof)'''

        B = delta_t * np.vstack(
            (np.identity(num_links), np.zeros((num_links, num_links))))
        V = np.vstack((np.identity(num_links), np.zeros(
            (num_links, num_links))))
        D = self.control_deviation_cost * np.identity(num_links)
        M_base = np.identity(self.robot_dof)
        N_base = np.identity(2 * self.robot_dof)

        return A, H, B, V, W, C, D, M_base, N_base

    def set_params(self, config):
        self.num_paths = config['num_generated_paths']
        self.use_linear_path = config['use_linear_path']
        self.max_velocity = config['max_velocity']
        self.delta_t = 1.0 / config['control_rate']
        self.start_state = config['start_state']
        self.num_simulation_runs = config['num_simulation_runs']
        self.num_bins = config['num_bins']
        self.min_process_covariance = config['min_process_covariance']
        self.max_process_covariance = config['max_process_covariance']
        self.covariance_steps = config['covariance_steps']
        self.min_observation_covariance = config['min_observation_covariance']
        self.max_observation_covariance = config['max_observation_covariance']
        self.discount_factor = config['discount_factor']
        self.illegal_move_penalty = config['illegal_move_penalty']
        self.step_penalty = config['step_penalty']
        self.exit_reward = config['exit_reward']
        self.stop_when_terminal = config['stop_when_terminal']
        self.enforce_constraints = config['enforce_constraints']
        self.enforce_control_constraints = config[
            'enforce_control_constraints']
        self.sample_size = config['sample_size']
        self.plot_paths = config['plot_paths']
        self.planning_algortihm = config['planning_algorithm']
        self.dynamic_problem = config['dynamic_problem']
        self.simulation_step_size = config['simulation_step_size']
        self.path_timeout = config['path_timeout']
        self.continuous_collision = config['continuous_collision_check']
        self.show_viewer_evaluation = config['show_viewer_evaluation']
        self.show_viewer_simulation = config['show_viewer_simulation']
        self.path_deviation_cost = config['path_deviation_cost']
        self.control_deviation_cost = config['control_deviation_cost']
        self.num_control_samples = config['num_control_samples']
        self.min_control_duration = config['min_control_duration']
        self.max_control_duration = config['max_control_duration']
        self.inc_covariance = config['inc_covariance']
        self.add_intermediate_states = config['add_intermediate_states']
        self.gravity_constant = config['gravity']
        self.num_generated_goal_states = config['num_generated_goal_states']
        self.robot_file = config['robot_file']
        self.environment_file = config['environment_file']
        self.rrt_goal_bias = config['rrt_goal_bias']
        self.control_sampler = config['control_sampler']
        self.max_num_steps = config['max_num_steps']
        self.evaluation_horizon = config['horizon']
        self.timeout = config['timeout']
        self.seed = config['seed']
        self.num_cores = config['num_cores']
        self.replan_when_colliding = config['replan_when_colliding']
        self.acceleration_limit = config['acceleration_limit']
        """
        The number of steps a path is executed before a new path is computed
        """
        self.num_execution_steps = config['num_execution_steps']
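
# --- Editor's sketch (not part of HRF.py) ---
# The matrices built in problem_setup above describe a linear(ised) transition
# model x_{t+1} = A x_t + B u_t + V m_t, where the state stacks joint angles on
# top of joint velocities and B, V map DOF-sized controls and process noise
# into the position half of the state. A minimal sketch, assuming illustrative
# values num_links = 3 and delta_t = 0.1:
import numpy as np

num_links = 3
delta_t = 0.1
A = np.identity(num_links * 2)
B = delta_t * np.vstack((np.identity(num_links),
                         np.zeros((num_links, num_links))))
V = np.vstack((np.identity(num_links),
               np.zeros((num_links, num_links))))

x = np.zeros(num_links * 2)      # [angles; velocities]
u = np.array([0.5, 0.0, -0.5])   # one (velocity) control per DOF
m = np.zeros(num_links)          # process noise sample
x_next = A.dot(x) + B.dot(u) + V.dot(m)
print x_next                     # angles advance by delta_t * u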
Example no. 6
0
File: HRF.py Project: hoergems/LQG
class HRF:
    def __init__(self):
        self.abs_path = os.path.dirname(os.path.abspath(__file__))
        cmd = "rm -rf " + self.abs_path + "/tmp"
        popen = subprocess.Popen(cmd, cwd=self.abs_path, shell=True)
        popen.wait()
        """ Reading the config """
        warnings.filterwarnings("ignore")
        self.init_serializer()
        config = self.serializer.read_config("config_hrf.yaml", path=self.abs_path)
        self.set_params(config)
        if self.seed < 0:
            """
            Generate a random seed that will be stored
            """
            self.seed = np.random.randint(0, 42949672)
        np.random.seed(self.seed)
        
        logging_level = logging.WARN
        if config['verbose']:
            logging_level = logging.DEBUG
        logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_level)        
        np.set_printoptions(precision=16)
        dir = self.abs_path + "/stats/hrf"
        tmp_dir = self.abs_path + "/tmp/hrf"
        self.utils = Utils()
        if not self.init_robot(self.robot_file):
            logging.error("HRF: Couldn't initialize robot")
            return               
        if not self.setup_scene(self.environment_file, self.robot):
            return        
            
        self.clear_stats(dir)
        logging.info("Start up simulator")
        sim = Simulator() 
        plan_adjuster = PlanAdjuster()       
        path_evaluator = PathEvaluator()
        path_planner = PathPlanningInterface()
        if self.show_viewer_simulation:
            self.robot.setupViewer(self.robot_file, self.environment_file)            
        
        logging.info("HRF: Generating goal states...")
        problem_feasible = False
        while not problem_feasible:
            print "Creating random scene..." 
            #self.create_random_obstacles(20)
            goal_states = get_goal_states("hrf",
                                          self.abs_path,
                                          self.serializer, 
                                          self.obstacles,                                                                           
                                          self.robot,                                    
                                          self.max_velocity,
                                          self.delta_t,
                                          self.start_state,
                                          self.goal_position,
                                          self.goal_radius,
                                          self.planning_algortihm,
                                          self.path_timeout,
                                          self.num_generated_goal_states,
                                          self.continuous_collision,
                                          self.environment_file,
                                          self.num_cores)
            
            if len(goal_states) == 0:
                logging.error("HRF: Couldn't generate any goal states. Problem seems to be infeasible")
            else:
                problem_feasible = True
        '''obst = v_obstacle()
        obst[:] = self.obstacles
        self.robot.addObstacles(obst)'''
        logging.info("HRF: Generated " + str(len(goal_states)) + " goal states")
        sim.setup_reward_function(self.discount_factor, self.step_penalty, self.illegal_move_penalty, self.exit_reward)
        path_planner.setup(self.robot,                         
                           self.obstacles,  
                           self.max_velocity, 
                           self.delta_t, 
                           self.use_linear_path,
                           self.planning_algortihm,
                           self.path_timeout,
                           self.continuous_collision,
                           self.num_cores)
        if self.dynamic_problem:
            path_planner.setup_dynamic_problem(self.robot_file,
                                               self.environment_file,
                                               self.simulation_step_size,
                                               self.num_control_samples,
                                               self.min_control_duration,
                                               self.max_control_duration,
                                               self.add_intermediate_states,
                                               self.rrt_goal_bias,
                                               self.control_sampler)
        
        A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(self.delta_t, self.robot_dof)
        time_to_generate_paths = 0.0
        if check_positive_definite([C, D]):
            m_covs = None
            if self.inc_covariance == "process":
                m_covs = np.linspace(self.min_process_covariance, 
                                     self.max_process_covariance, 
                                     self.covariance_steps)                 
            elif self.inc_covariance == "observation":          
                m_covs = np.linspace(self.min_observation_covariance, 
                                     self.max_observation_covariance,
                                     self.covariance_steps)
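        # NOTE: if check_positive_definite fails, m_covs stays None and the loop
        # below raises a TypeError; valid cost matrices C and D are assumed here.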
        for j in xrange(len(m_covs)):
            M = None
            N = None
            if self.inc_covariance == "process":
                """ The process noise covariance matrix """
                M = self.calc_covariance_value(self.robot, m_covs[j], M_base)
                #M = m_covs[j] * M_base
                    
                """ The observation error covariance matrix """
                N = self.calc_covariance_value(self.robot, 
                                               self.min_observation_covariance, 
                                               N_base, 
                                               covariance_type='observation')                    
            elif self.inc_covariance == "observation":
                M = self.calc_covariance_value(self.robot, 
                                               self.min_process_covariance,
                                               M_base)
                N = self.calc_covariance_value(self.robot, 
                                               m_covs[j],
                                               N_base, 
                                               covariance_type='observation')
            
            path_planner.setup_path_evaluator(A, B, C, D, H, M, N, V, W,                                     
                                              self.robot, 
                                              self.sample_size, 
                                              self.obstacles,
                                              self.goal_position,
                                              self.goal_radius,                                              
                                              self.robot_file,
                                              self.environment_file)
            sim.setup_problem(A, B, C, D, H, V, W, M, N,
                              self.robot, 
                              self.enforce_control_constraints,
                              self.obstacles, 
                              self.goal_position, 
                              self.goal_radius,
                              self.max_velocity,                                  
                              self.show_viewer_simulation,
                              self.robot_file,
                              self.environment_file,
                              self.knows_collision)
            path_evaluator.setup(A, B, C, D, H, M, N, V, W,                                     
                                 self.robot, 
                                 self.sample_size, 
                                 self.obstacles,
                                 self.goal_position,
                                 self.goal_radius,
                                 self.show_viewer_evaluation,
                                 self.robot_file,
                                 self.environment_file,
                                 self.num_cores)
            path_evaluator.setup_reward_function(self.step_penalty, self.illegal_move_penalty, self.exit_reward, self.discount_factor)
            plan_adjuster.setup(self.robot,
                                M, 
                                H, 
                                W, 
                                N, 
                                C, 
                                D,
                                self.dynamic_problem, 
                                self.enforce_control_constraints)
            plan_adjuster.set_simulation_step_size(self.simulation_step_size)
            plan_adjuster.set_model_matrices(A, B, V)
            if not self.dynamic_problem:
                plan_adjuster.set_max_joint_velocities_linear_problem(np.array([self.max_velocity for i in xrange(self.robot_dof)]))
            sim.set_stop_when_colliding(self.replan_when_colliding)
            if self.dynamic_problem:
                path_evaluator.setup_dynamic_problem()
                sim.setup_dynamic_problem(self.simulation_step_size)
            path_planner.setup_reward_function(self.step_penalty, self.exit_reward, self.illegal_move_penalty, self.discount_factor)
            mean_number_planning_steps = 0.0
            number_of_steps = 0.0
            mean_planning_time = 0.0
            num_generated_paths_run = 0.0
            successful_runs = 0
            num_collisions = 0.0
            linearization_error = 0.0
            final_states = []
            rewards_cov = []
            for k in xrange(self.num_simulation_runs):
                print "HRF: Run " + str(k + 1)                                
                self.serializer.write_line("log.log", tmp_dir, "RUN #" + str(k + 1) + " \n")
                current_step = 0
                x_true = np.array([self.start_state[m] for m in xrange(len(self.start_state))])
                x_estimated = np.array([self.start_state[m] for m in xrange(len(self.start_state))])
                #x_estimated[0] += 0.2                
                #x_estimated[0] = 0.4             
                #x_predicted = self.start_state                               
                P_t = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
                P_ext_t = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
                deviation_covariance = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
                estimated_deviation_covariance = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
                total_reward = 0.0
                terminal = False 
                last_x_true = None               
                
                """
                Obtain a nominal path
                """
                print "plan"
                path_planner.set_start_and_goal(x_estimated, goal_states, self.goal_position, self.goal_radius)
                t0 = time.time()                
                (xs, 
                 us, 
                 zs,
                 control_durations, 
                 num_generated_paths, 
                 objective,
                 state_covariances,
                 deviation_covariances,
                 estimated_deviation_covariances, 
                 mean_gen_times, 
                 mean_eval_times,
                 total_gen_times,
                 total_eval_times) = path_planner.plan_and_evaluate_paths(1, 
                                                                          0, 
                                                                          current_step, 
                                                                          self.evaluation_horizon, 
                                                                          P_t,
                                                                          deviation_covariance,
                                                                          estimated_deviation_covariance, 
                                                                          0.0)
                print "objective " + str(objective)
                
                while True: 
                    print "current step " + str(current_step) 
                    '''if current_step == 7:
                        x_true = np.array([last_x_true[k] for k in xrange(len(last_x_true))])
                        for k in xrange(len(last_x_true) / 2, len(last_x_true)):
                            last_x_true[k] = 0.0'''   
                    """
                    Predict system state at t+1 using nominal path
                    """
                    
                    """ Get state matrices """
                    #As, Bs, Vs, Ms, Hs, Ws, Ns = sim.get_linear_model_matrices(xs, us, control_durations)
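                    # Linearise only around the current state estimate and the
                    # first nominal control, rather than along the whole nominal
                    # path (the commented-out call above).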
                    As, Bs, Vs, Ms, Hs, Ws, Ns = sim.get_linear_model_matrices([x_estimated], [us[0]], control_durations)
                    
                    """ Predict using EKF """
                    (x_predicted_temp, P_predicted) = kalman.predict_state(self.robot,
                                                                           x_estimated,                                                                     
                                                                           us[0], 
                                                                           control_durations[0],
                                                                           self.simulation_step_size, 
                                                                           As[0],
                                                                           Bs[0],
                                                                           Vs[0],
                                                                           Ms[0],
                                                                           P_ext_t,
                                                                           self.dynamic_problem)                    
                    
                    """ Make sure x_predicted fulfills the constraints """                 
                    if self.enforce_constraints:     
                        x_predicted_temp = sim.check_constraints(x_predicted_temp)
                    predicted_collided = True              
                    if not sim.is_in_collision([], x_predicted_temp)[0]:                                                                                                    
                        x_predicted = x_predicted_temp
                        predicted_collided = False
                    else: 
                        print "X_PREDICTED COLLIDES!"
                        x_predicted = x_estimated         
                        for l in xrange(len(x_predicted) / 2, len(x_predicted)):                            
                            x_predicted[l] = 0 
                            
                    last_x_true = np.array([x_true[k] for k in xrange(len(x_true))])  
                    
                    """
                    Execute path for 1 time step
                    """                    
                    (x_true,                     
                     x_tilde,
                     x_tilde_linear, 
                     x_estimated_dash,
                     z, 
                     P_t, 
                     current_step, 
                     total_reward,
                     terminal,
                     estimated_s,
                     estimated_c,
                     history_entries) = sim.simulate_n_steps(xs, us, zs,
                                                             control_durations,
                                                             x_true,
                                                             x_estimated,
                                                             P_t,
                                                             total_reward,                                                                 
                                                             current_step,
                                                             1,
                                                             0.0,
                                                             0.0,
                                                             max_num_steps=self.max_num_steps)
                    
                    history_entries[-1].set_best_reward(objective)                                        
                     
                    """
                    Process history entries
                    """                    
                    try:
                        deviation_covariance = deviation_covariances[len(history_entries) - 1]
                        estimated_deviation_covariance = estimated_deviation_covariances[len(history_entries) - 1]
                    except:
                        print "WARNING: len(deviation_covariances) " + str(len(deviation_covariances))
                        print "len(history_entries) " + str(len(history_entries))
                        print "len(xs) " + str(len(xs))
                    
                    history_entries[0].set_replanning(True)                                           
                    for l in xrange(len(history_entries)):
                        try:
                            history_entries[l].set_estimated_covariance(state_covariances[l])
                        except:
                            print "l " + str(l)
                            print "len(state_covariances) " + str(len(state_covariances))                                                   
                        
                        if history_entries[l].collided:                            
                            num_collisions += 1                            
                        linearization_error += history_entries[l].linearization_error
                    if (current_step == self.max_num_steps) or terminal:
                        history_entries[-2].set_best_reward(objective)
                        history_entries[-1].set_best_reward(None)
                        for l in xrange(len(history_entries)):                            
                            history_entries[l].serialize(tmp_dir, "log.log")
                        final_states.append(history_entries[-1].x_true)                        
                        if terminal:
                            print "Terminal state reached"
                            successful_runs += 1
                        break                    
                        
                    
                    
                    """
                    Plan new trajectories from predicted state
                    """
                    path_planner.set_start_and_goal(x_predicted, goal_states, self.goal_position, self.goal_radius) 
                    t0 = time.time()                   
                    paths = path_planner.plan_paths(self.num_paths, 0, planning_timeout=self.timeout, min_num_paths=1)
                    mean_planning_time += time.time() - t0
                    mean_number_planning_steps += 1.0
                    num_generated_paths_run += len(paths)                    
                    
                    """
                    Filter update
                    """
                    (x_estimated_temp, P_ext_t) = kalman.filter_update(x_predicted, P_predicted, z, Hs[0], Ws[0], Ns[0])
                    
                    
                    """ Make sure x_estimated fulfills the constraints """                 
                    if self.enforce_constraints:     
                        x_estimated_temp = sim.check_constraints(x_estimated_temp) 
                    in_collision, colliding_obstacle = sim.is_in_collision([], x_estimated_temp)
                    if in_collision:
                        history_entries[-1].set_estimate_collided(True)                        
                        for l in xrange(len(x_estimated) / 2, len(x_estimated)):                            
                            x_estimated[l] = 0
                    elif history_entries[-1].collided and self.knows_collision:
                        for l in xrange(len(x_estimated) / 2, len(x_estimated)):                            
                            x_estimated[l] = 0
                    else:
                        x_estimated = x_estimated_temp                         
                        history_entries[-1].set_estimate_collided(False)
                    
                    history_entries[-1].serialize(tmp_dir, "log.log")
                    """
                    Adjust plan
                    """ 
                    t0 = time.time()                   
                    (xs_adj, us_adj, zs_adj, control_durations_adj, adjusted) = plan_adjuster.adjust_plan(self.robot,
                                                                                                         (xs, us, zs, control_durations),                                                                                                
                                                                                                         x_estimated,
                                                                                                         P_t)
                    if not adjusted:
                        final_states.append(history_entries[-1].x_true)
                        print "current step " + str(current_step)
                        #raise ValueError("Raised")  # debug guard; it made the break below unreachable
                        break
                    if self.show_viewer_simulation:
                        sim.update_viewer(x_true, x_estimated, z, control_duration=0.03, colliding_obstacle=sim.colliding_obstacle)
                    
                    """
                    Evaluate the adjusted plan and the planned paths
                    """
                    #if self.is_terminal(xs_adj[-1]) and not len(xs_adj)==1:
                    if len(xs_adj) > 1:
                        """
                        Only evaluate the adjusted path if it contains more than one state
                        """
                        paths.extend([[xs_adj, us_adj, zs_adj, control_durations_adj]])
                    if len(paths) == 0:
                        """
                        Running out of paths
                        """
                        print "Error: Couldn't generate an evaluate any paths"
                        final_states.append(history_entries[-1].x_true)
                        break
                    
                    (path_index,
                     xs, 
                     us, 
                     zs, 
                     control_durations, 
                     objective, 
                     state_covariances,
                     deviation_covariances,
                     estimated_deviation_covariances) = path_evaluator.evaluate_paths(paths, 
                                                                                      P_t, 
                                                                                      current_step)
                    mean_planning_time += time.time() - t0
                    if path_index is None:
                        """
                        We couldn't evaluate any paths
                        """
                        final_states.append(history_entries[-1].x_true)
                        break
                    
                    if self.show_viewer_simulation:
                        self.visualize_paths(self.robot, [xs])    
                                                 
                rewards_cov.append(total_reward)                
                self.serializer.write_line("log.log", 
                                           tmp_dir,
                                           "Reward: " + str(total_reward) + " \n") 
                self.serializer.write_line("log.log", 
                                           tmp_dir,
                                           "\n")
                number_of_steps += current_step
            mean_planning_time_per_step = mean_planning_time / mean_number_planning_steps 
            mean_planning_time /= self.num_simulation_runs
            mean_num_generated_paths_step = num_generated_paths_run / mean_number_planning_steps
                                
            """ Calculate the distance to goal area
            """  
            ee_position_distances = [] 
            for state in final_states:
                joint_angles = v_double()
                joint_angles[:] = [state[y] for y in xrange(len(state) / 2)]
                ee_position = v_double()
                self.robot.getEndEffectorPosition(joint_angles, ee_position)
                ee_position = np.array([ee_position[y] for y in xrange(len(ee_position))])
                dist = np.linalg.norm(np.array(self.goal_position) - ee_position)
                if dist < self.goal_radius:
                    dist = 0.0
                ee_position_distances.append(dist)
            logging.info("HRF: Done. total_reward is " + str(total_reward))
            try:
                n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(np.array(ee_position_distances))
            except:
                print ee_position_distances                                         
                
            self.serializer.write_line("log.log", tmp_dir, "################################# \n")
            self.serializer.write_line("log.log",
                                       tmp_dir,
                                       "inc_covariance: " + str(self.inc_covariance) + "\n")
            if self.inc_covariance == "process":                  
                self.serializer.write_line("log.log",
                                           tmp_dir,
                                           "Process covariance: " + str(m_covs[j]) + " \n")
                self.serializer.write_line("log.log",
                                           tmp_dir,
                                           "Observation covariance: " + str(self.min_observation_covariance) + " \n")
            elif self.inc_covariance == "observation":                    
                self.serializer.write_line("log.log",
                                           tmp_dir,
                                           "Process covariance: " + str(self.min_process_covariance) + " \n")
                self.serializer.write_line("log.log",
                                           tmp_dir,
                                           "Observation covariance: " + str(m_covs[j]) + " \n")
            mean_linearization_error = linearization_error / number_of_steps
            number_of_steps /= self.num_simulation_runs
            self.serializer.write_line("log.log", tmp_dir, "Mean number of steps: " + str(number_of_steps) + " \n") 
            self.serializer.write_line("log.log", tmp_dir, "Mean number of generated paths per step: " + str(mean_num_generated_paths_step) + " \n")                            
            self.serializer.write_line("log.log", tmp_dir, "Mean num collisions per run: " + str(float(num_collisions) / float(self.num_simulation_runs)) + " \n")
            self.serializer.write_line("log.log", 
                                       tmp_dir, 
                                       "Average distance to goal area: " + str(mean_distance_to_goal) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Num successes: " + str(successful_runs) + " \n")
            print "succ " + str((100.0 / self.num_simulation_runs) * successful_runs)
            self.serializer.write_line("log.log", tmp_dir, "Percentage of successful runs: " + str((100.0 / self.num_simulation_runs) * successful_runs) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean planning time per run: " + str(mean_planning_time) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean planning time per planning step: " + str(mean_planning_time_per_step) + " \n")
            
            n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))
            print "mean_rewards " + str(mean)
            #plt.plot_histogram_from_data(rewards_cov)
            #sleep
            self.serializer.write_line("log.log", tmp_dir, "Mean rewards: " + str(mean) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Reward variance: " + str(var) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Mean linearisation error: " + str(mean_linearization_error) + " \n")
            self.serializer.write_line("log.log", 
                                       tmp_dir, 
                                       "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
            self.serializer.write_line("log.log", tmp_dir, "Seed: " + str(self.seed) + " \n")
            cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_hrf_" + str(m_covs[j]) + ".log"
            os.system(cmd)
        cmd = "cp " + self.abs_path + "/config_hrf.yaml " + dir           
        os.system(cmd)
        
        if not os.path.exists(dir + "/environment"):
            os.makedirs(dir + "/environment") 
            
        cmd = "cp " + self.abs_path + "/" + str(self.environment_file) + " " + str(dir) + "/environment"
        os.system(cmd) 
            
        if not os.path.exists(dir + "/model"):
            os.makedirs(dir + "/model")
                
        cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
        os.system(cmd)
        print "Done."
                    
    def is_terminal(self, x):
        """
        Check if x is terminal
        """
        state = v_double()
        state[:] = [x[j] for j in xrange(len(x) / 2)]
        ee_position_arr = v_double()
        self.robot.getEndEffectorPosition(state, ee_position_arr)
        ee_position = np.array([ee_position_arr[j] for j in xrange(len(ee_position_arr))])
        norm = np.linalg.norm(ee_position - self.goal_position)               
        if norm - 0.01 <= self.goal_radius:                       
            return True
        return False
      
    def visualize_x(self, robot, x, color=None):
        cjvals = v_double()
        cjvels = v_double()
        cjvals[:] = [x[i] for i in xrange(len(x) / 2)]
        cjvels[:] = [x[i] for i in xrange(len(x) / 2, len(x))]
        particle_joint_values = v2_double()
        robot.updateViewerValues(cjvals, 
                                 cjvels, 
                                 particle_joint_values,
                                 particle_joint_values)              
                    
    def visualize_paths(self, robot, paths, colors=None):
        robot.removePermanentViewerParticles()        
        particle_joint_values = v2_double()
        particle_joint_colors = v2_double()
        pjvs = []
        for k in xrange(len(paths)):
            for p in paths[k]:
                particle = v_double()
                particle[:] = [p[i] for i in xrange(len(p) / 2)]
                pjvs.append(particle)
                if colors is None:
                    color = v_double()
                    color[:] = [0.0, 0.0, 0.0, 0.7]
                    particle_joint_colors.append(color)
                else:
                    c = v_double()
                    c[:] = colors[k]  # was 'color' (undefined here); assuming one color per path
                    particle_joint_colors.append(c)
                
        particle_joint_values[:] = pjvs
        self.robot.addPermanentViewerParticles(particle_joint_values,
                                               particle_joint_colors)        
        
                    
        
    def init_serializer(self):        
        self.serializer = Serializer()
        self.serializer.create_temp_dir(self.abs_path, "hrf")
        
    def setup_viewer(self, robot, model_file, env_file):
        robot.setupViewer(model_file, env_file)
        
    def init_robot(self, urdf_model_file):
        self.robot = Robot(self.abs_path + "/" + urdf_model_file)
        self.robot.enforceConstraints(self.enforce_constraints)
        self.robot.setGravityConstant(self.gravity_constant)
        self.robot.setAccelerationLimit(self.acceleration_limit)
        """ Setup operations """
        self.robot_dof = self.robot.getDOF()        
        if len(self.start_state) != 2 * self.robot_dof:
            logging.error("HRF: Start state dimension doesn't fit to the robot state space dimension")
            return False
        return True 
    
    def setup_scene(self,                    
                    environment_file,
                    robot):
        """ Load the obstacles """         
        self.obstacles = self.utils.loadObstaclesXML(self.abs_path + "/" + environment_file)      
        
        """ Load the goal area """
        goal_area = v_double()
        self.utils.loadGoalArea(self.abs_path + "/" + environment_file, goal_area)
        if len(goal_area) == 0:
            print "ERROR: Your environment file doesn't define a goal area"
            return False
        self.goal_position = [goal_area[i] for i in xrange(0, 3)]
        self.goal_radius = goal_area[3]
        return True
    
    def create_random_obstacles(self, n):
        self.obstacles = []        
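        # Scatter n axis-aligned box obstacles (fixed 0.25 edge length) at
        # uniformly random positions within a fixed workspace region.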
        for i in xrange(n): 
            name = "obst_" + str(i)           
            x_pos = np.random.uniform(-1.0, 4.0)
            y_pos = np.random.uniform(-1.0, 4.0)
            z_pos = np.random.uniform(3.0, 6.0)
        
            x_size = 0.25
            y_size = 0.25
            z_size = 0.25
            
            obst = self.utils.generateObstacle(name,
                                               x_pos,
                                               y_pos,
                                               z_pos,
                                               x_size,
                                               y_size,
                                               z_size)            
            self.obstacles.append(obst[0])
                
    
    def clear_stats(self, dir):
        if os.path.isdir(dir):
            cmd = "rm -rf " + dir + "/*"            
            os.system(cmd)
        else:
            os.makedirs(dir)
            
    def calc_covariance_value(self, robot, error, covariance_matrix, covariance_type='process'):
        active_joints = v_string()
        robot.getActiveJoints(active_joints)        
        if covariance_type == 'process':
            if self.dynamic_problem:            
                torque_limits = v_double()
                robot.getJointTorqueLimits(active_joints, torque_limits)
                torque_limits = [torque_limits[i] for i in xrange(len(torque_limits))]
                for i in xrange(len(torque_limits)):
                    torque_range = 2.0 * torque_limits[i]
                    covariance_matrix[i, i] = np.square((torque_range / 100.0) * error)
            else:
                for i in xrange(self.robot_dof):
                    covariance_matrix[i, i] = np.square((self.max_velocity / 100.0) * error)
        else:
            lower_position_limits = v_double()
            upper_position_limits = v_double()
            velocity_limits = v_double()            
            robot.getJointLowerPositionLimits(active_joints, lower_position_limits)
            robot.getJointUpperPositionLimits(active_joints, upper_position_limits)
            robot.getJointVelocityLimits(active_joints, velocity_limits)            
            for i in xrange(self.robot_dof):
                position_range = upper_position_limits[i] - lower_position_limits[i]
                covariance_matrix[i, i] = np.square((position_range / 100.0) * error)            
                velocity_range = 2.0 * velocity_limits[i]
                covariance_matrix[i + self.robot_dof, i + self.robot_dof] = np.square((velocity_range / 100.0) * error)          
        return covariance_matrix     
            
    def problem_setup(self, delta_t, num_links):
        A = np.identity(num_links * 2)
        H = np.identity(num_links * 2)
        W = np.identity(num_links * 2)
        C = self.path_deviation_cost * np.identity(num_links * 2)
        
        '''B = delta_t * np.identity(num_links * 2)                
        V = np.identity(num_links * 2)
        D = self.control_deviation_cost * np.identity(num_links * 2)        
        M_base = np.identity(2 * self.robot_dof)
        N_base = np.identity(2 * self.robot_dof)'''
        
        B = delta_t * np.vstack((np.identity(num_links),
                                 np.zeros((num_links, num_links))))
        V = np.vstack((np.identity(num_links),
                       np.zeros((num_links, num_links))))
        D = self.control_deviation_cost * np.identity(num_links)
        M_base = np.identity(self.robot_dof)
        N_base = np.identity(2 * self.robot_dof)
        
        
        return A, H, B, V, W, C, D, M_base, N_base
        
        
    def set_params(self, config):
        self.num_paths = config['num_generated_paths']
        self.use_linear_path = config['use_linear_path']        
        self.max_velocity = config['max_velocity']
        self.delta_t = 1.0 / config['control_rate']
        self.start_state = config['start_state']        
        self.num_simulation_runs = config['num_simulation_runs']
        self.num_bins = config['num_bins']
        self.min_process_covariance = config['min_process_covariance']
        self.max_process_covariance = config['max_process_covariance']
        self.covariance_steps = config['covariance_steps']
        self.min_observation_covariance = config['min_observation_covariance']
        self.max_observation_covariance = config['max_observation_covariance']       
        self.discount_factor = config['discount_factor']
        self.illegal_move_penalty = config['illegal_move_penalty']
        self.step_penalty = config['step_penalty']
        self.exit_reward = config['exit_reward']
        self.stop_when_terminal = config['stop_when_terminal']        
        self.enforce_constraints = config['enforce_constraints']
        self.enforce_control_constraints = config['enforce_control_constraints']
        self.sample_size = config['sample_size']
        self.plot_paths = config['plot_paths']
        self.planning_algortihm = config['planning_algorithm']
        self.dynamic_problem = config['dynamic_problem'] 
        self.simulation_step_size = config['simulation_step_size']        
        self.path_timeout = config['path_timeout'] 
        self.continuous_collision = config['continuous_collision_check']
        self.show_viewer_evaluation = config['show_viewer_evaluation']
        self.show_viewer_simulation = config['show_viewer_simulation']   
        self.path_deviation_cost = config['path_deviation_cost'] 
        self.control_deviation_cost = config['control_deviation_cost']
        self.num_control_samples = config['num_control_samples'] 
        self.min_control_duration = config['min_control_duration']
        self.max_control_duration = config['max_control_duration']   
        self.inc_covariance = config['inc_covariance'] 
        self.add_intermediate_states = config['add_intermediate_states']
        self.gravity_constant = config['gravity']
        self.num_generated_goal_states = config['num_generated_goal_states']
        self.robot_file = config['robot_file']
        self.environment_file = config['environment_file']
        self.rrt_goal_bias = config['rrt_goal_bias']
        self.control_sampler = config['control_sampler']
        self.max_num_steps = config['max_num_steps']
        self.evaluation_horizon = config['horizon']
        self.timeout = config['timeout']
        self.seed = config['seed']
        self.num_cores = config['num_cores']
        self.replan_when_colliding = config['replan_when_colliding']
        self.acceleration_limit = config['acceleration_limit']
        self.knows_collision = config['knows_collision']
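
# --- Editor's sketch (not part of HRF.py) ---
# HRF.__init__ drives the whole experiment (config loading, goal-state
# generation, planning and the simulation runs), so a minimal driver script
# reduces to instantiating the class:
if __name__ == "__main__":
    HRF()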