Example #1
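    # Excerpt of a class __init__ method (Python 2). The surrounding module is assumed
    # to import os, subprocess, warnings, logging, time, numpy as np and scipy.stats,
    # and to provide Simulator, PlanAdjuster, PathEvaluator, PathPlanningInterface,
    # Utils, kalman, get_goal_states, check_positive_definite and the v_double type;
    # none of these definitions are shown in this excerpt.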
    def __init__(self):
        self.abs_path = os.path.dirname(os.path.abspath(__file__))
        cmd = "rm -rf " + self.abs_path + "/tmp"
        popen = subprocess.Popen(cmd, cwd=self.abs_path, shell=True)
        popen.wait()
        """ Reading the config """
        warnings.filterwarnings("ignore")
        self.init_serializer()
        config = self.serializer.read_config("config_hrf.yaml",
                                             path=self.abs_path)
        self.set_params(config)
        if self.seed < 0:
            """
            Generate a random seed that will be stored
            """
            self.seed = np.random.randint(0, 42949672)
        np.random.seed(self.seed)

        logging_level = logging.WARN
        if config['verbose']:
            logging_level = logging.DEBUG
        logging.basicConfig(format='%(levelname)s: %(message)s',
                            level=logging_level)
        np.set_printoptions(precision=16)
        dir = self.abs_path + "/stats/hrf"
        tmp_dir = self.abs_path + "/tmp/hrf"
        self.utils = Utils()
        if not self.init_robot(self.robot_file):
            logging.error("HRF: Couldn't initialize robot")
            return
        if not self.setup_scene(self.environment_file, self.robot):
            return

        self.clear_stats(dir)
        logging.info("Start up simulator")
        sim = Simulator()
        plan_adjuster = PlanAdjuster()
        path_evaluator = PathEvaluator()
        path_planner = PathPlanningInterface()
        if self.show_viewer_simulation:
            self.robot.setupViewer(self.robot_file, self.environment_file)

        logging.info("HRF: Generating goal states...")
        problem_feasible = False
        while not problem_feasible:
            print "Creating random scene..."
            #self.create_random_obstacles(20)
            goal_states = get_goal_states(
                "hrf", self.abs_path, self.serializer, self.obstacles,
                self.robot, self.max_velocity, self.delta_t, self.start_state,
                self.goal_position, self.goal_radius, self.planning_algortihm,
                self.path_timeout, self.num_generated_goal_states,
                self.continuous_collision, self.environment_file,
                self.num_cores)

            if len(goal_states) == 0:
                logging.error(
                    "HRF: Couldn't generate any goal states. Problem seems to be infeasible"
                )
            else:
                problem_feasible = True
        '''obst = v_obstacle()
        obst[:] = self.obstacles
        self.robot.addObstacles(obst)'''
        logging.info("HRF: Generated " + str(len(goal_states)) +
                     " goal states")
        sim.setup_reward_function(self.discount_factor, self.step_penalty,
                                  self.illegal_move_penalty, self.exit_reward)
        path_planner.setup(self.robot, self.obstacles, self.max_velocity,
                           self.delta_t, self.use_linear_path,
                           self.planning_algortihm, self.path_timeout,
                           self.continuous_collision, self.num_cores)
        if self.dynamic_problem:
            path_planner.setup_dynamic_problem(
                self.robot_file, self.environment_file,
                self.simulation_step_size, self.num_control_samples,
                self.min_control_duration, self.max_control_duration,
                self.add_intermediate_states, self.rrt_goal_bias,
                self.control_sampler)

        A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(
            self.delta_t, self.robot_dof)
        time_to_generate_paths = 0.0
        if check_positive_definite([C, D]):
            m_covs = None
            if self.inc_covariance == "process":
                m_covs = np.linspace(self.min_process_covariance,
                                     self.max_process_covariance,
                                     self.covariance_steps)
            elif self.inc_covariance == "observation":
                m_covs = np.linspace(self.min_observation_covariance,
                                     self.max_observation_covariance,
                                     self.covariance_steps)
        for j in xrange(len(m_covs)):
            M = None
            N = None
            if self.inc_covariance == "process":
                """ The process noise covariance matrix """
                M = self.calc_covariance_value(self.robot, m_covs[j], M_base)
                #M = m_covs[j] * M_base
                """ The observation error covariance matrix """
                N = self.calc_covariance_value(self.robot,
                                               self.min_observation_covariance,
                                               N_base,
                                               covariance_type='observation')
            elif self.inc_covariance == "observation":
                M = self.calc_covariance_value(self.robot,
                                               self.min_process_covariance,
                                               M_base)
                N = self.calc_covariance_value(self.robot,
                                               m_covs[j],
                                               N_base,
                                               covariance_type='observation')

            path_planner.setup_path_evaluator(
                A, B, C, D, H, M, N, V, W, self.robot, self.sample_size,
                self.obstacles, self.goal_position, self.goal_radius,
                self.robot_file, self.environment_file)
            sim.setup_problem(A, B, C, D, H, V, W, M, N, self.robot,
                              self.enforce_control_constraints, self.obstacles,
                              self.goal_position, self.goal_radius,
                              self.max_velocity, self.show_viewer_simulation,
                              self.robot_file, self.environment_file,
                              self.knows_collision)
            path_evaluator.setup(A, B, C, D, H, M, N, V, W, self.robot,
                                 self.sample_size, self.obstacles,
                                 self.goal_position, self.goal_radius,
                                 self.show_viewer_evaluation, self.robot_file,
                                 self.environment_file, self.num_cores)
            path_evaluator.setup_reward_function(self.step_penalty,
                                                 self.illegal_move_penalty,
                                                 self.exit_reward,
                                                 self.discount_factor)
            plan_adjuster.setup(self.robot, M, H, W, N, C, D,
                                self.dynamic_problem,
                                self.enforce_control_constraints)
            plan_adjuster.set_simulation_step_size(self.simulation_step_size)
            plan_adjuster.set_model_matrices(A, B, V)
            if not self.dynamic_problem:
                plan_adjuster.set_max_joint_velocities_linear_problem(
                    np.array(
                        [self.max_velocity for i in xrange(self.robot_dof)]))
            sim.set_stop_when_colliding(self.replan_when_colliding)
            if self.dynamic_problem:
                path_evaluator.setup_dynamic_problem()
                sim.setup_dynamic_problem(self.simulation_step_size)
            path_planner.setup_reward_function(self.step_penalty,
                                               self.exit_reward,
                                               self.illegal_move_penalty,
                                               self.discount_factor)
            mean_number_planning_steps = 0.0
            number_of_steps = 0.0
            mean_planning_time = 0.0
            num_generated_paths_run = 0.0
            successful_runs = 0
            num_collisions = 0.0
            linearization_error = 0.0
            final_states = []
            rewards_cov = []
            for k in xrange(self.num_simulation_runs):
                print "HRF: Run " + str(k + 1)
                self.serializer.write_line("log.log", tmp_dir,
                                           "RUN #" + str(k + 1) + " \n")
                current_step = 0
                x_true = np.array([
                    self.start_state[m] for m in xrange(len(self.start_state))
                ])
                x_estimated = np.array([
                    self.start_state[m] for m in xrange(len(self.start_state))
                ])
                #x_estimated[0] += 0.2
                #x_estimated[0] = 0.4
                #x_predicted = self.start_state
                P_t = np.array([[0.0 for i in xrange(2 * self.robot_dof)]
                                for i in xrange(2 * self.robot_dof)])
                P_ext_t = np.array([[0.0 for i in xrange(2 * self.robot_dof)]
                                    for i in xrange(2 * self.robot_dof)])
                deviation_covariance = np.array(
                    [[0.0 for i in xrange(2 * self.robot_dof)]
                     for i in xrange(2 * self.robot_dof)])
                estimated_deviation_covariance = np.array(
                    [[0.0 for i in xrange(2 * self.robot_dof)]
                     for i in xrange(2 * self.robot_dof)])
                total_reward = 0.0
                terminal = False
                last_x_true = None
                """
                Obtain a nominal path
                """
                print "plan"
                path_planner.set_start_and_goal(x_estimated, goal_states,
                                                self.goal_position,
                                                self.goal_radius)
                t0 = time.time()
                (xs, us, zs, control_durations, num_generated_paths, objective,
                 state_covariances, deviation_covariances,
                 estimated_deviation_covariances, mean_gen_times,
                 mean_eval_times, total_gen_times,
                 total_eval_times) = path_planner.plan_and_evaluate_paths(
                     1, 0, current_step, self.evaluation_horizon, P_t,
                     deviation_covariance, estimated_deviation_covariance, 0.0)
                print "objective " + str(objective)

                while True:
                    print "current step " + str(current_step)
                    '''if current_step == 7:
                        x_true = np.array([last_x_true[k] for k in xrange(len(last_x_true))])
                        for k in xrange(len(last_x_true) / 2, len(last_x_true)):
                            last_x_true[k] = 0.0'''
                    """
                    Predict system state at t+1 using nominal path
                    """
                    """ Get state matrices """
                    #As, Bs, Vs, Ms, Hs, Ws, Ns = sim.get_linear_model_matrices(xs, us, control_durations)
                    As, Bs, Vs, Ms, Hs, Ws, Ns = sim.get_linear_model_matrices(
                        [x_estimated], [us[0]], control_durations)
                    """ Predict using EKF """
                    (x_predicted_temp, P_predicted) = kalman.predict_state(
                        self.robot, x_estimated, us[0], control_durations[0],
                        self.simulation_step_size, As[0], Bs[0], Vs[0], Ms[0],
                        P_ext_t, self.dynamic_problem)
                    """ Make sure x_predicted fulfills the constraints """
                    if self.enforce_constraints:
                        x_predicted_temp = sim.check_constraints(
                            x_predicted_temp)
                    predicted_collided = True
                    if not sim.is_in_collision([], x_predicted_temp)[0]:
                        x_predicted = x_predicted_temp
                        predicted_collided = False
                    else:
                        print "X_PREDICTED COLLIDES!"
                        x_predicted = x_estimated
                        for l in xrange(
                                len(x_predicted) / 2, len(x_predicted)):
                            x_predicted[l] = 0

                    last_x_true = np.array(
                        [x_true[k] for k in xrange(len(x_true))])
                    """
                    Execute path for 1 time step
                    """
                    (x_true, x_tilde, x_tilde_linear, x_estimated_dash, z, P_t,
                     current_step, total_reward, terminal, estimated_s,
                     estimated_c, history_entries) = sim.simulate_n_steps(
                         xs,
                         us,
                         zs,
                         control_durations,
                         x_true,
                         x_estimated,
                         P_t,
                         total_reward,
                         current_step,
                         1,
                         0.0,
                         0.0,
                         max_num_steps=self.max_num_steps)

                    history_entries[-1].set_best_reward(objective)
                    """
                    Process history entries
                    """
                    try:
                        deviation_covariance = deviation_covariances[
                            len(history_entries) - 1]
                        estimated_deviation_covariance = estimated_deviation_covariances[
                            len(history_entries) - 1]
                    except:
                        print "what: len(deviation_covariances) " + str(
                            len(deviation_covariances))
                        print "len(history_entries) " + str(
                            len(history_entries))
                        print "len(xs) " + str(len(xs))

                    history_entries[0].set_replanning(True)
                    for l in xrange(len(history_entries)):
                        try:
                            history_entries[l].set_estimated_covariance(
                                state_covariances[l])
                        except:
                            print "l " + str(l)
                            print "len(state_covariances) " + str(
                                len(state_covariances))

                        if history_entries[l].collided:
                            num_collisions += 1
                        linearization_error += history_entries[
                            l].linearization_error
                    if (current_step == self.max_num_steps) or terminal:
                        history_entries[len(history_entries) -
                                        2].set_best_reward(objective)
                        history_entries[-1].set_best_reward(None)
                        for l in xrange(len(history_entries)):
                            history_entries[l].serialize(tmp_dir, "log.log")
                        final_states.append(history_entries[-1].x_true)
                        if terminal:
                            print "Terminal state reached"
                            successful_runs += 1
                        break
                    """
                    Plan new trajectories from predicted state
                    """
                    path_planner.set_start_and_goal(x_predicted, goal_states,
                                                    self.goal_position,
                                                    self.goal_radius)
                    t0 = time.time()
                    paths = path_planner.plan_paths(
                        self.num_paths,
                        0,
                        planning_timeout=self.timeout,
                        min_num_paths=1)
                    mean_planning_time += time.time() - t0
                    mean_number_planning_steps += 1.0
                    num_generated_paths_run += len(paths)
                    """
                    Filter update
                    """
                    (x_estimated_temp,
                     P_ext_t) = kalman.filter_update(x_predicted, P_predicted,
                                                     z, Hs[0], Ws[0], Ns[0])
                    """ Make sure x_estimated fulfills the constraints """
                    if self.enforce_constraints:
                        x_estimated_temp = sim.check_constraints(
                            x_estimated_temp)
                    in_collision, colliding_obstacle = sim.is_in_collision(
                        [], x_estimated_temp)
                    if in_collision:
                        history_entries[-1].set_estimate_collided(True)
                        for l in xrange(
                                len(x_estimated) / 2, len(x_estimated)):
                            x_estimated[l] = 0
                    elif history_entries[-1].collided and self.knows_collision:
                        for l in xrange(
                                len(x_estimated) / 2, len(x_estimated)):
                            x_estimated[l] = 0
                    else:
                        x_estimated = x_estimated_temp
                        history_entries[-1].set_estimate_collided(False)

                    history_entries[-1].serialize(tmp_dir, "log.log")
                    """
                    Adjust plan
                    """
                    t0 = time.time()
                    (xs_adj, us_adj, zs_adj, control_durations_adj,
                     adjusted) = plan_adjuster.adjust_plan(
                         self.robot, (xs, us, zs, control_durations),
                         x_estimated, P_t)
                    if not adjusted:
                        final_states.append(history_entries[-1].x_true)
                        print "current step " + str(current_step)
                        raise ValueError("Raised")
                        break
                    if self.show_viewer_simulation:
                        sim.update_viewer(
                            x_true,
                            x_estimated,
                            z,
                            control_duration=0.03,
                            colliding_obstacle=sim.colliding_obstacle)
                    """
                    Evaluate the adjusted plan and the planned paths
                    """
                    #if self.is_terminal(xs_adj[-1]) and not len(xs_adj)==1:
                    if not len(xs_adj) <= 1:
                        """
                        Only evaluate the adjusted path if it's > 1
                        """
                        paths.extend(
                            [[xs_adj, us_adj, zs_adj, control_durations_adj]])
                    if len(paths) == 0:
                        """
                        Running out of paths
                        """
                        print "Error: Couldn't generate an evaluate any paths"
                        final_states.append(history_entries[-1].x_true)
                        break

                    (path_index, xs, us, zs, control_durations, objective,
                     state_covariances, deviation_covariances,
                     estimated_deviation_covariances
                     ) = path_evaluator.evaluate_paths(paths, P_t,
                                                       current_step)
                    mean_planning_time += time.time() - t0
                    if path_index is None:
                        """
                        We couldn't evaluate any paths
                        """
                        final_states.append(history_entries[-1].x_true)
                        break

                    if self.show_viewer_simulation:
                        self.visualize_paths(self.robot, [xs])

                rewards_cov.append(total_reward)
                self.serializer.write_line(
                    "log.log", tmp_dir, "Reward: " + str(total_reward) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "\n")
                number_of_steps += current_step
            mean_planning_time_per_step = mean_planning_time / mean_number_planning_steps
            mean_planning_time /= self.num_simulation_runs
            mean_num_generated_paths_step = num_generated_paths_run / mean_number_planning_steps
            """ Calculate the distance to goal area
            """
            ee_position_distances = []
            for state in final_states:
                joint_angles = v_double()
                joint_angles[:] = [state[y] for y in xrange(len(state) / 2)]
                ee_position = v_double()
                self.robot.getEndEffectorPosition(joint_angles, ee_position)
                ee_position = np.array(
                    [ee_position[y] for y in xrange(len(ee_position))])
                dist = np.linalg.norm(
                    np.array(self.goal_position - ee_position))
                if dist < self.goal_radius:
                    dist = 0.0
                ee_position_distances.append(dist)
            logging.info("HRF: Done. total_reward is " + str(total_reward))
            try:
                n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(
                    np.array(ee_position_distances))
            except:
                print ee_position_distances

            self.serializer.write_line("log.log", tmp_dir,
                                       "################################# \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "inc_covariance: " + str(self.inc_covariance) + "\n")
            if self.inc_covariance == "process":
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Process covariance: " + str(m_covs[j]) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir, "Observation covariance: " +
                    str(self.min_observation_covariance) + " \n")
            elif self.inc_covariance == "observation":
                self.serializer.write_line(
                    "log.log", tmp_dir, "Process covariance: " +
                    str(self.min_process_covariance) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Observation covariance: " + str(m_covs[j]) + " \n")
            mean_linearization_error = linearization_error / number_of_steps
            number_of_steps /= self.num_simulation_runs
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Mean number of steps: " + str(number_of_steps) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Mean number of generated paths per step: " +
                str(mean_num_generated_paths_step) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean num collisions per run: " +
                str(float(num_collisions) / float(self.num_simulation_runs)) +
                " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Average distance to goal area: " +
                str(mean_distance_to_goal) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Num successes: " + str(successful_runs) + " \n")
            print "succ " + str(
                (100.0 / self.num_simulation_runs) * successful_runs)
            self.serializer.write_line(
                "log.log", tmp_dir, "Percentage of successful runs: " + str(
                    (100.0 / self.num_simulation_runs) * successful_runs) +
                " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean planning time per run: " +
                str(mean_planning_time) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean planning time per planning step: " +
                str(mean_planning_time_per_step) + " \n")

            n, min_max, mean, var, skew, kurt = scipy.stats.describe(
                np.array(rewards_cov))
            print "mean_rewards " + str(mean)
            #plt.plot_histogram_from_data(rewards_cov)
            #sleep
            self.serializer.write_line("log.log", tmp_dir,
                                       "Mean rewards: " + str(mean) + " \n")
            self.serializer.write_line("log.log", tmp_dir,
                                       "Reward variance: " + str(var) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean linearisation error: " +
                str(mean_linearization_error) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
            self.serializer.write_line("log.log", tmp_dir,
                                       "Seed: " + str(self.seed) + " \n")
            cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_hrf_" + str(
                m_covs[j]) + ".log"
            os.system(cmd)
        cmd = "cp " + self.abs_path + "/config_hrf.yaml " + dir
        os.system(cmd)

        if not os.path.exists(dir + "/environment"):
            os.makedirs(dir + "/environment")

        cmd = "cp " + self.abs_path + "/" + str(
            self.environment_file) + " " + str(dir) + "/environment"
        os.system(cmd)

        if not os.path.exists(dir + "/model"):
            os.makedirs(dir + "/model")

        cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
        os.system(cmd)
        print "Done."
Example #2
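    # Excerpt of a class __init__ method (Python 2). The surrounding module is assumed
    # to import os, warnings, logging, time, numpy as np and scipy.stats, and to provide
    # Simulator, PathEvaluator, PathPlanningInterface, Utils, get_goal_states,
    # check_positive_definite and the v_double type; none of these definitions are shown
    # in this excerpt.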
    def __init__(self, plot):
        self.abs_path = os.path.dirname(os.path.abspath(__file__))
        """ Reading the config """
        warnings.filterwarnings("ignore")
        self.init_serializer()
        config = self.serializer.read_config("config_mpc.yaml",
                                             path=self.abs_path)
        self.set_params(config)
        if self.seed < 0:
            """
            Generate a random seed that will be stored
            """
            self.seed = np.random.randint(0, 42949672)
        np.random.seed(self.seed)

        logging_level = logging.WARN
        if config['verbose']:
            logging_level = logging.DEBUG
        logging.basicConfig(format='%(levelname)s: %(message)s',
                            level=logging_level)
        np.set_printoptions(precision=16)
        dir = self.abs_path + "/stats/mpc"
        tmp_dir = self.abs_path + "/tmp/mpc"
        self.utils = Utils()
        if not self.init_robot(self.robot_file):
            logging.error("MPC: Couldn't initialize robot")
            return
        if not self.setup_scene(self.environment_file, self.robot):
            return
        #self.run_viewer(self.robot_file, self.environment_file)
        self.clear_stats(dir)
        logging.info("Start up simulator")
        sim = Simulator()
        path_evaluator = PathEvaluator()
        path_planner = PathPlanningInterface()
        logging.info("MPC: Generating goal states...")
        goal_states = get_goal_states(
            "mpc", self.abs_path, self.serializer, self.obstacles, self.robot,
            self.max_velocity, self.delta_t, self.start_state,
            self.goal_position, self.goal_radius, self.planning_algortihm,
            self.path_timeout, self.num_generated_goal_states,
            self.continuous_collision, self.environment_file, self.num_cores)

        if len(goal_states) == 0:
            logging.error(
                "MPC: Couldn't generate any goal states. Problem seems to be infeasible"
            )
            return
        logging.info("MPC: Generated " + str(len(goal_states)) +
                     " goal states")
        sim.setup_reward_function(self.discount_factor, self.step_penalty,
                                  self.illegal_move_penalty, self.exit_reward)
        path_planner.setup(self.robot, self.obstacles, self.max_velocity,
                           self.delta_t, self.use_linear_path,
                           self.planning_algortihm, self.path_timeout,
                           self.continuous_collision, self.num_cores)
        if self.dynamic_problem:
            path_planner.setup_dynamic_problem(
                self.robot_file, self.environment_file,
                self.simulation_step_size, self.num_control_samples,
                self.min_control_duration, self.max_control_duration,
                self.add_intermediate_states, self.rrt_goal_bias,
                self.control_sampler)

        A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(
            self.delta_t, self.robot_dof)
        time_to_generate_paths = 0.0
        if check_positive_definite([C, D]):
            m_covs = None
            if self.inc_covariance == "process":
                m_covs = np.linspace(self.min_process_covariance,
                                     self.max_process_covariance,
                                     self.covariance_steps)
            elif self.inc_covariance == "observation":
                m_covs = np.linspace(self.min_observation_covariance,
                                     self.max_observation_covariance,
                                     self.covariance_steps)
        for j in xrange(len(m_covs)):
            M = None
            N = None
            if self.inc_covariance == "process":
                """ The process noise covariance matrix """
                M = self.calc_covariance_value(self.robot, m_covs[j], M_base)
                #M = m_covs[j] * M_base
                """ The observation error covariance matrix """
                N = self.calc_covariance_value(self.robot,
                                               self.min_observation_covariance,
                                               N_base,
                                               covariance_type='observation')
            elif self.inc_covariance == "observation":
                M = self.calc_covariance_value(self.robot,
                                               self.min_process_covariance,
                                               M_base)
                N = self.calc_covariance_value(self.robot,
                                               m_covs[j],
                                               N_base,
                                               covariance_type='observation')
            P_t = np.array([[0.0 for k in xrange(2 * self.robot_dof)]
                            for l in xrange(2 * self.robot_dof)])

            path_planner.setup_path_evaluator(
                A, B, C, D, H, M, N, V, W, self.robot, self.sample_size,
                self.obstacles, self.goal_position, self.goal_radius,
                self.robot_file, self.environment_file)
            sim.setup_problem(A, B, C, D, H, V, W, M, N, self.robot,
                              self.enforce_control_constraints, self.obstacles,
                              self.goal_position, self.goal_radius,
                              self.max_velocity, self.show_viewer_simulation,
                              self.robot_file, self.environment_file)
            sim.set_stop_when_colliding(self.replan_when_colliding)
            if self.dynamic_problem:
                path_evaluator.setup_dynamic_problem()
                sim.setup_dynamic_problem(self.simulation_step_size)
            path_planner.setup_reward_function(self.step_penalty,
                                               self.exit_reward,
                                               self.illegal_move_penalty,
                                               self.discount_factor)
            mean_number_planning_steps = 0.0
            number_of_steps = 0.0
            mean_planning_time = 0.0
            num_generated_paths_run = 0.0
            successful_runs = 0
            num_collisions = 0.0
            linearization_error = 0.0
            final_states = []
            rewards_cov = []
            for k in xrange(self.num_simulation_runs):
                print "MPC: Run " + str(k + 1)
                self.serializer.write_line("log.log", tmp_dir,
                                           "RUN #" + str(k + 1) + " \n")
                current_step = 0
                x_true = self.start_state
                x_estimate = self.start_state
                x_tilde_linear = np.array(
                    [0.0 for i in xrange(2 * self.robot_dof)])
                P_t = np.array([[0.0 for i in xrange(2 * self.robot_dof)]
                                for i in xrange(2 * self.robot_dof)])
                deviation_covariance = np.array(
                    [[0.0 for i in xrange(2 * self.robot_dof)]
                     for i in xrange(2 * self.robot_dof)])
                estimated_deviation_covariance = np.array(
                    [[0.0 for i in xrange(2 * self.robot_dof)]
                     for i in xrange(2 * self.robot_dof)])
                total_reward = 0.0
                terminal = False
                while current_step < self.max_num_steps and not terminal:
                    path_planner.set_start_and_goal(x_estimate, goal_states,
                                                    self.goal_position,
                                                    self.goal_radius)
                    t0 = time.time()
                    (xs, us, zs, control_durations, num_generated_paths,
                     best_val, state_covariances, deviation_covariances,
                     estimated_deviation_covariances, mean_gen_times,
                     mean_eval_times, total_gen_times,
                     total_eval_times) = path_planner.plan_and_evaluate_paths(
                         self.num_paths, 0, current_step,
                         self.evaluation_horizon, P_t, deviation_covariance,
                         estimated_deviation_covariance, self.timeout)
                    mean_planning_time += time.time() - t0
                    mean_number_planning_steps += 1.0
                    num_generated_paths_run += num_generated_paths
                    if len(xs) == 0:
                        logging.error(
                            "MPC: Couldn't find any paths from start state" +
                            str(x_estimate) + " to goal states")
                        final_states.append(x_true)
                        #total_reward = np.array([-self.illegal_move_penalty])[0]
                        current_step += 1
                        break
                    x_tilde = np.array(
                        [0.0 for i in xrange(2 * self.robot_dof)])
                    n_steps = self.num_execution_steps

                    if n_steps > len(xs) - 1:
                        n_steps = len(xs) - 1
                    if current_step + n_steps > self.max_num_steps:
                        n_steps = self.max_num_steps - current_step
                    (x_true, x_tilde, x_tilde_linear, x_estimate, P_t,
                     current_step, total_reward, terminal, estimated_s,
                     estimated_c, history_entries) = sim.simulate_n_steps(
                         xs,
                         us,
                         zs,
                         control_durations,
                         x_true,
                         x_tilde,
                         x_tilde_linear,
                         x_estimate,
                         P_t,
                         total_reward,
                         current_step,
                         n_steps,
                         0.0,
                         0.0,
                         max_num_steps=self.max_num_steps)
                    #print "len(hist) " + str(len(history_entries))
                    #print "len(deviation_covariances) " + str(len(deviation_covariances))
                    #print "len(estimated_deviation_covariances) " + str(len(estimated_deviation_covariances))
                    try:
                        deviation_covariance = deviation_covariances[
                            len(history_entries) - 1]
                        estimated_deviation_covariance = estimated_deviation_covariances[
                            len(history_entries) - 1]
                    except:
                        print "what: len(deviation_covariances) " + str(
                            len(deviation_covariances))
                        print "len(history_entries) " + str(
                            len(history_entries))
                        print "len(xs) " + str(len(xs))

                    if (current_step == self.max_num_steps) or terminal:
                        final_states.append(history_entries[-1].x_true)
                        if terminal:
                            successful_runs += 1

                    history_entries[0].set_replanning(True)
                    for l in xrange(len(history_entries)):
                        try:
                            history_entries[l].set_estimated_covariance(
                                state_covariances[l])
                        except:
                            print "l " + str(l)
                            print "len(state_covariances) " + str(
                                len(state_covariances))

                        history_entries[l].serialize(tmp_dir, "log.log")
                        if history_entries[l].collided:
                            num_collisions += 1
                        linearization_error += history_entries[
                            l].linearization_error
                    #logging.warn("MPC: Execution finished. True state is " + str(x_true))
                    #logging.warn("MPC: Estimated state is " + str(x_estimate))
                    logging.warn("MPC: Executed " + str(current_step) +
                                 " steps")
                    logging.warn("MPC: terminal " + str(terminal))
                    if terminal:
                        logging.info("MPC: Final state: " + str(x_true))
                rewards_cov.append(total_reward)
                self.serializer.write_line(
                    "log.log", tmp_dir, "Reward: " + str(total_reward) + " \n")
                self.serializer.write_line("log.log", tmp_dir, "\n")
                number_of_steps += current_step
            mean_planning_time_per_step = mean_planning_time / mean_number_planning_steps
            mean_planning_time /= self.num_simulation_runs
            """ Calculate the distance to goal area
            """
            ee_position_distances = []
            for state in final_states:
                joint_angles = v_double()
                joint_angles[:] = [state[y] for y in xrange(len(state) / 2)]
                ee_position = v_double()
                self.robot.getEndEffectorPosition(joint_angles, ee_position)
                ee_position = np.array(
                    [ee_position[y] for y in xrange(len(ee_position))])
                dist = np.linalg.norm(
                    np.array(self.goal_position - ee_position))
                if dist < self.goal_radius:
                    dist = 0.0
                ee_position_distances.append(dist)
            logging.info("MPC: Done. total_reward is " + str(total_reward))
            try:
                n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(
                    np.array(ee_position_distances))
            except:
                print ee_position_distances

            self.serializer.write_line("log.log", tmp_dir,
                                       "################################# \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "inc_covariance: " + str(self.inc_covariance) + "\n")
            if self.inc_covariance == "process":
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Process covariance: " + str(m_covs[j]) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir, "Observation covariance: " +
                    str(self.min_observation_covariance) + " \n")
            elif self.inc_covariance == "observation":
                self.serializer.write_line(
                    "log.log", tmp_dir, "Process covariance: " +
                    str(self.min_process_covariance) + " \n")
                self.serializer.write_line(
                    "log.log", tmp_dir,
                    "Observation covariance: " + str(m_covs[j]) + " \n")
            mean_linearization_error = linearization_error / number_of_steps
            number_of_steps /= self.num_simulation_runs
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Mean number of steps: " + str(number_of_steps) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean num collisions per run: " +
                str(float(num_collisions) / float(self.num_simulation_runs)) +
                " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Average distance to goal area: " +
                str(mean_distance_to_goal) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Num successes: " + str(successful_runs) + " \n")
            print "succ " + str(
                (100.0 / self.num_simulation_runs) * successful_runs)
            self.serializer.write_line(
                "log.log", tmp_dir, "Percentage of successful runs: " + str(
                    (100.0 / self.num_simulation_runs) * successful_runs) +
                " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean planning time per run: " +
                str(mean_planning_time) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean planning time per planning step: " +
                str(mean_planning_time_per_step) + " \n")

            n, min_max, mean, var, skew, kurt = scipy.stats.describe(
                np.array(rewards_cov))
            print "mean_rewards " + str(mean)
            #plt.plot_histogram_from_data(rewards_cov)
            #sleep
            self.serializer.write_line("log.log", tmp_dir,
                                       "Mean rewards: " + str(mean) + " \n")
            self.serializer.write_line("log.log", tmp_dir,
                                       "Reward variance: " + str(var) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir, "Mean linearisation error: " +
                str(mean_linearization_error) + " \n")
            self.serializer.write_line(
                "log.log", tmp_dir,
                "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
            self.serializer.write_line("log.log", tmp_dir,
                                       "Seed: " + str(self.seed) + " \n")
            cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_mpc_" + str(
                m_covs[j]) + ".log"
            os.system(cmd)

        cmd = "cp " + self.abs_path + "/config_mpc.yaml " + dir
        os.system(cmd)

        if not os.path.exists(dir + "/environment"):
            os.makedirs(dir + "/environment")

        cmd = "cp " + self.abs_path + "/" + str(
            self.environment_file) + " " + str(dir) + "/environment"
        os.system(cmd)

        if not os.path.exists(dir + "/model"):
            os.makedirs(dir + "/model")

        cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
        os.system(cmd)
        print "Done."
Example #3
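 # Excerpt of a class __init__ method (Python 2). The surrounding module is assumed
 # to import os, warnings, logging, time, numpy as np and scipy.stats, and to provide
 # Simulator, PathEvaluator, PathPlanningInterface, Utils, get_goal_states,
 # check_positive_definite and the v_double type; none of these definitions are shown
 # in this excerpt.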
 def __init__(self, plot):
     self.abs_path = os.path.dirname(os.path.abspath(__file__))
     """ Reading the config """
     warnings.filterwarnings("ignore")
     self.init_serializer()
     config = self.serializer.read_config("config_mpc.yaml", path=self.abs_path)
     self.set_params(config)
     if self.seed < 0:
         """
         Generate a random seed that will be stored
         """
         self.seed = np.random.randint(0, 4294967295)
     np.random.seed(self.seed)
     
     logging_level = logging.WARN
     if config['verbose']:
         logging_level = logging.DEBUG
     logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_level)        
     np.set_printoptions(precision=16)
     dir = self.abs_path + "/stats/mpc"
     tmp_dir = self.abs_path + "/tmp/mpc"       
     self.utils = Utils()
     if not self.init_robot(self.robot_file):
         logging.error("MPC: Couldn't initialize robot")
         return               
     if not self.setup_scene(self.environment_file, self.robot):
         return   
     #self.run_viewer(self.robot_file, self.environment_file)     
     self.clear_stats(dir)
     logging.info("Start up simulator")
     sim = Simulator()        
     path_evaluator = PathEvaluator()
     path_planner = PathPlanningInterface()
     logging.info("MPC: Generating goal states...")        
     goal_states = get_goal_states("mpc",
                                   self.abs_path,
                                   self.serializer, 
                                   self.obstacles,                                                                           
                                   self.robot,                                    
                                   self.max_velocity,
                                   self.delta_t,
                                   self.start_state,
                                   self.goal_position,
                                   self.goal_radius,
                                   self.planning_algortihm,
                                   self.path_timeout,
                                   self.num_generated_goal_states,
                                   self.continuous_collision,
                                   self.environment_file,
                                   self.num_cores)
     
     if len(goal_states) == 0:
         logging.error("MPC: Couldn't generate any goal states. Problem seems to be infeasible")
         return
     logging.info("MPC: Generated " + str(len(goal_states)) + " goal states")
     sim.setup_reward_function(self.discount_factor, self.step_penalty, self.illegal_move_penalty, self.exit_reward)
     path_planner.setup(self.robot,                         
                        self.obstacles,  
                        self.max_velocity, 
                        self.delta_t, 
                        self.use_linear_path,
                        self.planning_algortihm,
                        self.path_timeout,
                        self.continuous_collision,
                        self.num_cores)
     if self.dynamic_problem:
         path_planner.setup_dynamic_problem(self.robot_file,
                                            self.environment_file,
                                            self.simulation_step_size,
                                            self.num_control_samples,
                                            self.min_control_duration,
                                            self.max_control_duration,
                                            self.add_intermediate_states,
                                            self.rrt_goal_bias,
                                            self.control_sampler)
          
     A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(self.delta_t, self.robot_dof)
     time_to_generate_paths = 0.0
     if check_positive_definite([C, D]):
         m_covs = None
         if self.inc_covariance == "process":
             m_covs = np.linspace(self.min_process_covariance, 
                                  self.max_process_covariance, 
                                  self.covariance_steps)                 
         elif self.inc_covariance == "observation":          
             m_covs = np.linspace(self.min_observation_covariance, 
                                  self.max_observation_covariance,
                                  self.covariance_steps)
     for j in xrange(len(m_covs)):
         M = None
         N = None
         if self.inc_covariance == "process":
             """ The process noise covariance matrix """
             M = self.calc_covariance_value(self.robot, m_covs[j], M_base)
             #M = m_covs[j] * M_base
                 
             """ The observation error covariance matrix """
             N = self.calc_covariance_value(self.robot, 
                                            self.min_observation_covariance, 
                                            N_base, 
                                            covariance_type='observation')                    
         elif self.inc_covariance == "observation":
             M = self.calc_covariance_value(self.robot, 
                                            self.min_process_covariance,
                                            M_base)
             N = self.calc_covariance_value(self.robot, 
                                            m_covs[j],
                                            N_base, 
                                            covariance_type='observation')                    
         P_t = np.array([[0.0 for k in xrange(2 * self.robot_dof)] for l in xrange(2 * self.robot_dof)])
         
         path_planner.setup_path_evaluator(A, B, C, D, H, M, N, V, W,                                     
                                           self.robot, 
                                           self.sample_size, 
                                           self.obstacles,
                                           self.goal_position,
                                           self.goal_radius,                                              
                                           self.robot_file,
                                           self.environment_file)
         sim.setup_problem(A, B, C, D, H, V, W, M, N,
                           self.robot, 
                           self.enforce_control_constraints,
                           self.obstacles, 
                           self.goal_position, 
                           self.goal_radius,
                           self.max_velocity,                                  
                           self.show_viewer_simulation,
                           self.robot_file,
                           self.environment_file)
         sim.set_stop_when_colliding(self.replan_when_colliding)
         if self.dynamic_problem:
             path_evaluator.setup_dynamic_problem()
             sim.setup_dynamic_problem(self.simulation_step_size)
         path_planner.setup_reward_function(self.step_penalty, self.exit_reward, self.illegal_move_penalty, self.discount_factor)
         mean_number_planning_steps = 0.0
         number_of_steps = 0.0
         mean_planning_time = 0.0
         num_generated_paths_run = 0.0
         successful_runs = 0
         num_collisions = 0.0
         final_states= []
         rewards_cov = []
         for k in xrange(self.num_simulation_runs):
             print "MPC: Run " + str(k + 1)                                
             self.serializer.write_line("log.log", tmp_dir, "RUN #" + str(k + 1) + " \n")
             current_step = 0
             x_true = self.start_state
             x_estimate = self.start_state
             x_tilde_linear = np.array([0.0 for i in xrange(2 * self.robot_dof)])
             P_t = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)]) 
             deviation_covariance = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)])
             estimated_deviation_covariance = np.array([[0.0 for i in xrange(2 * self.robot_dof)] for i in xrange(2 * self.robot_dof)])              
             total_reward = 0.0
             terminal = False
             while current_step < self.max_num_steps and not terminal:
                 path_planner.set_start_and_goal(x_estimate, goal_states, self.goal_position, self.goal_radius)
                 t0 = time.time()
                 (xs, 
                  us, 
                  zs,
                  control_durations, 
                  num_generated_paths, 
                  best_val,
                  state_covariances,
                  deviation_covariances,
                  estimated_deviation_covariances, 
                  mean_gen_times, 
                  mean_eval_times,
                  total_gen_times,
                  total_eval_times) = path_planner.plan_and_evaluate_paths(self.num_paths, 
                                                                           0, 
                                                                           current_step, 
                                                                           self.evaluation_horizon, 
                                                                           P_t,
                                                                           deviation_covariance,
                                                                           estimated_deviation_covariance, 
                                                                           self.timeout)
                 mean_planning_time += time.time() - t0
                 mean_number_planning_steps += 1.0  
                 num_generated_paths_run += num_generated_paths
                 if len(xs) == 0:
                     logging.error("MPC: Couldn't find any paths from start state" + 
                                   str(x_estimate) + 
                                   " to goal states") 
                                   
                     total_reward = np.array([-self.illegal_move_penalty])[0]
                     current_step += 1                                             
                     break
                 x_tilde = np.array([0.0 for i in xrange(2 * self.robot_dof)])
                 n_steps = self.num_execution_steps
                 
                 if n_steps > len(xs) - 1:
                    n_steps = len(xs) - 1
                 if current_step + n_steps > self.max_num_steps:
                     n_steps = self.max_num_steps - current_step
                 (x_true, 
                  x_tilde,
                  x_tilde_linear, 
                  x_estimate, 
                  P_t, 
                  current_step, 
                  total_reward,
                  terminal,
                  estimated_s,
                  estimated_c,
                  history_entries) = sim.simulate_n_steps(xs, us, zs,
                                                          control_durations,
                                                          x_true,                                                              
                                                          x_tilde,
                                                          x_tilde_linear,
                                                          x_estimate,
                                                          P_t,
                                                          total_reward,                                                                 
                                                          current_step,
                                                          n_steps,
                                                          0.0,
                                                          0.0,
                                                          max_num_steps=self.max_num_steps)
                 #print "len(hist) " + str(len(history_entries))
                 #print "len(deviation_covariances) " + str(len(deviation_covariances))
                 #print "len(estimated_deviation_covariances) " + str(len(estimated_deviation_covariances))
                 deviation_covariance = deviation_covariances[len(history_entries) - 1]
                 estimated_deviation_covariance = estimated_deviation_covariances[len(history_entries) - 1]
                 
                 if (current_step == self.max_num_steps) or terminal:
                     final_states.append(history_entries[-1].x_true)
                     print "len " + str(len(history_entries))
                     print "t " + str(history_entries[-1].t)
                     if terminal:
                         successful_runs += 1
                     
                 history_entries[0].set_replanning(True)                        
                 for l in xrange(len(history_entries)):
                     history_entries[l].set_estimated_covariance(state_covariances[l])                        
                     history_entries[l].serialize(tmp_dir, "log.log")
                     if history_entries[l].collided:                            
                         num_collisions += 1
                         collided = True
                 #logging.warn("MPC: Execution finished. True state is " + str(x_true))
                 #logging.warn("MPC: Estimated state is " + str(x_estimate))
                 logging.warn("MPC: Executed " + str(current_step) + " steps") 
                 logging.warn("MPC: terminal " + str(terminal))
                 if terminal:
                     print "MPC: Final state: " + str(x_true)
             rewards_cov.append(total_reward)
             self.serializer.write_line("log.log", 
                                        tmp_dir,
                                        "Reward: " + str(total_reward) + " \n") 
             self.serializer.write_line("log.log", 
                                        tmp_dir,
                                        "\n")
             number_of_steps += current_step
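          # Normalize the accumulated planning time: per planning step first, then per run.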
         mean_planning_time_per_step = mean_planning_time / mean_number_planning_steps 
         mean_planning_time /= self.num_simulation_runs
                             
         """ Calculate the distance to goal area
         """  
         ee_position_distances = [] 
         for state in final_states:
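              # The first half of each final state vector holds the joint angles; use them
              # to compute the end-effector position and its distance to the goal area.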
             joint_angles = v_double()
             joint_angles[:] = [state[y] for y in xrange(len(state) / 2)]
             ee_position = v_double()
             self.robot.getEndEffectorPosition(joint_angles, ee_position)
             ee_position = np.array([ee_position[y] for y in xrange(len(ee_position))])
             dist = np.linalg.norm(np.array(self.goal_position - ee_position))
             if dist < self.goal_radius:
                 dist = 0.0
             ee_position_distances.append(dist)
         logging.info("MPC: Done. total_reward is " + str(total_reward))
         try:
             n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(np.array(ee_position_distances))
          except Exception:
              print ee_position_distances
              raise
             
         self.serializer.write_line("log.log", tmp_dir, "################################# \n")
         self.serializer.write_line("log.log",
                                    tmp_dir,
                                    "inc_covariance: " + str(self.inc_covariance) + "\n")
         if self.inc_covariance == "process":                  
             self.serializer.write_line("log.log",
                                        tmp_dir,
                                        "Process covariance: " + str(m_covs[j]) + " \n")
             self.serializer.write_line("log.log",
                                        tmp_dir,
                                        "Observation covariance: " + str(self.min_observation_covariance) + " \n")
         elif self.inc_covariance == "observation":                    
             self.serializer.write_line("log.log",
                                        tmp_dir,
                                        "Process covariance: " + str(self.min_process_covariance) + " \n")
             self.serializer.write_line("log.log",
                                        tmp_dir,
                                        "Observation covariance: " + str(m_covs[j]) + " \n")
         number_of_steps /= self.num_simulation_runs
         self.serializer.write_line("log.log", tmp_dir, "Mean number of steps: " + str(number_of_steps) + " \n")                            
         self.serializer.write_line("log.log", tmp_dir, "Mean num collisions per run: " + str(float(num_collisions) / float(self.num_simulation_runs)) + " \n")
         self.serializer.write_line("log.log", 
                                    tmp_dir, 
                                    "Average distance to goal area: " + str(mean_distance_to_goal) + " \n")
         self.serializer.write_line("log.log", tmp_dir, "Num successes: " + str(successful_runs) + " \n")
         print "succ " + str((100.0 / self.num_simulation_runs) * successful_runs)
         self.serializer.write_line("log.log", tmp_dir, "Percentage of successful runs: " + str((100.0 / self.num_simulation_runs) * successful_runs) + " \n")
         self.serializer.write_line("log.log", tmp_dir, "Mean planning time per run: " + str(mean_planning_time) + " \n")
         self.serializer.write_line("log.log", tmp_dir, "Mean planning time per planning step: " + str(mean_planning_time_per_step) + " \n")
         
         n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))
         print "mean_rewards " + str(mean)
         #plt.plot_histogram_from_data(rewards_cov)
         #sleep
         self.serializer.write_line("log.log", tmp_dir, "Mean rewards: " + str(mean) + " \n")
         self.serializer.write_line("log.log", tmp_dir, "Reward variance: " + str(var) + " \n")
         self.serializer.write_line("log.log", 
                                    tmp_dir, 
                                    "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
         self.serializer.write_line("log.log", tmp_dir, "Seed: " + str(self.seed) + " \n")
         cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_mpc_" + str(m_covs[j]) + ".log"
         os.system(cmd)
     
     cmd = "cp " + self.abs_path + "/config_mpc.yaml " + dir           
     os.system(cmd)
     
     if not os.path.exists(dir + "/environment"):
         os.makedirs(dir + "/environment") 
         
     cmd = "cp " + self.abs_path + "/" + str(self.environment_file) + " " + str(dir) + "/environment"
     os.system(cmd) 
         
     if not os.path.exists(dir + "/model"):
         os.makedirs(dir + "/model")
             
     cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
     os.system(cmd)
     print "Done."
Exemple #4
0
 def __init__(self):
     self.abs_path = os.path.dirname(os.path.abspath(__file__))
     cmd = "rm -rf " + self.abs_path + "/tmp"
     popen = subprocess.Popen(cmd, cwd=self.abs_path, shell=True)
     popen.wait()
     """ Reading the config """
     warnings.filterwarnings("ignore")
     self.init_serializer()
     config = self.serializer.read_config("config_hrf.yaml", path=self.abs_path)
     self.set_params(config)
     if self.seed < 0:
         """
         Generate a random seed that will be stored
         """
         self.seed = np.random.randint(0, 42949672)
     np.random.seed(self.seed)
     
     logging_level = logging.WARN
     if config['verbose']:
         logging_level = logging.DEBUG
     logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_level)        
     np.set_printoptions(precision=16)
     dir = self.abs_path + "/stats/hrf"
     tmp_dir = self.abs_path + "/tmp/hrf"
     self.utils = Utils()
     if not self.init_robot(self.robot_file):
         logging.error("HRF: Couldn't initialize robot")
         return               
     if not self.setup_scene(self.environment_file, self.robot):
         return        
         
     self.clear_stats(dir)
     logging.info("Start up simulator")
     sim = Simulator() 
     plan_adjuster = PlanAdjuster()       
     path_evaluator = PathEvaluator()
     path_planner = PathPlanningInterface()
     if self.show_viewer_simulation:
         self.robot.setupViewer(self.robot_file, self.environment_file)            
     
     logging.info("HRF: Generating goal states...")
     problem_feasible = False
     while not problem_feasible:
         print "Creating random scene..." 
         #self.create_random_obstacles(20)
         goal_states = get_goal_states("hrf",
                                       self.abs_path,
                                       self.serializer, 
                                       self.obstacles,                                                                           
                                       self.robot,                                    
                                       self.max_velocity,
                                       self.delta_t,
                                       self.start_state,
                                       self.goal_position,
                                       self.goal_radius,
                                       self.planning_algortihm,
                                       self.path_timeout,
                                       self.num_generated_goal_states,
                                       self.continuous_collision,
                                       self.environment_file,
                                       self.num_cores)
         
         if len(goal_states) == 0:
             logging.error("HRF: Couldn't generate any goal states. Problem seems to be infeasible")
         else:
             problem_feasible = True
     '''obst = v_obstacle()
     obst[:] = self.obstacles
     self.robot.addObstacles(obst)'''
     logging.info("HRF: Generated " + str(len(goal_states)) + " goal states")
     sim.setup_reward_function(self.discount_factor, self.step_penalty, self.illegal_move_penalty, self.exit_reward)
     path_planner.setup(self.robot,                         
                        self.obstacles,  
                        self.max_velocity, 
                        self.delta_t, 
                        self.use_linear_path,
                        self.planning_algortihm,
                        self.path_timeout,
                        self.continuous_collision,
                        self.num_cores)
     if self.dynamic_problem:
         path_planner.setup_dynamic_problem(self.robot_file,
                                            self.environment_file,
                                            self.simulation_step_size,
                                            self.num_control_samples,
                                            self.min_control_duration,
                                            self.max_control_duration,
                                            self.add_intermediate_states,
                                            self.rrt_goal_bias,
                                            self.control_sampler)
     
     A, H, B, V, W, C, D, M_base, N_base = self.problem_setup(self.delta_t, self.robot_dof)
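      # Naming inferred from usage below: A, B, V form the (linearized) process model,
      # H, W the observation model, C, D the cost matrices, and M_base / N_base the
      # base process / observation noise covariances that get scaled per run.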
     time_to_generate_paths = 0.0
     if check_positive_definite([C, D]):
         m_covs = None
         if self.inc_covariance == "process":
             m_covs = np.linspace(self.min_process_covariance, 
                                  self.max_process_covariance, 
                                  self.covariance_steps)                 
         elif self.inc_covariance == "observation":          
             m_covs = np.linspace(self.min_observation_covariance, 
                                  self.max_observation_covariance,
                                  self.covariance_steps)
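      # Sweep over the covariance values; each value gets its own batch of simulation
      # runs and its own log file. (This assumes the positive-definiteness check above
      # passed; otherwise m_covs is still undefined here.)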
     for j in xrange(len(m_covs)):
         M = None
         N = None
         if self.inc_covariance == "process":
             """ The process noise covariance matrix """
             M = self.calc_covariance_value(self.robot, m_covs[j], M_base)
             #M = m_covs[j] * M_base
                 
             """ The observation error covariance matrix """
             N = self.calc_covariance_value(self.robot, 
                                            self.min_observation_covariance, 
                                            N_base, 
                                            covariance_type='observation')                    
         elif self.inc_covariance == "observation":
             M = self.calc_covariance_value(self.robot, 
                                            self.min_process_covariance,
                                            M_base)
             N = self.calc_covariance_value(self.robot, 
                                            m_covs[j],
                                            N_base, 
                                            covariance_type='observation')
         
         path_planner.setup_path_evaluator(A, B, C, D, H, M, N, V, W,                                     
                                           self.robot, 
                                           self.sample_size, 
                                           self.obstacles,
                                           self.goal_position,
                                           self.goal_radius,                                              
                                           self.robot_file,
                                           self.environment_file)
         sim.setup_problem(A, B, C, D, H, V, W, M, N,
                           self.robot, 
                           self.enforce_control_constraints,
                           self.obstacles, 
                           self.goal_position, 
                           self.goal_radius,
                           self.max_velocity,                                  
                           self.show_viewer_simulation,
                           self.robot_file,
                           self.environment_file,
                           self.knows_collision)
         path_evaluator.setup(A, B, C, D, H, M, N, V, W,                                     
                              self.robot, 
                              self.sample_size, 
                              self.obstacles,
                              self.goal_position,
                              self.goal_radius,
                              self.show_viewer_evaluation,
                              self.robot_file,
                              self.environment_file,
                              self.num_cores)
         path_evaluator.setup_reward_function(self.step_penalty, self.illegal_move_penalty, self.exit_reward, self.discount_factor)
         plan_adjuster.setup(self.robot,
                             M, 
                             H, 
                             W, 
                             N, 
                             C, 
                             D,
                             self.dynamic_problem, 
                             self.enforce_control_constraints)
         plan_adjuster.set_simulation_step_size(self.simulation_step_size)
         plan_adjuster.set_model_matrices(A, B, V)
         if not self.dynamic_problem:
             plan_adjuster.set_max_joint_velocities_linear_problem(np.array([self.max_velocity for i in xrange(self.robot_dof)]))
         sim.set_stop_when_colliding(self.replan_when_colliding)
         if self.dynamic_problem:
             path_evaluator.setup_dynamic_problem()
             sim.setup_dynamic_problem(self.simulation_step_size)
         path_planner.setup_reward_function(self.step_penalty, self.exit_reward, self.illegal_move_penalty, self.discount_factor)
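          # Per-covariance-value statistics, accumulated over all simulation runs below.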
         mean_number_planning_steps = 0.0
         number_of_steps = 0.0
         mean_planning_time = 0.0
         num_generated_paths_run = 0.0
         successful_runs = 0
         num_collisions = 0.0
         linearization_error = 0.0
          final_states = []
         rewards_cov = []
         for k in xrange(self.num_simulation_runs):
             print "HRF: Run " + str(k + 1)                                
             self.serializer.write_line("log.log", tmp_dir, "RUN #" + str(k + 1) + " \n")
             current_step = 0
             x_true = np.array([self.start_state[m] for m in xrange(len(self.start_state))])
             x_estimated = np.array([self.start_state[m] for m in xrange(len(self.start_state))])
             #x_estimated[0] += 0.2                
             #x_estimated[0] = 0.4             
             #x_predicted = self.start_state                               
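              # P_t is the state covariance passed to the simulator and path evaluator;
              # P_ext_t appears to be the covariance propagated by the EKF further down.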
              P_t = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
              P_ext_t = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
              deviation_covariance = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
              estimated_deviation_covariance = np.zeros((2 * self.robot_dof, 2 * self.robot_dof))
             total_reward = 0.0
             terminal = False 
             last_x_true = None               
             
             """
             Obtain a nominal path
             """
             print "plan"
             path_planner.set_start_and_goal(x_estimated, goal_states, self.goal_position, self.goal_radius)
             t0 = time.time()                
             (xs, 
              us, 
              zs,
              control_durations, 
              num_generated_paths, 
              objective,
              state_covariances,
              deviation_covariances,
              estimated_deviation_covariances, 
              mean_gen_times, 
              mean_eval_times,
              total_gen_times,
              total_eval_times) = path_planner.plan_and_evaluate_paths(1, 
                                                                       0, 
                                                                       current_step, 
                                                                       self.evaluation_horizon, 
                                                                       P_t,
                                                                       deviation_covariance,
                                                                       estimated_deviation_covariance, 
                                                                       0.0)
             print "objective " + str(objective)
             
             while True: 
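                  # Receding-horizon execution: run one step of the current plan,
                  # update the state estimate with an EKF, then replan and adjust
                  # the plan from the predicted state.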
                 print "current step " + str(current_step) 
                 '''if current_step == 7:
                     x_true = np.array([last_x_true[k] for k in xrange(len(last_x_true))])
                     for k in xrange(len(last_x_true) / 2, len(last_x_true)):
                         last_x_true[k] = 0.0'''   
                 """
                 Predict system state at t+1 using nominal path
                 """
                 
                 """ Get state matrices """
                 #As, Bs, Vs, Ms, Hs, Ws, Ns = sim.get_linear_model_matrices(xs, us, control_durations)
                 As, Bs, Vs, Ms, Hs, Ws, Ns = sim.get_linear_model_matrices([x_estimated], [us[0]], control_durations)
                 
                 """ Predict using EKF """
                 (x_predicted_temp, P_predicted) = kalman.predict_state(self.robot,
                                                                        x_estimated,                                                                     
                                                                        us[0], 
                                                                        control_durations[0],
                                                                        self.simulation_step_size, 
                                                                        As[0],
                                                                        Bs[0],
                                                                        Vs[0],
                                                                        Ms[0],
                                                                        P_ext_t,
                                                                        self.dynamic_problem)                    
                 
                 """ Make sure x_predicted fulfills the constraints """                 
                 if self.enforce_constraints:     
                     x_predicted_temp = sim.check_constraints(x_predicted_temp)
                 predicted_collided = True              
                 if not sim.is_in_collision([], x_predicted_temp)[0]:                                                                                                    
                     x_predicted = x_predicted_temp
                     predicted_collided = False
                 else: 
                     print "X_PREDICTED COLLIDES!"
                     x_predicted = x_estimated         
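                      # Predicted state collides: fall back to the current estimate and
                      # zero its velocity entries (second half of the state vector).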
                     for l in xrange(len(x_predicted) / 2, len(x_predicted)):                            
                         x_predicted[l] = 0 
                         
                  # copy x_true without reusing the run-loop variable k
                  last_x_true = np.array(x_true)
                 
                 """
                 Execute path for 1 time step
                 """                    
                 (x_true,                     
                  x_tilde,
                  x_tilde_linear, 
                  x_estimated_dash,
                  z, 
                  P_t, 
                  current_step, 
                  total_reward,
                  terminal,
                  estimated_s,
                  estimated_c,
                  history_entries) = sim.simulate_n_steps(xs, us, zs,
                                                          control_durations,
                                                          x_true,
                                                          x_estimated,
                                                          P_t,
                                                          total_reward,                                                                 
                                                          current_step,
                                                          1,
                                                          0.0,
                                                          0.0,
                                                          max_num_steps=self.max_num_steps)
                 
                 history_entries[-1].set_best_reward(objective)                                        
                  
                 """
                 Process history entries
                 """                    
                 try:
                     deviation_covariance = deviation_covariances[len(history_entries) - 1]
                     estimated_deviation_covariance = estimated_deviation_covariances[len(history_entries) - 1]
                 except:
                     print "what: len(deviation_covariances) " + str(len(deviation_covariances))
                     print "len(history_entries) " + str(len(history_entries))
                     print "len(xs) " + str(len(xs))
                 
                 history_entries[0].set_replanning(True)                                           
                 for l in xrange(len(history_entries)):
                     try:
                         history_entries[l].set_estimated_covariance(state_covariances[l])
                     except:
                         print "l " + str(l)
                         print "len(state_covariances) " + str(len(state_covariances))                                                   
                     
                     if history_entries[l].collided:                            
                         num_collisions += 1                            
                     linearization_error += history_entries[l].linearization_error
                 if (current_step == self.max_num_steps) or terminal:
                     history_entries[len(history_entries) - 2].set_best_reward(objective)
                     history_entries[-1].set_best_reward(None)
                     for l in xrange(len(history_entries)):                            
                         history_entries[l].serialize(tmp_dir, "log.log")
                     final_states.append(history_entries[-1].x_true)                        
                     if terminal:
                         print "Terminal state reached"
                         successful_runs += 1
                     break                    
                     
                 
                 
                 """
                 Plan new trajectories from predicted state
                 """
                 path_planner.set_start_and_goal(x_predicted, goal_states, self.goal_position, self.goal_radius) 
                 t0 = time.time()                   
                 paths = path_planner.plan_paths(self.num_paths, 0, planning_timeout=self.timeout, min_num_paths=1)
                 mean_planning_time += time.time() - t0
                 mean_number_planning_steps += 1.0
                 num_generated_paths_run += len(paths)                    
                 
                 """
                 Filter update
                 """
                 (x_estimated_temp, P_ext_t) = kalman.filter_update(x_predicted, P_predicted, z, Hs[0], Ws[0], Ns[0])
                 
                 
                 """ Make sure x_estimated fulfills the constraints """                 
                 if self.enforce_constraints:     
                     x_estimated_temp = sim.check_constraints(x_estimated_temp) 
                 in_collision, colliding_obstacle = sim.is_in_collision([], x_estimated_temp)
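                  # If the filtered estimate is in collision (or a collision is known to
                  # have happened), keep the previous estimate but zero its velocities;
                  # otherwise accept the filtered estimate.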
                 if in_collision:
                     history_entries[-1].set_estimate_collided(True)                        
                     for l in xrange(len(x_estimated) / 2, len(x_estimated)):                            
                         x_estimated[l] = 0
                 elif history_entries[-1].collided and self.knows_collision:
                     for l in xrange(len(x_estimated) / 2, len(x_estimated)):                            
                         x_estimated[l] = 0
                 else:
                     x_estimated = x_estimated_temp                         
                     history_entries[-1].set_estimate_collided(False)
                 
                 history_entries[-1].serialize(tmp_dir, "log.log")
                 """
                 Adjust plan
                 """ 
                 t0 = time.time()                   
                 (xs_adj, us_adj, zs_adj, control_durations_adj, adjusted) = plan_adjuster.adjust_plan(self.robot,
                                                                                                      (xs, us, zs, control_durations),                                                                                                
                                                                                                      x_estimated,
                                                                                                      P_t)
                 if not adjusted:
                     final_states.append(history_entries[-1].x_true)
                     print "current step " + str(current_step)
                     raise ValueError("Raised")
                     break
                 if self.show_viewer_simulation:
                     sim.update_viewer(x_true, x_estimated, z, control_duration=0.03, colliding_obstacle=sim.colliding_obstacle)
                 
                 """
                 Evaluate the adjusted plan and the planned paths
                 """
                 #if self.is_terminal(xs_adj[-1]) and not len(xs_adj)==1:
                  if len(xs_adj) > 1:
                      """
                      Only evaluate the adjusted plan if it contains more than one state
                      """
                     paths.extend([[xs_adj, us_adj, zs_adj, control_durations_adj]])
                 if len(paths) == 0:
                     """
                     Running out of paths
                     """
                     print "Error: Couldn't generate an evaluate any paths"
                     final_states.append(history_entries[-1].x_true)
                     break
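                  # Evaluate all candidates (freshly planned paths plus the adjusted plan)
                  # and continue with the one the path evaluator scores best.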
                 
                 (path_index,
                  xs, 
                  us, 
                  zs, 
                  control_durations, 
                  objective, 
                  state_covariances,
                  deviation_covariances,
                  estimated_deviation_covariances) = path_evaluator.evaluate_paths(paths, 
                                                                                   P_t, 
                                                                                   current_step)
                 mean_planning_time += time.time() - t0
                  if path_index is None:
                     """
                     We couldn't evaluate any paths
                     """
                     final_states.append(history_entries[-1].x_true)
                     break
                 
                 if self.show_viewer_simulation:
                     self.visualize_paths(self.robot, [xs])    
                                              
             rewards_cov.append(total_reward)                
             self.serializer.write_line("log.log", 
                                        tmp_dir,
                                        "Reward: " + str(total_reward) + " \n") 
             self.serializer.write_line("log.log", 
                                        tmp_dir,
                                        "\n")
             number_of_steps += current_step
         mean_planning_time_per_step = mean_planning_time / mean_number_planning_steps 
         mean_planning_time /= self.num_simulation_runs
         mean_num_generated_paths_step = num_generated_paths_run / mean_number_planning_steps
                             
         """ Calculate the distance to goal area
         """  
         ee_position_distances = [] 
         for state in final_states:
             joint_angles = v_double()
             joint_angles[:] = [state[y] for y in xrange(len(state) / 2)]
             ee_position = v_double()
             self.robot.getEndEffectorPosition(joint_angles, ee_position)
             ee_position = np.array([ee_position[y] for y in xrange(len(ee_position))])
             dist = np.linalg.norm(np.array(self.goal_position - ee_position))
             if dist < self.goal_radius:
                 dist = 0.0
             ee_position_distances.append(dist)
         logging.info("HRF: Done. total_reward is " + str(total_reward))
         try:
             n, min_max, mean_distance_to_goal, var, skew, kurt = scipy.stats.describe(np.array(ee_position_distances))
          except Exception:
              print ee_position_distances
              raise
             
         self.serializer.write_line("log.log", tmp_dir, "################################# \n")
         self.serializer.write_line("log.log",
                                    tmp_dir,
                                    "inc_covariance: " + str(self.inc_covariance) + "\n")
         if self.inc_covariance == "process":                  
             self.serializer.write_line("log.log",
                                        tmp_dir,
                                        "Process covariance: " + str(m_covs[j]) + " \n")
             self.serializer.write_line("log.log",
                                        tmp_dir,
                                        "Observation covariance: " + str(self.min_observation_covariance) + " \n")
         elif self.inc_covariance == "observation":                    
             self.serializer.write_line("log.log",
                                        tmp_dir,
                                        "Process covariance: " + str(self.min_process_covariance) + " \n")
             self.serializer.write_line("log.log",
                                        tmp_dir,
                                        "Observation covariance: " + str(m_covs[j]) + " \n")
         mean_linearization_error = linearization_error / number_of_steps
         number_of_steps /= self.num_simulation_runs
         self.serializer.write_line("log.log", tmp_dir, "Mean number of steps: " + str(number_of_steps) + " \n") 
         self.serializer.write_line("log.log", tmp_dir, "Mean number of generated paths per step: " + str(mean_num_generated_paths_step) + " \n")                            
         self.serializer.write_line("log.log", tmp_dir, "Mean num collisions per run: " + str(float(num_collisions) / float(self.num_simulation_runs)) + " \n")
         self.serializer.write_line("log.log", 
                                    tmp_dir, 
                                    "Average distance to goal area: " + str(mean_distance_to_goal) + " \n")
         self.serializer.write_line("log.log", tmp_dir, "Num successes: " + str(successful_runs) + " \n")
         print "succ " + str((100.0 / self.num_simulation_runs) * successful_runs)
         self.serializer.write_line("log.log", tmp_dir, "Percentage of successful runs: " + str((100.0 / self.num_simulation_runs) * successful_runs) + " \n")
         self.serializer.write_line("log.log", tmp_dir, "Mean planning time per run: " + str(mean_planning_time) + " \n")
         self.serializer.write_line("log.log", tmp_dir, "Mean planning time per planning step: " + str(mean_planning_time_per_step) + " \n")
         
         n, min_max, mean, var, skew, kurt = scipy.stats.describe(np.array(rewards_cov))
         print "mean_rewards " + str(mean)
         #plt.plot_histogram_from_data(rewards_cov)
         #sleep
         self.serializer.write_line("log.log", tmp_dir, "Mean rewards: " + str(mean) + " \n")
         self.serializer.write_line("log.log", tmp_dir, "Reward variance: " + str(var) + " \n")
         self.serializer.write_line("log.log", tmp_dir, "Mean linearisation error: " + str(mean_linearization_error) + " \n")
         self.serializer.write_line("log.log", 
                                    tmp_dir, 
                                    "Reward standard deviation: " + str(np.sqrt(var)) + " \n")
         self.serializer.write_line("log.log", tmp_dir, "Seed: " + str(self.seed) + " \n")
         cmd = "mv " + tmp_dir + "/log.log " + dir + "/log_hrf_" + str(m_covs[j]) + ".log"
         os.system(cmd)
     cmd = "cp " + self.abs_path + "/config_hrf.yaml " + dir           
     os.system(cmd)
     
     if not os.path.exists(dir + "/environment"):
         os.makedirs(dir + "/environment") 
         
     cmd = "cp " + self.abs_path + "/" + str(self.environment_file) + " " + str(dir) + "/environment"
     os.system(cmd) 
         
     if not os.path.exists(dir + "/model"):
         os.makedirs(dir + "/model")
             
     cmd = "cp " + self.abs_path + "/" + self.robot_file + " " + dir + "/model"
     os.system(cmd)
     print "Done."