Esempio n. 1
0
    def plan_one_path_bvp(env, start, end, out_queue, path_file, control_file,
                          cost_file, time_file):
        planner = _sst_module.SSTWrapper(
            state_bounds=env.get_state_bounds(),
            control_bounds=env.get_control_bounds(),
            distance=env.distance_computer(),
            start_state=start,
            goal_state=end,
            goal_radius=goal_radius,
            random_seed=random_seed,
            sst_delta_near=sst_delta_near,
            sst_delta_drain=sst_delta_drain)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obs: %d, path: %d' % (i, j))
        for iter in range(args.max_iter):
            if iter % 1000 == 0:
                print('still alive after %d iterations' % (iter))
            if iter % 100 == 0:
                # from time to time use the goal
                sample = end
                #planner.step_with_sample(env, sample, min_time_steps, max_time_steps, integration_step)
            else:
                sample = np.random.uniform(low=low, high=high)
                #planner.step_with_sample(env, sample, min_time_steps, max_time_steps, integration_step)
            #planner.step(env, min_time_steps, max_time_steps, integration_step)
            planner.step_with_sample(env, sample, min_time_steps,
                                     max_time_steps, integration_step)
            solution = planner.get_solution()
            # don't break the searching to find better solutions
            #if solution is not None:
            #    break
        plan_time = time.time() - time0
        if solution is None:
            out_queue.put(0)
        else:
            print('path succeeded.')
            path, controls, cost = solution
            print(path)
            path = np.array(path)
            controls = np.array(controls)
            cost = np.array(cost)

            file = open(path_file, 'wb')
            pickle.dump(path, file)
            file.close()
            file = open(control_file, 'wb')
            pickle.dump(controls, file)
            file.close()
            file = open(cost_file, 'wb')
            pickle.dump(cost, file)
            file.close()
            file = open(time_file, 'wb')
            pickle.dump(plan_time, file)
            file.close()
            out_queue.put(1)
    def plan_one_path_sst(env, start, end, path_file, control_file, cost_file,
                          time_file):
        planner = _sst_module.SSTWrapper(
            state_bounds=env.get_state_bounds(),
            control_bounds=env.get_control_bounds(),
            distance=_sst_module.TwoLinkAcrobotDistance(),
            start_state=start,
            goal_state=end,
            goal_radius=goal_radius,
            random_seed=random_seed,
            sst_delta_near=sst_delta_near,
            sst_delta_drain=sst_delta_drain)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obs: %d, path: %d' % (i, j))
        for iter in range(args.max_iter):
            print('iteration: %d' % (iter))
            planner.step(env, min_time_steps, max_time_steps, integration_step)
            print('after step')
            #planner.step_with_sample(env, sample, min_time_steps, max_time_steps, integration_step)
            #solution = planner.get_solution()
            # don't break the searching to find better solutions
            #if solution is not None:
            #    break
        solution = planner.get_solution()
        plan_time = time.time() - time0
        if solution is None:
            return 0
        else:
            print('path succeeded.')
            path, controls, cost = solution
            print(path)
            path = np.array(path)
            controls = np.array(controls)
            cost = np.array(cost)

            file = open(path_file, 'wb')
            pickle.dump(path, file)
            file.close()
            file = open(control_file, 'wb')
            pickle.dump(controls, file)
            file.close()
            file = open(cost_file, 'wb')
            pickle.dump(cost, file)
            file.close()
            file = open(time_file, 'wb')
            pickle.dump(plan_time, file)
            file.close()
            return 1
Esempio n. 3
0
def plan(obs,
         env,
         x0,
         xG,
         data,
         informer,
         init_informer,
         system,
         dynamics,
         enforce_bounds,
         IsInCollisionWithObs,
         traj_opt,
         step_sz=0.02,
         num_steps=21,
         MAX_LENGTH=1000):
    """
    For each node xt, we record how many explorations have been made from xt. We do planning according to the following rules:
    1. if n_explored >= n_max_explore:
        if not the root, backtrace to the previous node

    hat(x)_t+1 = informer(xt, G)
    x_t+1, tau = BVP(xt, hat(x)_t+1)
    n_exlored(x_t) += 1
    2. if explored(x_t+1) (w.r.t. some regions as defined in SST paper)
        ignore x_t+1
        # may also update the "score" using the already explored region
    3. if too_far(x_t+1, hat(x)_t+1)
        ignore x_t+1
    4. if inCollision(x_t+1)
        ignore x_t+1
        # may also update the "score" of points

    # otherwise it is safe
    establish the connections between x_t and x_t+1,
    move to x_t+1
    """
    env_constr = standard_cpp_systems.RectangleObs
    propagate_system = env_constr(obs, 6., 'acrobot')
    planner = _sst_module.SSTWrapper(
        state_bounds=propagate_system.get_state_bounds(),
        control_bounds=propagate_system.get_control_bounds(),
        distance=propagate_system.distance_computer(),
        start_state=x0.x,
        goal_state=xG.x,
        goal_radius=2.,
        random_seed=0,
        sst_delta_near=1.5,
        sst_delta_drain=1.)

    # visualization
    obs_width = 6.
    new_obs_i = []
    for k in range(len(obs)):
        obs_pt = []
        obs_pt.append(obs[k][0] - obs_width / 2)
        obs_pt.append(obs[k][1] - obs_width / 2)
        obs_pt.append(obs[k][0] - obs_width / 2)
        obs_pt.append(obs[k][1] + obs_width / 2)
        obs_pt.append(obs[k][0] + obs_width / 2)
        obs_pt.append(obs[k][1] + obs_width / 2)
        obs_pt.append(obs[k][0] + obs_width / 2)
        obs_pt.append(obs[k][1] - obs_width / 2)
        new_obs_i.append(obs_pt)

    IsInCollision = lambda x: IsInCollisionWithObs(x, new_obs_i)
    # visualization
    """
    print('step_sz: %f' % (step_sz))
    params = {}
    params['obs_w'] = 6.
    params['obs_h'] = 6.
    params['integration_step'] = step_sz
    vis = AcrobotVisualizer(Acrobot(), params)
    vis.obs = obs
    plt.ion()
    fig = plt.figure()
    ax = fig.add_subplot(121)
    ax.set_autoscale_on(True)
    hl, = ax.plot([], [], 'b')
    #hl_real, = ax.plot([], [], 'r')
    hl_for, = ax.plot([], [], 'g')
    hl_back, = ax.plot([], [], 'r')
    hl_for_mpnet, = ax.plot([], [], 'lightgreen')
    hl_back_mpnet, = ax.plot([], [], 'salmon')

    ax_ani = fig.add_subplot(122)
    vis._init(ax_ani)

    print(obs)
    def update_line(h, ax, new_data):
        new_data = wrap_angle(new_data, system)
        h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
        #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
        #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

    def remove_last_k(h, ax, k):
        h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

    def draw_update_line(ax):
        ax.relim()
        ax.autoscale_view()
        fig.canvas.draw()
        fig.canvas.flush_events()
        #plt.show()

    def animation(states, actions):
        vis._animate(states[-1], ax_ani)
        draw_update_line(ax_ani)
    dtheta = 0.1
    feasible_points = []
    infeasible_points = []
    goal_region = []
    imin = 0
    imax = int(2*np.pi/dtheta)


    for i in range(imin, imax):
        for j in range(imin, imax):
            x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
            if IsInCollision(x):
                infeasible_points.append(x)
            else:
                feasible_points.append(x)
                if goal_check(Node(x), xG, system):
                    goal_region.append(x)
    feasible_points = np.array(feasible_points)
    infeasible_points = np.array(infeasible_points)
    goal_region = np.array(goal_region)
    ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
    ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
    ax.scatter(goal_region[:,0], goal_region[:,1], c='green')
    for i in range(len(data)):
        update_line(hl, ax, data[i])
    draw_update_line(ax)
    update_line(hl_for, ax, x0.x)
    draw_update_line(ax)
    update_line(hl_back, ax, xG.x)
    draw_update_line(ax)
    """

    env = torch.FloatTensor(env)
    env = to_var(env)
    #explored_nodes = [x0]

    xt = x0
    max_explore = 1
    xt.n_explored = 0
    xt.cost = 0.
    xw_scat = None
    fes = False
    frontier_nodes = []
    tie_breaker = 0
    entry = (x0.cost + h_cost(x0, xG, system), tie_breaker, x0)
    heapq.heappush(frontier_nodes, entry)

    for itr in range(MAX_LENGTH):
        if xw_scat is not None:
            xw_scat.remove()
            xw_scat = None
        # pop nodes from frontier_nodes
        if len(frontier_nodes) == 0:
            tie_breaker = 0
            entry = (x0.cost + h_cost(x0, xG, system), tie_breaker, x0)
            heapq.heappush(frontier_nodes, entry)  # push it back
        entry = heapq.heappop(frontier_nodes)
        print('popping...')
        print("entry cost:")
        print(entry[0])

        xt = entry[2]

        if xt.n_explored >= max_explore and xt is not x0:
            xt = xt.prev
            #print('retreating...')
            continue

        # try connecting to goal every now and then
        x_init, u_init, t_init = init_informer(env, xt, xG, direction=0)
        x_G_, edge, valid = pathSteerTo(xt, xG, x_init, u_init, t_init, dynamics, enforce_bounds, IsInCollision, \
                                traj_opt, step_sz=step_sz, num_steps=num_steps, system=system, direction=0, propagating=True)
        """
        xs_to_plot = np.array(edge.xs[::10])
        for i in range(len(xs_to_plot)):
            xs_to_plot[i] = wrap_angle(xs_to_plot[i], system)
        ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='orange')
        draw_update_line(ax)
        """
        if edge is not None and goal_check(x_G_, xG, system):
            print('bingo!')
            fes = True
            break
        """
        xs, us, dts = planner.step_bvp(propagate_system, system, xt.x, x_init[-1], 400, num_steps, step_sz, x_init, u_init, t_init)

        if len(us) != 0:
            #xs_to_plot = np.array(edge.xs[::10])
            #for i in range(len(xs_to_plot)):
            #    xs_to_plot[i] = wrap_angle(xs_to_plot[i], system)
            x_t_1 = xs[-1]
            ax.scatter(x_t_1[0], x_t_1[1], c='orange')
            #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='orange')

            draw_update_line(ax)
            edge_dt = np.sum(dts)
            start = xt
            goal = Node(wrap_angle(xs[-1], system))

            # after trajopt, make actions of dimension 2
            us = us.reshape(len(us), -1)
            # notice that controller time starts from 0, hence locally need to shift the time by minusing t0_edges
            # start from 0
            time_knot = np.cumsum(dts)
            time_knot = np.insert(time_knot, 0, 0.)
            # can also change the resolution by the following function (for instance, every 10)
            edge = Edge(xs, us, dts, time_knot, edge_dt)
            edge.next = goal
            start.edge = edge
            start.next = goal
            goal.prev = start
            goal.n_explored = 0
            goal.cost = xt.cost + xt.edge.dt
            # push to frontier
            tie_breaker += 1
            entry = (goal.cost+h_cost(goal,xG,system), tie_breaker, goal)
            heapq.heappush(frontier_nodes, entry)

            if goal_check(Node(x_t_1), xG, system):
                print('bingo!')
                fes = True
                break
        """

        for i in range(max_explore):
            xw, x_init, u_init, t_init = informer(env, xt, xG, direction=0)
            """
            xw_scat = ax.scatter(xw.x[0], xw.x[1], c='lightgreen')
            draw_update_line(ax)
            """
            xs, us, dts = planner.step_bvp(propagate_system, system, xt.x,
                                           xw.x, 400, num_steps, step_sz,
                                           x_init, u_init, t_init)
            #print('xs:')
            #print(xs)
            # stop at the last node that is not in collision
            if len(us) == 0:
                # the same node, it hasn't changed
                continue
            xt.n_explored += 1
            # establish connections to x_t+1
            edge_dt = np.sum(dts)
            start = xt
            goal = Node(wrap_angle(xs[-1], system))

            # after trajopt, make actions of dimension 2
            us = us.reshape(len(us), -1)
            # notice that controller time starts from 0, hence locally need to shift the time by minusing t0_edges
            # start from 0
            time_knot = np.cumsum(dts)
            time_knot = np.insert(time_knot, 0, 0.)
            # can also change the resolution by the following function (for instance, every 10)
            edge = Edge(xs, us, dts, time_knot, edge_dt)
            edge.next = goal
            start.edge = edge
            start.next = goal
            goal.prev = start
            #print('n_explored: %d' % (xt.n_explored))

            #for i in range(len(edge.xs)):
            #    update_line(hl_for, ax, edge.xs[i])
            """
            xs_to_plot = np.array(xs[::5])
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], system)
            ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='g')
            draw_update_line(ax)
            animation(xs, us)
            """

            #x_t_1.prev = xt
            goal.n_explored = 0
            goal.cost = xt.cost + xt.edge.dt
            # push to frontier
            tie_breaker += 1
            entry = (goal.cost + h_cost(goal, xG, system), tie_breaker, goal)
            heapq.heappush(frontier_nodes, entry)

            # check if the new node is near goal
            if goal_check(goal, xG, system):
                print("success")
                fes = True
                break
        if fes:
            break

        # check if the new node is near goal
        if goal_check(xt, xG, system):
            print("success")
            fes = True
            break
    return fes
Esempio n. 4
0
    def plan_one_path(obs_i, obs, obc, detailed_data_path, data_path,
                      start_state, goal_state, goal_inform_state, cost_i,
                      max_iteration, out_queue_t, out_queue_cost, random_seed):
        if args.env_type == 'pendulum':
            system = standard_cpp_systems.PSOPTPendulum()
            bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
            step_sz = 0.002
            num_steps = 20
            traj_opt = lambda x0, x1: bvp_solver.solve(x0, x1, 200, num_steps,
                                                       1, 20, step_sz)

        elif args.env_type == 'cartpole_obs':
            #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
            obs_width = 4.0
            psopt_system = _sst_module.PSOPTCartPole()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 4., 'cartpole')
            #distance_computer = propagate_system.distance_computer()
            distance_computer = _sst_module.euclidean_distance(
                np.array(propagate_system.is_circular_topology()))
            step_sz = 0.002
            num_steps = 21
            goal_radius = 1.5
            random_seed = 0
            delta_near = 2.0
            delta_drain = 1.2
            #delta_near=.2
            #delta_drain=.1
            cost_threshold = 1.05
            min_time_steps = 10
            max_time_steps = 200
            #min_time_steps = 5
            #max_time_steps = 400
            integration_step = 0.002
        elif args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius = 2.0
            random_seed = 0
            delta_near = 1.0
            delta_drain = 0.5
            cost_threshold = 1.05
            min_time_steps = 5
            max_time_steps = 100
            integration_step = 0.02
        planner = _sst_module.SSTWrapper(
            state_bounds=propagate_system.get_state_bounds(),
            control_bounds=propagate_system.get_control_bounds(),
            distance=distance_computer,
            start_state=start_state,
            goal_state=goal_state,
            goal_radius=goal_radius,
            random_seed=random_seed,
            sst_delta_near=delta_near,
            sst_delta_drain=delta_drain)
        #print('creating planner...')
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()

        for i in range(max_iteration):
            planner.step(propagate_system, min_time_steps, max_time_steps,
                         integration_step)

            # early break for initial path
            solution = planner.get_solution()
            if solution is not None:
                #print('solution found already.')
                # based on cost break
                xs, us, ts = solution
                t_sst = np.sum(ts)
                #print(t_sst)
                #print(cost_i)
                if t_sst <= cost_i * cost_threshold:
                    print('solved in %d iterations' % (i))
                    break
        plan_time = time.time() - time0
        solution = planner.get_solution()
        xs, us, ts = solution
        print(np.linalg.norm(np.array(xs[-1]) - goal_state))
        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        #ax.set_xlim(-30, 30)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()
            
        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dx = 1
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        #imax = int(2*30./dx)
        imax = int(2*np.pi/dtheta)
        jmin = 0
        jmax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(jmin, jmax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        draw_update_line(ax)
        #state_t = start_state

        xs_to_plot = np.array(detailed_data_path)
        for i in range(len(xs_to_plot)):
            xs_to_plot[i] = wrap_angle(detailed_data_path[i], propagate_system)
        ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='lightgreen', s=0.5)
        # draw start and goal
        #ax.scatter(start_state[0], goal_state[0], marker='X')
        draw_update_line(ax)
        #plt.waitforbuttonpress()

        
        if solution is not None:
            xs, us, ts = solution
            
            # propagate data
            p_start = xs[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(us)):
                #state_i.append(len(detail_paths)-1)
                print(ts[k])
                max_steps = int(np.round(ts[k]/step_sz))
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                for step in range(1,max_steps+1):
                    p_start = dynamics(p_start, us[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            #state[-1] = xs[-1]
            
            
            
            xs_to_plot = np.array(state)
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
            ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green', s=0.5)
            start_state_np = np.array(start_state)
            goal_state_np = np.array(goal_state)
            ax.scatter([start_state_np[0]], [start_state_np[1]], c='blue', marker='*')
            ax.scatter([goal_state_np[0]], [goal_state_np[1]], c='red', marker='*')

            # draw start and goal
            #ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            plt.waitforbuttonpress()
        """

        # validate if the path contains collision
        if solution is not None:
            res_x, res_u, res_t = solution

            print('solution_x:')
            print(res_x)
            print('path_x:')
            print(np.array(data_path))

            # propagate data
            p_start = res_x[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(res_u)):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(np.round(res_t[k] / step_sz))
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                #p_start = res_x[k]
                #state[-1] = res_x[k]
                for step in range(1, max_steps + 1):
                    p_start = dynamics(p_start, res_u[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
                        # check collision for the new state
                        if IsInCollision(p_start, obs_i):
                            print(
                                'collision happens at u_index: %d, step: %d' %
                                (k, step))
                        assert not IsInCollision(p_start, obs_i)

            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            #state[-1] = res_x[-1]
        # validation end

        print('plan time: %fs' % (plan_time))
        if solution is None:
            print('failed.')
            out_queue_t.put(-1)
            out_queue_cost.put(-1)
        else:
            print('path succeeded.')
            out_queue_t.put(plan_time)
            out_queue_cost.put(t_sst)
Esempio n. 5
0
from sparse_rrt import _sst_module
from sparse_rrt.systems import standard_cpp_systems
import numpy as np
import time

from sparse_rrt.systems.acrobot import Acrobot, AcrobotDistance
from sparse_rrt.systems.point import Point

# this is a test code for SST in CartPole environment
system = standard_cpp_systems.Point()

planner = _sst_module.SSTWrapper(state_bounds=system.get_state_bounds(),
                                 control_bounds=system.get_control_bounds(),
                                 distance=system.distance_computer(),
                                 start_state=np.array([0., 0.]),
                                 goal_state=np.array([9., 9.]),
                                 goal_radius=0.5,
                                 random_seed=0,
                                 sst_delta_near=0.4,
                                 sst_delta_drain=0.2)

number_of_iterations = 10000

min_time_steps = 20
max_time_steps = 200
integration_step = 0.002

print("Starting the planner.")

start_time = time.time()
state_bounds = system.get_state_bounds()
Esempio n. 6
0
    def plan_one_path_sst(env,
                          start,
                          end,
                          out_queue,
                          path_file,
                          control_file,
                          cost_file,
                          time_file,
                          env_id=None,
                          id_list=None):
        planner = _sst_module.SSTWrapper(
            state_bounds=env.get_state_bounds(),
            control_bounds=env.get_control_bounds(),
            distance=env.distance_computer(),
            start_state=start,
            goal_state=end,
            goal_radius=goal_radius,
            random_seed=random_seed,
            sst_delta_near=sst_delta_near,
            sst_delta_drain=sst_delta_drain)
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        #print('obs: %d, path: %d' % (i, j))
        for iter in range(args.max_iter):
            planner.step(env, min_time_steps, max_time_steps, integration_step)

            # if iter%5000 == 0:
            #     if planner.get_solution() is not None:
            #         print(time.time() - time0)
            if time.time() - time0 > args.max_time:
                break
        solution = planner.get_solution()
        plan_time = time.time() - time0
        if solution is None:
            out_queue.put(0)
        else:

            print('path succeeded.')
            path, controls, cost = solution
            print(path)
            path = np.array(path)
            controls = np.array(controls)
            cost = np.array(cost)

            file = open(path_file, 'wb')
            pickle.dump(path, file)
            file.close()
            file = open(control_file, 'wb')
            pickle.dump(controls, file)
            file.close()
            file = open(cost_file, 'wb')
            pickle.dump(cost, file)
            file.close()
            file = open(time_file, 'wb')
            pickle.dump(plan_time, file)
            file.close()

            assert env_id is not None
            assert id_list is not None

            saved = False
            while not saved:
                try:
                    table = load_occ_table(env_id)
                    table.add(tuple(id_list))
                    save_occ_table(env_id, table)
                    saved = True
                except EOFError:
                    time.sleep(3)
                    #pass

            out_queue.put(1)
Esempio n. 7
0
def experiment(env_id,
               traj_id,
               verbose=False,
               system='cartpole_obs',
               params=None):
    print("env {}, traj {}".format(env_id, traj_id))
    if system in ['cartpole_obs', 'acrobot_obs', 'car_obs']:
        obs_list = get_obs(system, env_id)[env_id]
    elif system in ['quadrotor_obs']:
        obs_list = get_obs_3d(system, env_id)[env_id].reshape(-1, 3)
    data = load_data(system, env_id, traj_id)
    ref_path = data['path']
    ref_cost = data['cost'].sum()
    start_goal = data['start_goal']
    min_time_steps = params['min_time_steps']
    max_time_steps = params['max_time_steps']
    integration_step = params['integration_step']

    if system == 'quadrotor_obs':
        env_constr = standard_cpp_systems.RectangleObs3D
        env = env_constr(obs_list, params['width'], 'quadrotor')
    else:
        print("unkown system")
        exit(-1)
    planner = _sst_module.SSTWrapper(state_bounds=env.get_state_bounds(),
                                     control_bounds=env.get_control_bounds(),
                                     distance=env.distance_computer(),
                                     start_state=start_goal[0],
                                     goal_state=start_goal[-1],
                                     goal_radius=params['goal_radius'],
                                     random_seed=params['random_seed'],
                                     sst_delta_near=params['sst_delta_near'],
                                     sst_delta_drain=params['sst_delta_drain'])
    solution = planner.get_solution()

    data_cost = np.sum(data['cost'])
    # th = 1.2 * data_cost
    ## start experiment
    tic = time.perf_counter()
    for iteration in tqdm(range(params['number_of_iterations'])):
        planner.step(env, min_time_steps, max_time_steps, integration_step)
        if iteration % 100 == 0:
            solution = planner.get_solution()
            if (solution is not None and solution[2].sum() <= ref_cost * 1.4
                ) or time.perf_counter() - tic > params[
                    'max_planning_time']:  #and np.sum(solution[2]) < th:
                break
    toc = time.perf_counter()

    costs = solution[2].sum() if solution is not None else np.inf
    result = {
        'env_id': env_id,
        'traj_id': traj_id,
        'planning_time': toc - tic,
        'successful': solution is not None,
        'costs': costs
    }

    print("\t{}, time: {} seconds, {}(ref:{}) costs".format(
        result['successful'], result['planning_time'], result['costs'],
        np.sum(data['cost'])))
    return result
Esempio n. 8
0
    def plan_one_path(obs_i, obs, obc, start_state, goal_state,
                      goal_inform_state, cost_i, max_iteration, out_queue_t,
                      out_queue_cost):
        if args.env_type in [
                'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3',
                'acrobot_obs_4', 'acrobot_obs_8'
        ]:
            #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
            obs_width = 6.0
            psopt_system = _sst_module.PSOPTAcrobot()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, 6., 'acrobot')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.02
            num_steps = 21
            goal_radius = 2.0
            random_seed = 0
            delta_near = 1.0
            delta_drain = 0.5
            device = 3
            num_sample = 10
            min_time_steps = 5
            max_time_steps = 100
            mpnet_goal_threshold = 2.0
            mpnet_length_threshold = 30
            pick_goal_init_threshold = 0.1
            pick_goal_end_threshold = 0.8
            pick_goal_start_percent = 0.4
        elif args.env_type == 'cartpole_obs':
            obs_width = 4.0
            psopt_system = _sst_module.PSOPTCartPole()
            propagate_system = standard_cpp_systems.RectangleObs(
                obs, obs_width, 'cartpole')
            distance_computer = propagate_system.distance_computer()
            #distance_computer = _sst_module.euclidean_distance(np.array(propagate_system.is_circular_topology()))
            step_sz = 0.002
            integration_step = 0.002
            num_steps = 101
            goal_radius = 1.5
            random_seed = 0
            delta_near = 2.0
            delta_drain = 1.2
            #delta_near=1.0
            #delta_drain=0.5
            device = 3
            num_sample = 10
            min_time_steps = 10
            max_time_steps = 200
            mpnet_goal_threshold = 2.0
            mpnet_length_threshold = 90
            pick_goal_init_threshold = 0.1
            pick_goal_end_threshold = 0.8
            pick_goal_start_percent = 0.4
        #print('creating planner...')

        #planner = vis_planners.DeepSMPWrapper(mlp_path, encoder_path, cost_mlp_path, cost_encoder_path, 200, num_steps, step_sz, propagate_system, device)

        planner = _sst_module.SSTWrapper(
            state_bounds=propagate_system.get_state_bounds(),
            control_bounds=propagate_system.get_control_bounds(),
            distance=distance_computer,
            start_state=start_state,
            goal_state=goal_state,
            goal_radius=goal_radius,
            random_seed=0,
            sst_delta_near=delta_near,
            sst_delta_drain=delta_drain)

        cost_threshold = cost_i * args.cost_threshold
        #cost_threshold = 100000000.
        # generate a path by using SST to plan for some maximal iterations
        time0 = time.time()
        print('before plan_tree_SMP...')
        print('start_state:')
        print(start_state)
        print('goal_state:')
        print(goal_inform_state)

        res_x, res_u, res_t = planner.plan_tree_SMP("sst", propagate_system, psopt_system, obc.flatten(), start_state, goal_state, goal_inform_state, \
                                goal_radius, max_iteration, distance_computer, \
                                delta_near, delta_drain, cost_threshold, \
                                num_sample, min_time_steps, max_time_steps, \
                                mpnet_goal_threshold, mpnet_length_threshold, \
                                pick_goal_init_threshold, pick_goal_end_threshold, \
                                pick_goal_start_percent)
        print('after plan_tree_SMP.')

        plan_time = time.time() - time0
        """
        # visualization
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(111)
        #ax.set_autoscale_on(True)
        ax.set_xlim(-np.pi, np.pi)
        ax.set_ylim(-np.pi, np.pi)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        #print(obs)
        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, propagate_system)
            h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            #ax.relim()
            #ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def wrap_angle(x, system):
            circular = system.is_circular_topology()
            res = np.array(x)
            for i in range(len(x)):
                if circular[i]:
                    # use our previously saved version
                    res[i] = x[i] - np.floor(x[i] / (2*np.pi))*(2*np.pi)
                    if res[i] > np.pi:
                        res[i] = res[i] - 2*np.pi
            return res
        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        imin = 0
        imax = int(2*np.pi/dtheta)


        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
                if IsInCollision(x, obs_i):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        print('feasible points')
        print(feasible_points)
        print('infeasible points')
        print(infeasible_points)
        ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
        ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
        #for i in range(len(data)):
        #    update_line(hl, ax, data[i])
        draw_update_line(ax)
        #state_t = start_state
                
        if len(res_u):
            # propagate data
            p_start = res_x[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(res_u)):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(res_t[k]/step_sz)
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                p_start = res_x[k]
                state[-1] = res_x[k]
                for step in range(1,max_steps+1):
                    p_start = dynamics(p_start, res_u[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            state[-1] = res_x[-1]
            
            
            
            xs_to_plot = np.array(state)
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], propagate_system)
                if IsInCollision(xs_to_plot[i], obs_i):
                    print('in collision')
            ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='green')
            # draw start and goal
            #ax.scatter(start_state[0], goal_state[0], marker='X')
            draw_update_line(ax)
            plt.waitforbuttonpress()
        """

        #im = planner.visualize_nodes(propagate_system)
        #sec = input('Let us wait for user input')
        #show_image_opencv(im, "planning_tree", wait=True)

        # validate if the path contains collision
        """
        if len(res_u):
            # propagate data
            p_start = res_x[0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(res_u)):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(res_t[k]/step_sz)
                accum_cost = 0.
                #print('p_start:')
                #print(p_start)
                #print('data:')
                #print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                p_start = res_x[k]
                state[-1] = res_x[k]
                for step in range(1,max_steps+1):
                    p_start = dynamics(p_start, res_u[k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    accum_cost += step_sz
                    if (step % 1 == 0) or (step == max_steps):
                        state.append(p_start)
                        #print('control')
                        #print(controls[i][j])
                        cost.append(accum_cost)
                        accum_cost = 0.
                        # check collision for the new state
                        assert not IsInCollision(p_start, obs_i)
                        
            #print('p_start:')
            #print(p_start)
            #print('data:')
            #print(paths[i][j][-1])
            state[-1] = res_x[-1]
        # validation end
        """

        print('plan time: %fs' % (plan_time))
        if len(res_x) == 0:
            print('failed.')
            out_queue_t.put(-1)
            out_queue_cost.put(-1.0)
        else:
            print('path succeeded.')
            print('cost: %f' % (np.sum(res_t)))
            print('cost_threshold: %f' % (cost_threshold))
            print('data cost: %f' % (cost_i))
            out_queue_t.put(plan_time)
            out_queue_cost.put(np.sum(res_t))