Esempio n. 1
0
def test_py_system_sst_custom_distance():
    '''
    Check that distance overriding in python works
    '''

    system = Acrobot()

    planner = _sst_module.SSTWrapper(
        state_bounds=system.get_state_bounds(),
        control_bounds=system.get_control_bounds(),
        # use custom distance computer
        distance=AcrobotDistance(),
        start_state=np.array([0., 0., 0., 0.]),
        goal_state=np.array([np.pi, 0., 0., 0.]),
        goal_radius=2.,
        random_seed=0,
        sst_delta_near=0.6,
        sst_delta_drain=0.4)

    min_time_steps = 10
    max_time_steps = 50
    integration_step = 0.02

    for iteration in range(100):
        planner.step(system, min_time_steps, max_time_steps, integration_step)
        im = planner.visualize_tree(system)
def neural_replanner(mpNet1, mpNet2, start_node, goal_node, real_goal_node, obc, obs, IsInCollisionWithObs, normalize, unnormalize, MAX_LENGTH, step_sz, num_steps, informer, init_informer, system, dynamics, enforce_bounds, traj_opt, data):
    # visualization
    print('SHEN ME GUI')
    print('step_sz: %f' % (step_sz))
    params = {}
    params['obs_w'] = 6.
    params['obs_h'] = 6.
    params['integration_step'] = step_sz
    vis = AcrobotVisualizer(Acrobot(), params)
    vis.obs = obs
    plt.ion()
    fig = plt.figure()
    ax = fig.add_subplot(121)
    ax.set_autoscale_on(True)
    hl, = ax.plot([], [], 'b')
    #hl_real, = ax.plot([], [], 'r')
    hl_for, = ax.plot([], [], 'g')
    hl_back, = ax.plot([], [], 'r')
    hl_for_mpnet, = ax.plot([], [], 'lightgreen')
    hl_back_mpnet, = ax.plot([], [], 'salmon')

    ax_ani = fig.add_subplot(122)
    vis._init(ax_ani)

    print(obs)
    def update_line(h, ax, new_data):
        new_data = wrap_angle(new_data, system)
        h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
        #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
        #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

    def remove_last_k(h, ax, k):
        h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

    def draw_update_line(ax):
        ax.relim()
        ax.autoscale_view()
        fig.canvas.draw()
        fig.canvas.flush_events()
        #plt.show()

    def animation(states, actions):
        vis._animate(states[-1], ax_ani)
        draw_update_line(ax_ani)
    dtheta = 0.1
    feasible_points = []
    infeasible_points = []
    goal_region = []
    imin = 0
    imax = int(2*np.pi/dtheta)
    obs_width = 6.
    new_obs_i = []
    for k in range(len(obs)):
        obs_pt = []
        obs_pt.append(obs[k][0]-obs_width/2)
        obs_pt.append(obs[k][1]-obs_width/2)
        obs_pt.append(obs[k][0]-obs_width/2)
        obs_pt.append(obs[k][1]+obs_width/2)
        obs_pt.append(obs[k][0]+obs_width/2)
        obs_pt.append(obs[k][1]+obs_width/2)
        obs_pt.append(obs[k][0]+obs_width/2)
        obs_pt.append(obs[k][1]-obs_width/2)
        new_obs_i.append(obs_pt)

    IsInCollision = lambda x: IsInCollisionWithObs(x, new_obs_i)
    for i in range(imin, imax):
        for j in range(imin, imax):
            x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
            if IsInCollision(x):
                infeasible_points.append(x)
            else:
                feasible_points.append(x)
                if goal_check(Node(x), real_goal_node):
                    goal_region.append(x)
    feasible_points = np.array(feasible_points)
    infeasible_points = np.array(infeasible_points)
    goal_region = np.array(goal_region)
    ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
    ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')
    ax.scatter(goal_region[:,0], goal_region[:,1], c='green')
    for i in range(len(data)):
        update_line(hl, ax, data[i])
    draw_update_line(ax)
    update_line(hl_for, ax, start_node.x)
    draw_update_line(ax)
    update_line(hl_back, ax, goal_node.x)
    draw_update_line(ax)
    
    # plan a mini path from start to goal
    # obs: tensor
    start = start_node.x
    goal = goal_node.x
    itr=0
    pA=[]
    pA.append(start)
    pB=[]
    pB.append(goal)
    target_reached=0
    tree=0
    new_path = []
    
    # extract the state for the node
    start = torch.from_numpy(start_node.x).type(torch.FloatTensor)
    goal = torch.from_numpy(goal_node.x).type(torch.FloatTensor)
    start_numpy = start.numpy()
    goal_numpy = goal.numpy()
    x_init, u_init, t_init = init_informer(obs, Node(start_numpy), Node(goal_numpy), direction=0)
    goal_numpy__node, edge, cf = pathSteerTo(Node(start_numpy), Node(x_init[-1]), x_init, u_init, t_init, \
                                   dynamics, enforce_bounds, IsInCollision, traj_opt, 0, system,
                                   step_sz=step_sz, num_steps=num_steps, propagating=False, endpoint=True)     
    if not node_nearby(edge.xs[0], start_numpy, np.identity(len(start_numpy)), 1e-2, system):
        goal_numpy__node, edge, cf = pathSteerTo(Node(start_numpy), Node(x_init[-1]), x_init, u_init, t_init, \
                                       dynamics, enforce_bounds, IsInCollision, traj_opt, 0, system,
                                       step_sz=step_sz, num_steps=num_steps, propagating=True, endpoint=True)    
    target_reached=cf and node_nearby(goal_numpy__node.x, goal_numpy, np.diag([1.,1.,0.5,0.5]), np.sqrt(1.), system)
    if goal_check(Node(goal_numpy), real_goal_node):
        target_reached = target_reached and goal_check(goal_numpy__node, real_goal_node)
    #--- compute MPNet waypoints connecting start and goal
    while target_reached==0 and itr<MAX_LENGTH*4:
        itr=itr+1  # prevent the path from being too long
        if tree==0:
            ip1 = torch.cat((start, goal)).unsqueeze(0)
            ob1 = torch.FloatTensor(obc).unsqueeze(0)
            #ip1=torch.cat((obs,start,goal)).unsqueeze(0)
            time0 = time.time()
            ip1=normalize(ip1)
            ip1=to_var(ip1)
            ob1=to_var(ob1)
            waypoint=mpNet1(ip1,ob1).squeeze(0)
            # unnormalize to world size
            waypoint=waypoint.data.cpu()
            time0 = time.time()
            waypoint = unnormalize(waypoint).numpy()
            waypoint = wrap_angle(waypoint, system)
            waypoint = torch.from_numpy(waypoint).type(torch.FloatTensor)
            if IsInCollision(waypoint.numpy()):
                tree=1
                continue
            start = waypoint
            pA.append(start.numpy())
            tree=1
        else:
            #tree=0
            #continue
            ip2 = torch.cat((goal, start)).unsqueeze(0)
            ob2 = torch.FloatTensor(obc).unsqueeze(0)
            #ip2=torch.cat((obs,goal,start)).unsqueeze(0)
            time0 = time.time()
            ip2=normalize(ip2)
            ip2=to_var(ip2)
            ob2=to_var(ob2)
            waypoint=mpNet2(ip2,ob2).squeeze(0)
            # unnormalize to world size
            waypoint=waypoint.data.cpu()
            time0 = time.time()
            waypoint = unnormalize(waypoint).numpy()
            waypoint = wrap_angle(waypoint, system)
            waypoint = torch.from_numpy(waypoint).type(torch.FloatTensor)
            if IsInCollision(waypoint.numpy()):
                tree=0
                continue
            goal = waypoint
            pB.append(goal.numpy())
            tree=0
        #target_reached=MPNetSteerTo(start, goal, )
        #target_reached=node_nearby(start.numpy(), goal.numpy(), np.diag([1.,1.,0.1,0.1]), 1., system)
        # try to use trajopt and then test if endpoints can connect
        start_numpy = start.numpy()
        goal_numpy = goal.numpy()
        x_init, u_init, t_init = init_informer(obs, Node(start_numpy), Node(goal_numpy), direction=0)
        goal_numpy__node, edge, cf = pathSteerTo(Node(start_numpy), Node(x_init[-1]), x_init, u_init, t_init, \
                                       dynamics, enforce_bounds, IsInCollision, traj_opt, 0, system,
                                       step_sz=step_sz, num_steps=num_steps, propagating=False, endpoint=True)     
        if not node_nearby(edge.xs[0], start_numpy, np.identity(len(start_numpy)), 1e-2, system):
            goal_numpy__node, edge, cf = pathSteerTo(Node(start_numpy), Node(x_init[-1]), x_init, u_init, t_init, \
                                           dynamics, enforce_bounds, IsInCollision, traj_opt, 0, system,
                                           step_sz=step_sz, num_steps=num_steps, propagating=True, endpoint=True)  
        target_reached=cf and node_nearby(goal_numpy__node.x, goal_numpy, np.diag([1.,1.,0.5,0.5]), np.sqrt(1.), system)
        if goal_check(Node(goal_numpy), real_goal_node):
            target_reached = target_reached and goal_check(goal_numpy__node, real_goal_node)
        print('target reached: %d' % (target_reached))
        # might have some terminal conditions for MPNet endpoints

    #--- after MPNet waypoints, compute actual connection
    pB.reverse()
    # visualize pA and pB
    pA = np.array(pA)
    pB = np.array(pB)
    print('pa:')
    print(pA)
    print('pb:')
    print(pB)
    pA_scat = ax.scatter(pA[:,0], pA[:,1], c='lightgreen')
    pB_scat = ax.scatter(pB[:,0], pB[:,1], c='red')
    draw_update_line(ax)

    
    mpnet_path = np.concatenate([pA,pB], axis=0)
    mpnet_path_node = []
    for i in range(len(mpnet_path)):
        mpnet_path_node.append(Node(mpnet_path[i]))
    # each time find nearest waypoint, and connect them
    node = start_node
    current_idx = 0
    node_list = [node]
    while current_idx < len(mpnet_path)-1:
        # try connecting to goal
        x_init, u_init, t_init = init_informer(obs, node, goal_node, direction=0)
        next_node, edge, cf = pathSteerTo(node, Node(x_init[-1]), x_init, u_init, t_init, \
                                       dynamics, enforce_bounds, IsInCollision, traj_opt, 0, system,
                                       step_sz=step_sz, num_steps=num_steps, propagating=False, endpoint=True) 
        if not node_nearby(edge.xs[0], node.x, np.identity(len(node.x)), 1e-2, system):
            next_node, edge, cf = pathSteerTo(node, Node(x_init[-1]), x_init, u_init, t_init, \
                                           dynamics, enforce_bounds, IsInCollision, traj_opt, 0, system,
                                           step_sz=step_sz, num_steps=num_steps, propagating=True, endpoint=False)              
        if cf and goal_check(next_node, real_goal_node):
            node.next = next_node
            next_node.prev = node
            node.edge = edge
            edge.next = next_node
            node_list.append(next_node)
            break
        
        # find the min node
        min_d = 1e8
        min_i = -1
        for i in range(current_idx, len(mpnet_path)):
            if node_d(node.x, mpnet_path[i], np.diag([1.,1.,0.5,0.5]), system) <= min_d:
                min_d = node_d(node.x, mpnet_path[i], np.diag([1.,1.,0.5,0.5]), system)
                min_i = i
        if min_i == len(mpnet_path)-1:
            min_i = min_i - 1
        if min_d > 1.:
            print('too far')
            print(min_d)
            node_list = node_list[:-3]
            break  
        current_idx = min_i
        print('min_i:%d, len(mpnet_path):%d' % (min_i, len(mpnet_path)))
        # connect to min_i+1
        x_init, u_init, t_init = init_informer(obs, node, mpnet_path_node[min_i+1], direction=0)
        next_node, edge, cf = pathSteerTo(node, Node(x_init[-1]), x_init, u_init, t_init, \
                                       dynamics, enforce_bounds, IsInCollision, traj_opt, 0, system,
                                       step_sz=step_sz, num_steps=num_steps, propagating=False, endpoint=False)
        if not node_nearby(edge.xs[0], node.x, np.identity(len(node.x)), 1e-2, system):
            next_node, edge, cf = pathSteerTo(node, Node(x_init[-1]), x_init, u_init, t_init, \
                                           dynamics, enforce_bounds, IsInCollision, traj_opt, 0, system,
                                           step_sz=step_sz, num_steps=num_steps, propagating=True, endpoint=False)            
        for i in range(len(edge.xs)):
            update_line(hl_for, ax, edge.xs[i])
        xs_to_plot = np.array(edge.xs[::10])
        for i in range(len(xs_to_plot)):
            xs_to_plot[i] = wrap_angle(xs_to_plot[i], system)
        ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='g')
        draw_update_line(ax)
        animation(edge.xs, edge.us)
        
        # might add some terminal condition: if too faraway OR collision
        if not cf:
            print('in collision as cf is false')
            node_list = node_list[:-2] # delete the last node as it cannot steerTo next node
            #node_list = node_list[:-3]  # delete several nodes
            break
        if goal_check(mpnet_path_node[-1], real_goal_node) and min_i+1 == len(mpnet_path)-1 and not goal_check(next_node, real_goal_node):
            print('endpoint not at goal')
            node_list = node_list[:-1]  # remove last node
            break
        #next_node = Node(next_x)
        node.edge = edge
        node.next = next_node
        edge.next = next_node
        next_node.prev = node
        node = next_node
        current_idx = min_i+1
        node_list.append(node)
    print('neural replanner over')
    print('len of node_list: %d' % (len(node_list)))
    if len(node_list) > 0:
        node_list[-1].edge = None
    # current_idx -> current_idx+1 fail, will use current_idx+2:end in mpnet_node
    node_list += mpnet_path_node[current_idx+2:]
    #node_list.append(goal_node)
    plt.waitforbuttonpress()
    pA_scat.remove()
    pB_scat.remove()
    return node_list
Esempio n. 3
0
def plan(obs,
         env,
         x0,
         xG,
         data,
         informer,
         init_informer,
         system,
         dynamics,
         enforce_bounds,
         IsInCollision,
         traj_opt,
         jac_A,
         jac_B,
         step_sz=0.02,
         MAX_LENGTH=1000):
    # informer: given (xt, x_desired) ->  x_t+1
    # jac_A: given (x, u) -> linearization A
    # jac B: given (x, u) -> linearization B
    # traj_opt: given (x0, x1) -> (xs, us, dts
    params = {}
    params['obs_w'] = 6.
    params['obs_h'] = 6.
    params['integration_step'] = step_sz
    vis = AcrobotVisualizer(Acrobot(), params)
    vis.obs = obs
    plt.ion()
    fig = plt.figure()
    ax = fig.add_subplot(121)
    ax.set_autoscale_on(True)
    hl, = ax.plot([], [], 'b')
    #hl_real, = ax.plot([], [], 'r')
    hl_for, = ax.plot([], [], 'g')
    hl_back, = ax.plot([], [], 'r')
    hl_for_mpnet, = ax.plot([], [], 'lightgreen')
    hl_back_mpnet, = ax.plot([], [], 'salmon')

    ax_ani = fig.add_subplot(122)
    vis._init(ax_ani)

    print(obs)

    def update_line(h, ax, new_data):
        new_data = wrap_angle(new_data, system)
        h.set_data(np.append(h.get_xdata(), new_data[0]),
                   np.append(h.get_ydata(), new_data[1]))
        #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
        #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

    def remove_last_k(h, ax, k):
        h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

    def draw_update_line(ax):
        ax.relim()
        ax.autoscale_view()
        fig.canvas.draw()
        fig.canvas.flush_events()
        #plt.show()

    def animation(states, actions):
        vis._animate(states[-1], ax_ani)
        draw_update_line(ax_ani)

    dtheta = 0.1
    feasible_points = []
    infeasible_points = []
    imin = 0
    imax = int(2 * np.pi / dtheta)
    for i in range(imin, imax):
        for j in range(imin, imax):
            x = np.array([dtheta * i - np.pi, dtheta * j - np.pi, 0., 0.])
            if IsInCollision(x):
                infeasible_points.append(x)
            else:
                feasible_points.append(x)
    feasible_points = np.array(feasible_points)
    infeasible_points = np.array(infeasible_points)
    ax.scatter(feasible_points[:, 0], feasible_points[:, 1], c='yellow')
    ax.scatter(infeasible_points[:, 0], infeasible_points[:, 1], c='pink')

    #update_line(hl, ax, x0.x)
    #draw_update_line(ax)
    for i in range(len(data)):
        update_line(hl, ax, data[i])
    draw_update_line(ax)
    update_line(hl_for, ax, x0.x)
    draw_update_line(ax)
    update_line(hl_back, ax, xG.x)
    draw_update_line(ax)
    #plt.waitforbuttonpress()

    itr = 0
    target_reached = 0
    tree = 0
    time_norm = 0.
    start = x0
    goal = xG
    funnel_node = goal
    BVP_TOLERANCE = 1e-6

    for_in_collision_nums = [0]  # forward in collision number
    for_prev_scatter = []
    back_in_collision_nums = [0]
    back_prev_scatter = []

    while target_reached == 0 and itr < MAX_LENGTH:
        itr = itr + 1  # prevent the path from being too long
        print('iter: %d' % (itr))
        if tree == 0:
            # since we ensure each step we can steer to the next waypoint
            # the edge connecting the two nodes will store the trajectory
            # information, TVLQR and the funnel size factors
            # the edge information is stored at the endpoint
            # here direciton=0 means we are computing forward steer, and 1 means
            # we are computing backward

            # the informed initialization is in the forward direction

            # try to connect from x0 to one node on the goal tree (nearest)
            node = xG
            min_node = node
            min_d = 1e6
            # min_d: normalized distance xTSx / (rho^2)
            while node is not None:
                dis = h_dist(x0, node, np.identity(len(node.x)), 2., system)
                if dis < min_d:
                    min_d = dis
                    min_node = node
                node = node.next

            xw, x_init, u_init, t_init = informer(env,
                                                  x0,
                                                  min_node,
                                                  direction=0)
            ax.scatter(xw.x[0], xw.x[1], c='lightgreen')
            draw_update_line(ax)
            x, e = pathSteerToBothDir(x0, xw, x_init, u_init, t_init, dynamics, enforce_bounds, IsInCollision, \
                                    jac_A, jac_B, traj_opt, step_sz=step_sz, system=system, direction=0, propagating=True)
            if for_in_collision_nums[-1] >= 2 and x0.prev is not None:
                # backtrace, this include direct incollision nodes and indirect ones (parent)
                print('too many collisions... backtracing')
                # pop the last collision num
                for_in_collision_nums.pop(-1)
                # since the next state has collision, increase one for its parent
                for_in_collision_nums[-1] += 1
                for_prev_scatter[-1].remove()
                for_prev_scatter = for_prev_scatter[:-1]
                # remove line as well
                x0 = x0.prev
                if x0.edge is not None:
                    remove_last_k(hl_for, ax, len(x0.edge.xs))
                    draw_update_line(ax)
                tree = 1
                itr += 1
                continue
            if e is None:
                # in collision
                for_in_collision_nums[-1] += 1
                tree = 1
                itr += 1
                continue

            # otherwise, create a new collision_num
            for_in_collision_nums.append(0)

            # if the bvp solver solution is too faraway, then ignore it
            # this is useful if we only use trajopt result without propagation
            #if np.linalg.norm(e.xs[0] - x0.x) > BVP_TOLERANCE:
            #    # ignore it
            #    print('forward searching bvp not successful.')
            #    # then propagate it to obtain the result
            #    x, e = pathSteerToBothDir(x0, xw, x_init, u_init, t_init, dynamics, enforce_bounds, IsInCollision, \
            #                        jac_A, jac_B, traj_opt, step_sz=step_sz, system=system, direction=0, propagating=True)
            for i in range(len(e.xs)):
                update_line(hl_for, ax, e.xs[i])
            xs_to_plot = np.array(e.xs[::10])
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], system)
            for_prev_scat = ax.scatter(xs_to_plot[:, 0],
                                       xs_to_plot[:, 1],
                                       c='g')
            for_prev_scatter.append(for_prev_scat)

            draw_update_line(ax)
            animation(e.xs, e.us)
            # if connected to the min_node, then reset goal tree
            if h_dist(x, min_node, np.identity(len(min_node.x)), 2.,
                      system) <= 1.0:
                print('start tree is connected to one node in goal tree')
                if min_node.next is not None:
                    xG = min_node.next
            #plt.waitforbuttonpress()
            x0.next = x
            x.prev = x0
            e.next = x
            x0.edge = e
            x0 = x
            tree = 1
        else:
            #tree=0
            # skip directly
            #continue
            # the informed initialization is in the forward direction
            xw, x_init, u_init, t_init = informer(env, xG, x0, direction=1)
            # plot the informed point
            ax.scatter(xw.x[0], xw.x[1], c='red')
            draw_update_line(ax)
            """
            if back_in_collision_nums[-1] >= 5 and xG.next is not None:
                # backtrace, this include direct incollision nodes and indirect ones (parent)
                print('backward--too many collisions... backtracing')
                # pop the last collision num
                back_in_collision_nums.pop(-1)
                # since the next state has collision, increase one for its parent
                back_in_collision_nums[-1] += 1
                #back_prev_scatter[-1].remove()
                #back_prev_scatter = back_prev_scatter[:-1]
                xG = xG.next
                tree = 0
                itr += 1
                continue
            if IsInCollision(xw.x):
                # in collision
                back_in_collision_nums[-1] += 1
                tree = 0
                itr += 1
                continue
            # otherwise, create a new collision_num
            back_in_collision_nums.append(0)
            """
            #update_line(hl_back, ax, xG.x)
            #xs_to_plot = np.array(e.xs[::10])
            #for i in range(len(xs_to_plot)):
            #    xs_to_plot[i] = wrap_angle(xs_to_plot[i], system)
            #back_prev_scat = ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='r')
            #back_prev_scatter.append(back_prev_scat)

            #draw_update_line(ax)
            #plt.waitforbuttonpress()
            xw.next = xG
            xG.prev = xw
            xG = xw

            # check if x0 can actually connect to one of the nodes in goal tree
            node = xG
            min_node = node
            min_d = 1e6
            # min_d: normalized distance xTSx / (rho^2)
            while node is not None:
                # we need to make sure that current x0 is not near the node of interest,
                # to encourage exploration
                dis = h_dist(x0, node, np.identity(len(node.x)), 2., system)
                if dis < min_d and dis > 1.0:
                    min_d = dis
                    min_node = node
                node = node.next

            x_init, u_init, t_init = init_informer(env,
                                                   x0,
                                                   min_node,
                                                   direction=0)
            x, e = pathSteerToBothDir(x0, min_node, x_init, u_init, t_init, dynamics, enforce_bounds, IsInCollision, \
                                    jac_A, jac_B, traj_opt, step_sz=step_sz, system=system, direction=0, propagating=True)
            if e is None:
                # in collision
                tree = 0
                itr += 1
                continue
            if h_dist(x, min_node, np.identity(len(min_node.x)), 2.,
                      system) <= 1.0:
                print('start tree is connected to one node in goal tree')
                #if min_node.next is not None:
                #    xG = min_node.next
                xG = goal
                #plt.waitforbuttonpress()
                for_in_collision_nums.append(0)
                for i in range(len(e.xs)):
                    update_line(hl_for, ax, e.xs[i])
                xs_to_plot = np.array(e.xs[::10])
                for i in range(len(xs_to_plot)):
                    xs_to_plot[i] = wrap_angle(xs_to_plot[i], system)
                for_prev_scat = ax.scatter(xs_to_plot[:, 0],
                                           xs_to_plot[:, 1],
                                           c='g')
                for_prev_scatter.append(for_prev_scat)

                draw_update_line(ax)
                animation(e.xs, e.us)
                x0.next = x
                x.prev = x0
                e.next = x
                x0.edge = e
                x0 = x

            tree = 0

        # try connecting to goal
        x_init, u_init, t_init = init_informer(env, x0, goal, direction=0)
        xG_, e = pathSteerToBothDir(x0, goal, x_init, u_init, t_init, dynamics, enforce_bounds, IsInCollision, \
                                jac_A, jac_B, traj_opt, step_sz=step_sz, system=system, direction=0, \
                                propagating=True, endpoint=True)

        if e is not None:
            # check if the BVP is successful
            if np.linalg.norm(e.xs[0] - x0.x) > BVP_TOLERANCE:
                # try propagating
                xG_, e = pathSteerToBothDir(x0, goal, x_init, u_init, t_init, dynamics, enforce_bounds, IsInCollision, \
                                    jac_A, jac_B, traj_opt, step_sz=step_sz, system=system, direction=0, propagating=True)
                #itr += 1
                #continue

            # add xG_ to the start tree
            x0.next = xG_
            xG_.prev = x0
            x0.edge = e
            x0.edge.next = xG_

            print('endpoint steering...')
            print('x0:')
            print(x0.x)
            print('goal:')
            print(goal.x)
            print('xG_:')
            print(xG_.x)
            # find the nearest point from xG_ to points on the goal tree
            if h_dist(xG_, goal, goal.S0, goal.rho0, system) <= 1.0:
                # otherwise it is a nearby node
                #if min_node.S0 is None:
                #    lazyFunnel(min_node, funnel_node, dynamics, enforce_bounds, jac_A, jac_B, traj_opt, system=system, step_sz=step_sz)
                #    #funnel_node = min_node
                min_node = goal
                reached, node_i0, node_i1 = nearby(xG_, min_node, system)
                if reached:
                    target_reached = True
                    xs_to_plot = np.array(e.xs[::10])
                    for i in range(len(xs_to_plot)):
                        xs_to_plot[i] = wrap_angle(xs_to_plot[i], system)
                    ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 1], c='salmon')

                    #ax.scatter(e.xs[::10,0], e.xs[::10,1], c='salmon')
                    draw_update_line(ax)
                    # since the nearby nodes are on the edge, need to extract the node index
                    if min_node.edge is not None:
                        # need to extract subset of x1.edge
                        # change the t0 of edge starting from node to be time_knot[node_i] (use subset of edge)
                        edge = min_node.edge
                        edge.t0 = edge.time_knot[node_i1]
                        edge.i0 = node_i1
                        # change the node to be xs[node_i], S0 to be S(time_knot[node_i]), rho0 to be rho0s[node_i]
                        new_node = Node(wrap_angle(edge.xs[node_i1], system))
                        new_node.S0 = edge.S(edge.t0).reshape(
                            (len(edge.xs[node_i1]), len(edge.xs[node_i1])))
                        new_node.rho0 = edge.rho0s[node_i1]
                        new_node.edge = edge
                        new_node.next = min_node.next
                        min_node = new_node
                    xG = min_node
                    x0 = xG_
                    # again print out the xTSx
                    print('xTSx:')
                    print(node_h_dist(x0.x, xG.x, xG.S0, xG.rho0, system))
                    # print for the edge endpoint
                    print('for edge endpoint:')
                    print('xTSx/rho0^2:')
                    print(
                        node_h_dist(x0.prev.edge.xs[-1], xG.x, xG.S0, xG.rho0,
                                    system))
                    break
        itr += 1

    if target_reached:
        print('target reached.')
        # it is near enough, so we connect in the node data structure from x0 to xG, although the endpoint of x0.edge
        # in state is still xG_
        x0 = x0.prev  # since the x0 can directly connect to xG, we only need to set the next state of the previous x to xG
        x0.next = xG  # update endpoint (or should I?)
        x0.edge.next = xG
        xG.prev = x0
        # connect the lsat node
        # construct the funnel later
        # connect from x0 to xG, the endpoint of x0 is xG_, but it is near xG
        print('before funnelsteerto')
        #lazyFunnel(start, xG, dynamics, enforce_bounds, jac_A, jac_B, traj_opt, system=system, step_sz=step_sz)
        #print(start.edge.rho0s)
        #funnelSteerTo(x0, xG, dynamics, enforce_bounds, jac_A, jac_B, traj_opt, direction=0, system=system, step_sz=step_sz)
        print('after funnelsteerto')

        #xG_.next = xG
        #e_.next = xG
        #xG_.edge = e_
    else:
        x0.next = None
        x0.edge = None

    # construct a list of the path
    path_list = []
    node = start
    while node is not None:
        path_list.append(node.x)
        node = node.next
    if not target_reached:
        # xG is the first in the goal tree
        while xG is not None:
            path_list.append(xG.x)
            xG = xG.next
    plt.cla()
    return target_reached, path_list
Esempio n. 4
0
                # adaptive step size on replanning attempts
                res, path_list = plan(obs_i, obc_i, start, goal, detail_paths, informer, init_informer, system, dynamics, \
                           enforce_bounds, collision_check, traj_opt, jac_A, jac_B, step_sz=step_sz, MAX_LENGTH=2000)
                fig = plt.figure()
                ax = fig.add_subplot(111)
                # after plan, generate the trajectory, and check if it is within the region
                xs = plot_trajectory(ax, start, goal, dynamics, enforce_bounds,
                                     collision_check, step_sz)

                params = {}
                params['obs_w'] = 6.
                params['obs_h'] = 6.
                params['integration_step'] = step_sz
                fig = plt.figure()
                ax = fig.add_subplot(111)
                animator = AcrobotVisualizer(Acrobot(), params)
                animation_acrobot(fig, ax, animator, xs, obs_i)
                plt.waitforbuttonpress()
                #print('after neural replan:')
                #print(path)
                #path = lvc(path, obc[i], IsInCollision, step_sz=step_sz)
                #print('after lvc:')
                #print(path)
                if res:
                    fp = 1
                    print('feasible ok!')
                    break
                #if feasibility_check(bvp_solver, path, obc_i, IsInCollision, step_sz=0.01):
                #    fp = 1
                #    print('feasible, ok!')
                #    break
def plan_mpnet(obs, env, x0, xG, data, informer, init_informer, system, dynamics, enforce_bounds, IsInCollision, traj_opt, jac_A, jac_B, step_sz=0.02, MAX_LENGTH=1000):
    # informer: given (xt, x_desired) ->  x_t+1
    # jac_A: given (x, u) -> linearization A
    # jac B: given (x, u) -> linearization B
    # traj_opt: given (x0, x1) -> (xs, us, dts
    params = {}
    params['obs_w'] = 6.
    params['obs_h'] = 6.
    params['integration_step'] = step_sz
    vis = AcrobotVisualizer(Acrobot(), params)
    vis.obs = obs
    plt.ion()
    fig = plt.figure()
    ax = fig.add_subplot(121)
    ax.set_autoscale_on(True)
    hl, = ax.plot([], [], 'b')
    #hl_real, = ax.plot([], [], 'r')
    hl_for, = ax.plot([], [], 'g')
    hl_back, = ax.plot([], [], 'r')
    hl_for_mpnet, = ax.plot([], [], 'lightgreen')
    hl_back_mpnet, = ax.plot([], [], 'salmon')

    ax_ani = fig.add_subplot(122)
    vis._init(ax_ani)

    print(obs)
    def update_line(h, ax, new_data):
        new_data = wrap_angle(new_data, system)
        h.set_data(np.append(h.get_xdata(), new_data[0]), np.append(h.get_ydata(), new_data[1]))
        #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
        #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

    def remove_last_k(h, ax, k):
        h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

    def draw_update_line(ax):
        ax.relim()
        ax.autoscale_view()
        fig.canvas.draw()
        fig.canvas.flush_events()
        #plt.show()

    def animation(states, actions):
        vis._animate(states[-1], ax_ani)
        draw_update_line(ax_ani)
    dtheta = 0.1
    feasible_points = []
    infeasible_points = []
    imin = 0
    imax = int(2*np.pi/dtheta)
    for i in range(imin, imax):
        for j in range(imin, imax):
            x = np.array([dtheta*i-np.pi, dtheta*j-np.pi, 0., 0.])
            if IsInCollision(x):
                infeasible_points.append(x)
            else:
                feasible_points.append(x)
    feasible_points = np.array(feasible_points)
    infeasible_points = np.array(infeasible_points)
    ax.scatter(feasible_points[:,0], feasible_points[:,1], c='yellow')
    ax.scatter(infeasible_points[:,0], infeasible_points[:,1], c='pink')


    #update_line(hl, ax, x0.x)
    #draw_update_line(ax)
    for i in range(len(data)):
        print(data[i])
        update_line(hl, ax, data[i])
    draw_update_line(ax)
    update_line(hl_for, ax, x0.x)
    draw_update_line(ax)
    update_line(hl_back, ax, xG.x)
    draw_update_line(ax)
    #plt.waitforbuttonpress()

    itr=0
    target_reached=0
    tree=0
    time_norm = 0.
    start = x0
    goal = xG
    funnel_node = goal
    BVP_TOLERANCE = 1e-6

    for_in_collision_nums = [0]  # forward in collision number
    for_prev_scatter = []
    back_in_collision_nums = [0]
    back_prev_scatter = []
    while target_reached==0 and itr<MAX_LENGTH:
        itr=itr+1  # prevent the path from being too long
        print('iter: %d' % (itr))
        if tree==0:
            # since we ensure each step we can steer to the next waypoint
            # the edge connecting the two nodes will store the trajectory
            # information, TVLQR and the funnel size factors
            # the edge information is stored at the endpoint
            # here direciton=0 means we are computing forward steer, and 1 means
            # we are computing backward

            # the informed initialization is in the forward direction
            xw, x_init, u_init, t_init = informer(env, x0, goal, direction=0)
            
            ax.scatter(xw.x[0], xw.x[1], c='lightgreen')
            draw_update_line(ax)
            plt.waitforbuttonpress()
            x0 = Node(xw.x)
            tree=1
        else:
            xw, x_init, u_init, t_init = informer(env, xG, start, direction=1)
            ax.scatter(xG.x[0], xG.x[1], c='black')
            ax.scatter(start.x[0], start.x[1], c='blue')
            
            ax.scatter(xw.x[0], xw.x[1], c='red')
            print(xw.x)
            draw_update_line(ax)
            plt.waitforbuttonpress()
            xG = Node(xw.x)
            tree=0
Esempio n. 6
0
def plan(obs,
         env,
         x0,
         xG,
         data,
         informer,
         init_informer,
         system,
         dynamics,
         enforce_bounds,
         IsInCollisionWithObs,
         traj_opt,
         step_sz=0.02,
         num_steps=21,
         MAX_LENGTH=200,
         verbose=False):
    """
    For each node xt, we record how many explorations have been made from xt. We do planning according to the following rules:
    1. if n_explored >= n_max_explore:
        if not the root, backtrace to the previous node

    hat(x)_t+1 = informer(xt, G)
    x_t+1, tau = BVP(xt, hat(x)_t+1)
    n_exlored(x_t) += 1
    2. if explored(x_t+1) (w.r.t. some regions as defined in SST paper)
        ignore x_t+1
        # may also update the "score" using the already explored region
    3. if too_far(x_t+1, hat(x)_t+1)
        ignore x_t+1
    4. if inCollision(x_t+1)
        ignore x_t+1
        # may also update the "score" of points

    # otherwise it is safe
    establish the connections between x_t and x_t+1,
    move to x_t+1
    """
    env_constr = standard_cpp_systems.RectangleObs
    propagate_system = env_constr(obs, 6., 'acrobot')
    planner = _sst_module.SSTWrapper(
        state_bounds=propagate_system.get_state_bounds(),
        control_bounds=propagate_system.get_control_bounds(),
        distance=propagate_system.distance_computer(),
        start_state=x0.x,
        goal_state=xG.x,
        goal_radius=10.,
        random_seed=0,
        sst_delta_near=0.01,
        sst_delta_drain=0.005)

    obs_width = 6.
    new_obs_i = []
    for k in range(len(obs)):
        obs_pt = []
        obs_pt.append(obs[k][0] - obs_width / 2)
        obs_pt.append(obs[k][1] - obs_width / 2)
        obs_pt.append(obs[k][0] - obs_width / 2)
        obs_pt.append(obs[k][1] + obs_width / 2)
        obs_pt.append(obs[k][0] + obs_width / 2)
        obs_pt.append(obs[k][1] + obs_width / 2)
        obs_pt.append(obs[k][0] + obs_width / 2)
        obs_pt.append(obs[k][1] - obs_width / 2)
        new_obs_i.append(obs_pt)

    IsInCollision = lambda x: IsInCollisionWithObs(x, new_obs_i)

    # visualization
    if verbose:
        print('step_sz: %f' % (step_sz))
        params = {}
        params['obs_w'] = 6.
        params['obs_h'] = 6.
        params['integration_step'] = step_sz
        vis = AcrobotVisualizer(Acrobot(), params)
        vis.obs = obs
        plt.ion()
        fig = plt.figure()
        ax = fig.add_subplot(121)
        ax.set_autoscale_on(True)
        hl, = ax.plot([], [], 'b')
        #hl_real, = ax.plot([], [], 'r')
        hl_for, = ax.plot([], [], 'g')
        hl_back, = ax.plot([], [], 'r')
        hl_for_mpnet, = ax.plot([], [], 'lightgreen')
        hl_back_mpnet, = ax.plot([], [], 'salmon')

        ax_ani = fig.add_subplot(122)
        vis._init(ax_ani)

        print(obs)

        def update_line(h, ax, new_data):
            new_data = wrap_angle(new_data, system)
            h.set_data(np.append(h.get_xdata(), new_data[0]),
                       np.append(h.get_ydata(), new_data[1]))
            #h.set_xdata(np.append(h.get_xdata(), new_data[0]))
            #h.set_ydata(np.append(h.get_ydata(), new_data[1]))

        def remove_last_k(h, ax, k):
            h.set_data(h.get_xdata()[:-k], h.get_ydata()[:-k])

        def draw_update_line(ax):
            ax.relim()
            ax.autoscale_view()
            fig.canvas.draw()
            fig.canvas.flush_events()
            #plt.show()

        def animation(states, actions):
            vis._animate(states[-1], ax_ani)
            draw_update_line(ax_ani)

        dtheta = 0.1
        feasible_points = []
        infeasible_points = []
        goal_region = []
        imin = 0
        imax = int(2 * np.pi / dtheta)

        for i in range(imin, imax):
            for j in range(imin, imax):
                x = np.array([dtheta * i - np.pi, dtheta * j - np.pi, 0., 0.])
                if IsInCollision(x):
                    infeasible_points.append(x)
                else:
                    feasible_points.append(x)
                    if goal_check(Node(x), xG, system):
                        goal_region.append(x)
        feasible_points = np.array(feasible_points)
        infeasible_points = np.array(infeasible_points)
        goal_region = np.array(goal_region)
        ax.scatter(feasible_points[:, 0], feasible_points[:, 1], c='yellow')
        ax.scatter(infeasible_points[:, 0], infeasible_points[:, 1], c='pink')
        ax.scatter(goal_region[:, 0], goal_region[:, 1], c='green')
        for i in range(len(data)):
            update_line(hl, ax, data[i])
        draw_update_line(ax)
        update_line(hl_for, ax, x0.x)
        draw_update_line(ax)
        update_line(hl_back, ax, xG.x)
        draw_update_line(ax)
    # visualization end

    env = torch.FloatTensor(env)
    env = to_var(env)
    #explored_nodes = [x0]

    xt = x0
    xt.cost = 0.
    fes = False

    for itr in range(MAX_LENGTH):
        # pop nodes from frontier_nodes
        if xt is None:
            xt = x0
        #print('start state:')
        #print(xt.x)
        # try connecting to goal every now and then
        x_init, u_init, t_init = init_informer(env, xt, xG, direction=0)
        x_G_, edge, valid = pathSteerTo(xt, xG, x_init, u_init, t_init, dynamics, enforce_bounds, IsInCollision, \
                                traj_opt, step_sz=step_sz, num_steps=num_steps, system=system, direction=0, propagating=True)
        # visualization
        if verbose:
            xs_to_plot = np.array(edge.xs[::10])
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], system)
            ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 1], c='orange')
            draw_update_line(ax)
        # visualization end

        if edge is not None:
            # check goal for each node
            for i in range(len(edge.xs)):
                if goal_check(Node(wrap_angle(edge.xs[i], system)), xG,
                              system):
                    print('bingo')
                    return 1

            xt.edge = None
            xt.next = None

        xs, us, dts = planner.step_bvp(propagate_system, system, xt.x,
                                       x_init[-1], 400, num_steps, step_sz,
                                       x_init, u_init, t_init)

        if len(us) != 0:
            #xs_to_plot = np.array(edge.xs[::10])
            #for i in range(len(xs_to_plot)):
            #    xs_to_plot[i] = wrap_angle(xs_to_plot[i], system)
            x_t_1 = xs[-1]

            # visualization
            if verbose:
                ax.scatter(x_t_1[0], x_t_1[1], c='orange')
                #ax.scatter(xs_to_plot[:,0], xs_to_plot[:,1], c='orange')

                draw_update_line(ax)
            # visualization end

            edge_dt = np.sum(dts)
            start = xt
            goal = Node(wrap_angle(xs[-1], system))

            # after trajopt, make actions of dimension 2
            us = us.reshape(len(us), -1)
            # notice that controller time starts from 0, hence locally need to shift the time by minusing t0_edges
            # start from 0
            time_knot = np.cumsum(dts)
            time_knot = np.insert(time_knot, 0, 0.)
            # can also change the resolution by the following function (for instance, every 10)
            edge = Edge(xs, us, dts, time_knot, edge_dt)
            edge.next = goal
            start.edge = edge
            start.next = goal
            goal.prev = start
            #goal.n_explored = 0
            #goal.cost = xt.cost + xt.edge.dt
            """
            # push to frontier
            tie_breaker += 1
            entry = (goal.cost+h_cost(goal,xG,system), tie_breaker, goal)
            heapq.heappush(frontier_nodes, entry)

            if goal_check(Node(x_t_1), xG, system):
                print('bingo!')
                fes = True
                break

            """

        xw, x_init, u_init, t_init = informer(env, xt, xG, direction=0)

        # visualization
        if verbose:
            xw_scat = ax.scatter(xw.x[0], xw.x[1], c='lightgreen')
            draw_update_line(ax)
        # visualization end

        xs, us, dts = planner.step_bvp(propagate_system, system, xt.x, xw.x,
                                       400, num_steps, step_sz, x_init, u_init,
                                       t_init)
        #print('xs:')
        #print(xs)
        # stop at the last node that is not in collision
        if len(us) == 0:
            print('step_bvp failed')
            # the same node, it hasn't changed
            xt = None
            continue
        # establish connections to x_t+1
        edge_dt = np.sum(dts)
        goal = Node(wrap_angle(xs[-1], system))

        # after trajopt, make actions of dimension 2
        us = us.reshape(len(us), -1)
        # notice that controller time starts from 0, hence locally need to shift the time by minusing t0_edges
        # start from 0
        time_knot = np.cumsum(dts)
        time_knot = np.insert(time_knot, 0, 0.)
        xt = goal

        #for i in range(len(edge.xs)):
        #    update_line(hl_for, ax, edge.xs[i])

        # visualization
        if verbose:
            xs_to_plot = np.array(xs[::5])
            for i in range(len(xs_to_plot)):
                xs_to_plot[i] = wrap_angle(xs_to_plot[i], system)
            ax.scatter(xs_to_plot[:, 0], xs_to_plot[:, 1], c='g')
            draw_update_line(ax)
            animation(xs, us)
        # visualization end

        # check if the new node is near goal
        if goal_check(goal, xG, system):
            print("success")
            fes = True
            break
        if fes:
            break

        # check if the new node is near goal
        if goal_check(xt, xG, system):
            print("success")
            fes = True
            break
    return fes
Esempio n. 7
0
def eval_tasks_mpnet(mpNet0,
                     mpNet1,
                     env_type,
                     test_data,
                     save_dir,
                     data_type,
                     normalize_func=lambda x: x,
                     unnormalize_func=lambda x: x,
                     dynamics=None,
                     jac_A=None,
                     jac_B=None,
                     enforce_bounds=None,
                     IsInCollision=None):
    # data_type: seen or unseen
    obc, obs, paths, sgs, path_lengths, controls, costs = test_data
    if obs is not None:
        obc = obc.astype(np.float32)
        obc = torch.from_numpy(obc)
    if torch.cuda.is_available():
        obc = obc.cuda()

    if env_type == 'pendulum':
        system = standard_cpp_systems.PSOPTPendulum()
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 2, 1, 0)
        step_sz = 0.002
        num_steps = 20

    elif env_type == 'cartpole_obs':
        #system = standard_cpp_systems.RectangleObs(obs[i], 4.0, 'cartpole')
        system = _sst_module.CartPole()
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.002
        num_steps = 20
        goal_S0 = np.identity(4)
        goal_rho0 = 1.0
    elif env_type in [
            'acrobot_obs', 'acrobot_obs_2', 'acrobot_obs_3', 'acrobot_obs_4',
            'acrobot_obs_8'
    ]:
        #system = standard_cpp_systems.RectangleObs(obs[i], 6.0, 'acrobot')
        obs_width = 6.0
        system = _sst_module.PSOPTAcrobot()
        bvp_solver = _sst_module.PSOPTBVPWrapper(system, 4, 1, 0)
        step_sz = 0.02
        num_steps = 20
        goal_S0 = np.diag([1., 1., 0, 0])
        #goal_S0 = np.identity(4)
        goal_rho0 = 1.0

    circular = system.is_circular_topology()

    def informer(env, x0, xG, direction):
        x0_x = torch.from_numpy(x0.x).type(torch.FloatTensor)
        xG_x = torch.from_numpy(xG.x).type(torch.FloatTensor)
        x0_x = normalize_func(x0_x)
        xG_x = normalize_func(xG_x)
        if torch.cuda.is_available():
            x0_x = x0_x.cuda()
            xG_x = xG_x.cuda()
        if direction == 0:
            x = torch.cat([x0_x, xG_x], dim=0)
            mpNet = mpNet0
            if torch.cuda.is_available():
                x = x.cuda()
            next_state = mpNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data
            print('next state:')
            print(next_state)
            next_state = unnormalize_func(next_state).numpy()[0]
            print('after unnormalize:')
            print(next_state)
            delta_x = next_state - x0.x
            # can be either clockwise or counterclockwise, take shorter one
            for i in range(len(delta_x)):
                if circular[i]:
                    delta_x[i] = delta_x[i] - np.floor(
                        delta_x[i] / (2 * np.pi)) * (2 * np.pi)
                    if delta_x[i] > np.pi:
                        delta_x[i] = delta_x[i] - 2 * np.pi
                    # randomly pick either direction
                    rand_d = np.random.randint(2)
                    if rand_d < 1:
                        if delta_x[i] > 0.:
                            delta_x[i] = delta_x[i] - 2 * np.pi
                        if delta_x[i] <= 0.:
                            delta_x[i] = delta_x[i] + 2 * np.pi
            res = Node(next_state)
            x_init = np.linspace(x0.x, x0.x + delta_x, num_steps)
            ## TODO: : change this to general case
            u_init_i = np.random.uniform(low=[-4.], high=[4])
            #u_init_i = control[max_d_i]
            cost_i = num_steps * step_sz
            u_init = np.repeat(u_init_i, num_steps,
                               axis=0).reshape(-1, len(u_init_i))
            u_init = u_init + np.random.normal(scale=1.)
            t_init = np.linspace(0, cost_i, num_steps)
        else:
            x = torch.cat([x0_x, xG_x], dim=0)
            mpNet = mpNet1
            next_state = mpNet(x.unsqueeze(0), env.unsqueeze(0)).cpu().data
            print('next state:')
            print(next_state)
            next_state = unnormalize_func(next_state).numpy()[0]
            print('after unnormalize:')
            print(next_state)
            delta_x = x0.x - next_state
            # can be either clockwise or counterclockwise, take shorter one
            for i in range(len(delta_x)):
                if circular[i]:
                    delta_x[i] = delta_x[i] - np.floor(
                        delta_x[i] / (2 * np.pi)) * (2 * np.pi)
                    if delta_x[i] > np.pi:
                        delta_x[i] = delta_x[i] - 2 * np.pi
                    # randomly pick either direction
                    rand_d = np.random.randint(2)
                    if rand_d < 1:
                        if delta_x[i] > 0.:
                            delta_x[i] = delta_x[i] - 2 * np.pi
                        elif delta_x[i] <= 0.:
                            delta_x[i] = delta_x[i] + 2 * np.pi
            #next_state = state[max_d_i] + delta_x
            res = Node(next_state)
            # initial: from max_d_i to max_d_i+1
            x_init = np.linspace(next_state, next_state + delta_x,
                                 num_steps)  # + rand_x_init
            # action: copy over to number of steps
            u_init_i = np.random.uniform(low=[-4.], high=[4])
            cost_i = num_steps * step_sz
            u_init = np.repeat(u_init_i, num_steps,
                               axis=0).reshape(-1, len(u_init_i))
            u_init = u_init + np.random.normal(scale=1.)
            t_init = np.linspace(0, cost_i, num_steps)
        return res, x_init, u_init, t_init

    def init_informer(env, x0, xG, direction):
        if direction == 0:
            next_state = xG.x
            delta_x = next_state - x0.x
            # can be either clockwise or counterclockwise, take shorter one
            for i in range(len(delta_x)):
                if circular[i]:
                    delta_x[i] = delta_x[i] - np.floor(
                        delta_x[i] / (2 * np.pi)) * (2 * np.pi)
                    if delta_x[i] > np.pi:
                        delta_x[i] = delta_x[i] - 2 * np.pi
                    # randomly pick either direction
                    rand_d = np.random.randint(2)
                    if rand_d < 1:
                        if delta_x[i] > 0.:
                            delta_x[i] = delta_x[i] - 2 * np.pi
                        if delta_x[i] <= 0.:
                            delta_x[i] = delta_x[i] + 2 * np.pi
            res = Node(next_state)
            x_init = np.linspace(x0.x, x0.x + delta_x, num_steps)
            ## TODO: : change this to general case
            u_init_i = np.random.uniform(low=[-4.], high=[4])
            #u_init_i = control[max_d_i]
            cost_i = num_steps * step_sz
            u_init = np.repeat(u_init_i, num_steps,
                               axis=0).reshape(-1, len(u_init_i))
            u_init = u_init + np.random.normal(scale=1.)
            t_init = np.linspace(0, cost_i, num_steps)
        else:
            next_state = xG.x
            delta_x = x0.x - next_state
            # can be either clockwise or counterclockwise, take shorter one
            for i in range(len(delta_x)):
                if circular[i]:
                    delta_x[i] = delta_x[i] - np.floor(
                        delta_x[i] / (2 * np.pi)) * (2 * np.pi)
                    if delta_x[i] > np.pi:
                        delta_x[i] = delta_x[i] - 2 * np.pi
                    # randomly pick either direction
                    rand_d = np.random.randint(2)
                    if rand_d < 1:
                        if delta_x[i] > 0.:
                            delta_x[i] = delta_x[i] - 2 * np.pi
                        elif delta_x[i] <= 0.:
                            delta_x[i] = delta_x[i] + 2 * np.pi
            #next_state = state[max_d_i] + delta_x
            res = Node(next_state)
            # initial: from max_d_i to max_d_i+1
            x_init = np.linspace(next_state, next_state + delta_x,
                                 num_steps)  # + rand_x_init
            # action: copy over to number of steps
            u_init_i = np.random.uniform(low=[-4.], high=[4])
            cost_i = num_steps * step_sz
            u_init = np.repeat(u_init_i, num_steps,
                               axis=0).reshape(-1, len(u_init_i))
            u_init = u_init + np.random.normal(scale=1.)
            t_init = np.linspace(0, cost_i, num_steps)
        return x_init, u_init, t_init

    fes_env = []  # list of list
    valid_env = []
    time_env = []
    time_total = []
    for i in range(len(paths)):
        time_path = []
        fes_path = []  # 1 for feasible, 0 for not feasible
        valid_path = []  # if the feasibility is valid or not
        # save paths to different files, indicated by i
        #print(obs, flush=True)
        # feasible paths for each env
        for j in range(len(paths[0])):
            state_i = []
            state = paths[i][j]
            # obtain the sequence
            p_start = paths[i][j][0]
            detail_paths = [p_start]
            detail_controls = []
            detail_costs = []
            state = [p_start]
            control = []
            cost = []
            for k in range(len(controls[i][j])):
                #state_i.append(len(detail_paths)-1)
                max_steps = int(costs[i][j][k] / step_sz)
                accum_cost = 0.
                print('p_start:')
                print(p_start)
                print('data:')
                print(paths[i][j][k])
                # modify it because of small difference between data and actual propagation
                p_start = paths[i][j][k]
                state[-1] = paths[i][j][k]
                for step in range(1, max_steps + 1):
                    p_start = dynamics(p_start, controls[i][j][k], step_sz)
                    p_start = enforce_bounds(p_start)
                    detail_paths.append(p_start)
                    detail_controls.append(controls[i][j])
                    detail_costs.append(step_sz)
                    accum_cost += step_sz
                    if (step % 20 == 0) or (step == max_steps):
                        state.append(p_start)
                        print('control')
                        print(controls[i][j])
                        control.append(controls[i][j][k])
                        cost.append(accum_cost)
                        accum_cost = 0.
            print('p_start:')
            print(p_start)
            print('data:')
            print(paths[i][j][-1])
            state[-1] = paths[i][j][-1]
            #############################

            time0 = time.time()
            time_norm = 0.
            fp = 0  # indicator for feasibility
            print("step: i=" + str(i) + " j=" + str(j))
            p1_ind = 0
            p2_ind = 0
            p_ind = 0
            if path_lengths[i][j] == 0:
                # invalid, feasible = 0, and path count = 0
                fp = 0
                valid_path.append(0)
            if path_lengths[i][j] > 0:
                fp = 0
                valid_path.append(1)
                #paths[i][j][0][1] = 0.
                #paths[i][j][path_lengths[i][j]-1][1] = 0.
                path = [paths[i][j][0], paths[i][j][path_lengths[i][j] - 1]]
                # plot the entire path
                #plt.plot(paths[i][j][:,0], paths[i][j][:,1])

                start = Node(path[0])
                #goal = Node(path[-1])
                goal = Node(sgs[i][j][1])
                goal.S0 = goal_S0
                goal.rho0 = goal_rho0  # change this later

                control = []
                time_step = []
                step_sz = step_sz
                MAX_NEURAL_REPLAN = 11
                if obs is None:
                    obs_i = None
                    obc_i = None
                else:
                    obs_i = obs[i]
                    obc_i = obc[i]
                    # convert obs_i center to points
                    new_obs_i = []
                    for k in range(len(obs_i)):
                        obs_pt = []
                        obs_pt.append(obs_i[k][0] - obs_width / 2)
                        obs_pt.append(obs_i[k][1] - obs_width / 2)
                        obs_pt.append(obs_i[k][0] - obs_width / 2)
                        obs_pt.append(obs_i[k][1] + obs_width / 2)
                        obs_pt.append(obs_i[k][0] + obs_width / 2)
                        obs_pt.append(obs_i[k][1] + obs_width / 2)
                        obs_pt.append(obs_i[k][0] + obs_width / 2)
                        obs_pt.append(obs_i[k][1] - obs_width / 2)
                        new_obs_i.append(obs_pt)
                    #obs_i = new_obs_i
                collision_check = lambda x: IsInCollision(x, new_obs_i)
                for t in range(MAX_NEURAL_REPLAN):
                    # adaptive step size on replanning attempts
                    res, path_list = plan_mpnet(obs_i, obc_i, start, goal, detail_paths, informer, init_informer, system, dynamics, \
                               enforce_bounds, collision_check, None, jac_A, jac_B, step_sz=step_sz, MAX_LENGTH=1000)

                    fig = plt.figure()
                    ax = fig.add_subplot(111)
                    # after plan, generate the trajectory, and check if it is within the region
                    xs = plot_trajectory(ax, start, goal, dynamics,
                                         enforce_bounds, collision_check,
                                         step_sz)

                    params = {}
                    params['obs_w'] = 6.
                    params['obs_h'] = 6.
                    params['integration_step'] = step_sz
                    fig = plt.figure()
                    ax = fig.add_subplot(111)
                    animator = AcrobotVisualizer(Acrobot(), params)
                    animation_acrobot(fig, ax, animator, xs, obs_i)
                    plt.waitforbuttonpress()

                    #print('after neural replan:')
                    #print(path)
                    #path = lvc(path, obc[i], IsInCollision, step_sz=step_sz)
                    #print('after lvc:')
                    #print(path)
                    if res:
                        fp = 1
                        print('feasible ok!')
                        break
                    #if feasibility_check(bvp_solver, path, obc_i, IsInCollision, step_sz=0.01):
                    #    fp = 1
                    #    print('feasible, ok!')
                    #    break
            if fp:
                # only for successful paths
                time1 = time.time() - time0
                time1 -= time_norm
                time_path.append(time1)
                print('test time: %f' % (time1))
                # write the path
                #print('planned path:')
                #print(path)
                #path = np.array(path)
                #np.savetxt('results/path_%d.txt' % (j), path)
                #np.savetxt('results/control_%d.txt' % (j), np.array(control))
                #np.savetxt('results/timestep_%d.txt' % (j), np.array(time_step))

            fes_path.append(fp)

        time_env.append(time_path)
        time_total += time_path
        print('average test time up to now: %f' % (np.mean(time_total)))
        fes_env.append(fes_path)
        valid_env.append(valid_path)
        print('accuracy up to now: %f' %
              (float(np.sum(fes_env)) / np.sum(valid_env)))
        time_path = save_dir + 'mpnet_%s_time.pkl' % (data_type)
        pickle.dump(time_env, open(time_path, "wb"))
        #print(fp/tp)
    return np.array(fes_env), np.array(valid_env)