Ejemplo n.º 1
0
def main(grid):
    """Roll out a pre-trained VIN policy on ``grid`` and return its path.

    The grid dimensions pick the number of value iterations, the image size
    and the weights file. Those values are installed as argparse defaults,
    so they can still be overridden on the command line
    (``parser.parse_args()`` reads ``sys.argv``).

    Args:
        grid: Map object. This function reads ``grid.size.width``,
            ``grid.size.height``, ``grid.grid``, ``grid.agent.position.x/y``
            and ``grid.goal.position.x/y``. ``grid.grid`` is modified in
            place: the agent and goal cells are set to 0.

    Returns:
        A float array of shape ``(L, 2)`` holding the predicted positions,
        where ``L`` is twice the length of the sampled reference trajectory.
        The columns are presumably (row, col) -- TODO confirm against
        ``gridworld.get_coords``. Once the goal is reached the remaining
        rows repeat the goal position. The rollout of the first sampled
        trajectory with more than one state is returned. If there is no
        such trajectory an empty ``(0, 2)`` array is returned (this used to
        raise ``UnboundLocalError``).

    Side effects:
        Rebinds the module-level name ``data`` to an empty list, prints
        ``grid`` and prints ``'success!'`` when the rollout ends on the goal.
    """
    global data

    n_domains = 1
    n_traj = 1
    # These two used to carry a trailing comma, which silently turned them
    # into the tuples (30,) and (None,) instead of the intended scalars.
    max_obs = 30  # max number of obstacles
    max_obs_size = None
    gen = False

    # (grid dimension, value iterations, weights file). A preset applies when
    # either the width or the height equals the dimension; first match wins.
    # NOTE(review): the 100x100 preset reuses the 64x64 weights file, exactly
    # as the original code did -- confirm this is intended.
    presets = (
        (100, 48, 'trained/30k_no_block_dataset_vin_64x64.pth'),
        (64, 48, 'trained/30k_no_block_dataset_vin_64x64.pth'),
        (28, 36, 'trained/60k_no_block_att3_vin_28x28.pth'),
        (16, 20, 'trained/60k_no_block_att3_vin_16x16.pth'),
        (8, 10, 'trained/60k_no_block_att3_vin_8x8.pth'),
    )
    for size, k, training_file in presets:
        if grid.size.width == size or grid.size.height == size:
            im_size = size
            break
    else:
        # No preset matched: fall back to the 64x64 weights and use the grid
        # width as the image size.
        k = 48
        im_size = grid.size.width
        training_file = 'trained/30k_no_block_dataset_vin_64x64.pth'

    # Parsing training parameters
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--weights',
        type=str,
        default=training_file,
        help='Path to trained weights')
    parser.add_argument('--plot', action='store_true', default=False)
    parser.add_argument('--gen', action='store_true', default=False)
    parser.add_argument('--imsize',
                        type=int,
                        default=im_size,
                        help='Size of image')
    parser.add_argument('--k',
                        type=int,
                        default=k,
                        help='Number of Value Iterations')
    parser.add_argument('--l_i',
                        type=int,
                        default=2,
                        help='Number of channels in input layer')
    parser.add_argument('--l_h',
                        type=int,
                        default=150,
                        help='Number of channels in first hidden layer')
    parser.add_argument(
        '--l_q',
        type=int,
        default=10,
        help='Number of channels in q layer (~actions) in VI-module')
    config = parser.parse_args()

    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters
    vin.load_state_dict(torch.load(config.weights))
    # Use GPU if available
    if use_GPU:
        vin = vin.cuda()

    counter = 0
    data = []
    for dom in range(n_domains):
        if gen:
            # Random map: random goal, random obstacles, border.
            goal = [
                np.random.randint(config.imsize),
                np.random.randint(config.imsize)
            ]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            # Add obstacles to map
            n_obs = obs.add_n_rand_obs(max_obs)
            # Add border to map
            border_res = obs.add_border()
            # Ensure we have valid map
            if n_obs == 0 or not border_res:
                continue
            start = None
        else:
            # Use the map, agent and goal supplied by the caller.
            print(grid)
            mp = grid.grid
            goal = [grid.goal.position.x, grid.goal.position.y]
            start = [grid.agent.position.x, grid.agent.position.y]
            # Set the start and goal positions as free space too.
            mp[start[1]][start[0]] = 0
            mp[goal[1]][goal[0]] = 0

            # Swap to row/col order (x is the column, not the row).
            goal = [goal[1], goal[0]]
            start = [start[1], start[0]]
            obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
            obs.dom = mp
        # Get final map. Between mp and im the 0 should become 1 and the
        # 1 should become 0 (1 is obstacles in mp).
        im = obs.get_final()

        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample reference trajectories to our goal (the original comment
        # calls this a Dijkstra trajectory).
        states_xy, states_one_hot = sample_trajectory(G, n_traj, start, gen)
        if gen and len(states_xy[0]) > 0:
            # This saves the generated maps.
            save_image(G.image, (goal[0], goal[1]), states_xy[0][0], states_xy,
                       states_one_hot, counter)

        counter += 1
        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                # Allow twice as many steps as the reference trajectory.
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                for j in range(1, L):
                    # Transform current state data. The builtin int replaces
                    # the np.int alias, which was removed in NumPy 1.24.
                    state_data = pred_traj[j - 1, :]
                    state_data = state_data.astype(int)
                    # Transform domain to Networks expected input shape
                    im_data = G.image.astype(int)
                    im_data = 1 - im_data
                    im_data = im_data.reshape(1, 1, config.imsize,
                                              config.imsize)
                    # Transform value prior to Networks expected input shape
                    value_data = value_prior.astype(int)
                    value_data = value_data.reshape(1, 1, config.imsize,
                                                    config.imsize)
                    # Get inputs as expected by network
                    X_in = torch.from_numpy(
                        np.append(im_data, value_data, axis=1)).float()
                    S1_in = torch.from_numpy(state_data[0].reshape(
                        [1, 1])).float()
                    S2_in = torch.from_numpy(state_data[1].reshape(
                        [1, 1])).float()
                    # Send Tensors to GPU if available
                    if use_GPU:
                        X_in = X_in.cuda()
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    # Wrap to autograd.Variable
                    X_in, S1_in, S2_in = Variable(X_in), Variable(
                        S1_in), Variable(S2_in)
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    # Pick the highest-scoring action.
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break
                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    print('success!')
                # Only the first usable trajectory is rolled out.
                return pred_traj
    # Nothing could be rolled out (no valid map, or every sampled trajectory
    # had at most one state): return an empty path rather than hitting an
    # unbound local.
    return np.zeros((0, 2))
Ejemplo n.º 2
0
def main(config,
         n_domains=100,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8):
    """Measure the rollout accuracy of a trained VIN on random grid maps.

    For each domain a random goal and obstacle map are generated, reference
    trajectories are sampled, and the network is rolled out greedily from
    each trajectory's first state for at most twice the reference length.
    A rollout counts as correct when its last position equals the goal.

    Args:
        config: Configuration object. This function reads ``config.weights``,
            ``config.imsize`` and ``config.plot``; the whole object is also
            passed to ``VIN`` and to the network's forward call.
        n_domains: Number of random domains to try. Domains whose obstacle
            generation fails are skipped, not retried.
        max_obs: Passed to ``obstacles.add_n_rand_obs``.
        max_obs_size: Passed to the ``obstacles`` constructor.
        n_traj: Number of trajectories sampled per domain.
        n_actions: Unused; kept for interface compatibility.

    Returns:
        None. Progress is written to stdout and the final accuracy is
        printed. If no trajectory was evaluated, a "n/a" line is printed
        (this used to raise ``ZeroDivisionError``).
    """
    # Correct vs total:
    correct, total = 0.0, 0.0
    # Automatic switch of GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters
    vin.load_state_dict(torch.load(config.weights))
    # Use GPU if available
    if use_GPU:
        vin = vin.cuda()

    for dom in range(n_domains):
        # Randomly select goal position
        goal = [
            np.random.randint(config.imsize),
            np.random.randint(config.imsize)
        ]
        # Generate obstacle map
        obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
        # Add obstacles to map
        n_obs = obs.add_n_rand_obs(max_obs)
        # Add border to map
        border_res = obs.add_border()
        # Ensure we have valid map
        if n_obs == 0 or not border_res:
            continue
        # Get final map
        im = obs.get_final()

        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample random trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj)

        for i in range(n_traj):
            if len(states_xy[i]) > 1:

                # Allow twice as many steps as the reference trajectory.
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                for j in range(1, L):
                    # Transform current state data. The builtin int replaces
                    # the np.int alias, which was removed in NumPy 1.24.
                    state_data = pred_traj[j - 1, :]
                    state_data = state_data.astype(int)
                    # Transform domain to Networks expected input shape
                    im_data = G.image.astype(int)
                    im_data = 1 - im_data
                    im_data = im_data.reshape(1, 1, config.imsize,
                                              config.imsize)
                    # Transform value prior to Networks expected input shape
                    value_data = value_prior.astype(int)
                    value_data = value_data.reshape(1, 1, config.imsize,
                                                    config.imsize)
                    # Get inputs as expected by network
                    X_in = torch.from_numpy(
                        np.append(im_data, value_data, axis=1)).float()
                    S1_in = torch.from_numpy(state_data[0].reshape(
                        [1, 1])).float()
                    S2_in = torch.from_numpy(state_data[1].reshape(
                        [1, 1])).float()
                    # Send Tensors to GPU if available
                    if use_GPU:
                        X_in = X_in.cuda()
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    # Wrap to autograd.Variable
                    X_in, S1_in, S2_in = Variable(X_in), Variable(
                        S1_in), Variable(S2_in)
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    # Pick the highest-scoring action.
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break
                # A rollout is correct when it ends on the goal.
                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    correct += 1
                total += 1
                # Plot optimal and predicted path (also start, end)
                if config.plot:
                    visualize(G.image.T, states_xy[i], pred_traj)
        sys.stdout.write("\r" + str(int((float(dom) / n_domains) * 100.0)) +
                         "%")
        sys.stdout.flush()
    sys.stdout.write("\n")
    if total > 0:
        print('Rollout Accuracy: {:.2f}%'.format(100 * (correct / total)))
    else:
        # Every domain was skipped or produced no usable trajectory.
        print('Rollout Accuracy: n/a (no trajectories were evaluated)')
Ejemplo n.º 3
0
def make_data(dom_size, n_domains, max_obs, max_obs_size, n_traj,
              state_batch_size):
    """Generate training samples from randomly generated grid domains.

    Random maps are generated until ``n_domains`` valid ones have been
    processed. The original loop condition was ``dom <= n_domains``, which
    produced one domain too many and pushed the progress display past 100%.
    Maps whose obstacle generation fails are retried and do not count.

    Args:
        dom_size: Two-element sequence with the domain dimensions.
        n_domains: Number of valid domains to generate.
        max_obs: Passed to ``obstacles.add_n_rand_obs``.
        max_obs_size: Passed to the ``obstacles`` constructor.
        n_traj: Number of trajectories sampled per domain.
        state_batch_size: Unused; kept for interface compatibility.

    Returns:
        Tuple ``(X, S1, S2, Labels)`` of arrays concatenated along axis 0
        over every trajectory with more than one state. For a trajectory
        contributing ``ns`` states, ``X`` gains ``ns`` copies of the
        ``(2, dom_size[0], dom_size[1])`` image/value stack, and ``S1`` /
        ``S2`` gain ``(ns, 1)`` columns taken from columns 0 and 1 of the
        trajectory. ``Labels`` holds the output of ``extract_action``
        with a trailing axis added.

    Raises:
        ValueError: If no trajectory with more than one state was produced.
    """
    X_l = []
    S1_l = []
    S2_l = []
    Labels_l = []

    dom = 0
    while dom < n_domains:
        goal = [np.random.randint(dom_size[0]), np.random.randint(dom_size[1])]
        # Generate obstacle map
        obs = obstacles([dom_size[0], dom_size[1]], goal, max_obs_size)
        # Add obstacles to map
        n_obs = obs.add_n_rand_obs(max_obs)
        # Add border to map
        border_res = obs.add_border()
        # Ensure we have valid map; invalid maps are retried without
        # advancing the domain counter.
        if n_obs == 0 or not border_res:
            continue
        # Get final map
        im = obs.get_final()
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.t_get_reward_prior()
        # Sample random trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj)
        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                # Get optimal actions for each state
                actions = extract_action(states_xy[i])
                # The final state has no outgoing action, so it is dropped.
                ns = states_xy[i].shape[0] - 1
                # Invert domain image => 0 = free, 1 = obstacle
                image = 1 - im
                # Resize domain and goal images and concatenate
                image_data = np.resize(image, (1, 1, dom_size[0], dom_size[1]))
                value_data = np.resize(value_prior,
                                       (1, 1, dom_size[0], dom_size[1]))
                iv_mixed = np.concatenate((image_data, value_data), axis=1)
                # Same image/value stack for every state of the trajectory.
                X_current = np.tile(iv_mixed, (ns, 1, 1, 1))
                # Resize states
                S1_current = np.expand_dims(states_xy[i][0:ns, 0], axis=1)
                S2_current = np.expand_dims(states_xy[i][0:ns, 1], axis=1)
                # Resize labels
                Labels_current = np.expand_dims(actions, axis=1)
                # Append to output list
                X_l.append(X_current)
                S1_l.append(S1_current)
                S2_l.append(S2_current)
                Labels_l.append(Labels_current)
        dom += 1
        # float() keeps this a true division regardless of Python version.
        sys.stdout.write("\r" + str(int((float(dom) / n_domains) * 100)) + "%")
        sys.stdout.flush()
    sys.stdout.write("\n")
    if not X_l:
        # np.concatenate would raise an opaque ValueError on an empty list.
        raise ValueError(
            'make_data produced no samples: no trajectory with more than '
            'one state was generated')
    # Concat all outputs
    X_f = np.concatenate(X_l)
    S1_f = np.concatenate(S1_l)
    S2_f = np.concatenate(S2_l)
    Labels_f = np.concatenate(Labels_l)
    return X_f, S1_f, S2_f, Labels_f
Ejemplo n.º 4
0
 def X2R(self,X):
     """Derive the reward map and the goal position from input ``X``.

     The goal is the location of the maximum of ``X[1]``: the flat argmax
     index is split into (row, col) using the module-level
     ``config.imsize`` as the row length. The reward is
     ``X[1] - 0.02 * (1 - X[0]) - 2 * X[0]``.

     Presumably ``X[0]`` is the obstacle channel and ``X[1]`` the
     goal/value channel -- TODO confirm against the caller.

     The original body also constructed a ``gridworld`` that was never
     used; that dead construction has been removed.

     Returns:
         Tuple ``(R, goal)`` where ``goal`` is the list ``[row, col]``.
     """
     # divmod yields the same quotient/remainder as the former `//` and `%`
     # pair while computing the argmax only once.
     goal_row, goal_col = divmod(np.argmax(X[1]), config.imsize)
     goal = [goal_row, goal_col]
     R = X[1] - (1-X[0])*0.02 - 2 * X[0]
     return R,goal
Ejemplo n.º 5
0
 def __init__(self,X):
     """Set up the reward map, goal, gridworld and action table from ``X``.

     ``self.R`` and ``self.goal`` come from ``self.X2R(X)``. ``self.G`` is
     a ``gridworld`` built from ``1 - X[0]`` and the goal coordinates.
     ``self.actions`` is an ``(8, 2)`` array of integer offsets: the four
     axis-aligned moves followed by the four diagonal ones.
     """
     reward, goal = self.X2R(X)
     self.R = reward
     self.goal = goal
     inverted_first_channel = 1-X[0]
     self.G = gridworld(inverted_first_channel, self.goal[0], self.goal[1])
     # Axis-aligned offsets first, then the diagonals.
     offsets = [[-1, 0], [1, 0], [0, 1], [0, -1],
                [-1, 1], [-1, -1], [1, 1], [1, -1]]
     self.actions = np.asarray(offsets)
def make_data(dom_size, n_domains, max_obs, max_obs_size, n_traj,
              state_batch_size):
    """Generate training samples from randomly generated grid domains.

    Random maps are generated until ``n_domains`` valid ones have been
    processed. The original loop condition was ``dom <= n_domains``, which
    produced one domain too many and pushed the progress display past 100%.
    Maps whose obstacle generation fails are retried and do not count.

    Args:
        dom_size: Two-element sequence with the domain dimensions.
        n_domains: Number of valid domains to generate.
        max_obs: Passed to ``obstacles.add_n_rand_obs``.
        max_obs_size: Passed to the ``obstacles`` constructor.
        n_traj: Number of trajectories sampled per domain.
        state_batch_size: Unused; kept for interface compatibility.

    Returns:
        Tuple ``(X, S1, S2, Labels)`` of arrays concatenated along axis 0
        over every trajectory with more than one state. For a trajectory
        contributing ``ns`` states, ``X`` gains ``ns`` copies of the
        ``(2, dom_size[0], dom_size[1])`` image/value stack, and ``S1`` /
        ``S2`` gain ``(ns, 1)`` columns taken from columns 0 and 1 of the
        trajectory. ``Labels`` holds the output of ``extract_action``
        with a trailing axis added.

    Raises:
        ValueError: If no trajectory with more than one state was produced.
    """
    X_l = []
    S1_l = []
    S2_l = []
    Labels_l = []

    dom = 0
    while dom < n_domains:
        goal = [np.random.randint(dom_size[0]), np.random.randint(dom_size[1])]
        # Generate obstacle map
        obs = obstacles([dom_size[0], dom_size[1]], goal, max_obs_size)
        # Add obstacles to map
        n_obs = obs.add_n_rand_obs(max_obs)
        # Add border to map
        border_res = obs.add_border()
        # Ensure we have valid map; invalid maps are retried without
        # advancing the domain counter.
        if n_obs == 0 or not border_res:
            continue
        # Get final map
        im = obs.get_final()
        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.t_get_reward_prior()
        # Sample random trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj)
        for i in range(n_traj):
            if len(states_xy[i]) > 1:
                # Get optimal actions for each state
                actions = extract_action(states_xy[i])
                # The final state has no outgoing action, so it is dropped.
                ns = states_xy[i].shape[0] - 1
                # Invert domain image => 0 = free, 1 = obstacle
                image = 1 - im
                # Resize domain and goal images and concatenate
                image_data = np.resize(image, (1, 1, dom_size[0], dom_size[1]))
                value_data = np.resize(value_prior,
                                       (1, 1, dom_size[0], dom_size[1]))
                iv_mixed = np.concatenate((image_data, value_data), axis=1)
                # Same image/value stack for every state of the trajectory.
                X_current = np.tile(iv_mixed, (ns, 1, 1, 1))
                # Resize states
                S1_current = np.expand_dims(states_xy[i][0:ns, 0], axis=1)
                S2_current = np.expand_dims(states_xy[i][0:ns, 1], axis=1)
                # Resize labels
                Labels_current = np.expand_dims(actions, axis=1)
                # Append to output list
                X_l.append(X_current)
                S1_l.append(S1_current)
                S2_l.append(S2_current)
                Labels_l.append(Labels_current)
        dom += 1
        # float() keeps this a true division regardless of Python version.
        sys.stdout.write("\r" + str(int((float(dom) / n_domains) * 100)) + "%")
        sys.stdout.flush()
    sys.stdout.write("\n")
    if not X_l:
        # np.concatenate would raise an opaque ValueError on an empty list.
        raise ValueError(
            'make_data produced no samples: no trajectory with more than '
            'one state was generated')
    # Concat all outputs
    X_f = np.concatenate(X_l)
    S1_f = np.concatenate(S1_l)
    S2_f = np.concatenate(S2_l)
    Labels_f = np.concatenate(Labels_l)
    return X_f, S1_f, S2_f, Labels_f
def main(config,
         n_domains=100,
         max_obs=30,
         max_obs_size=None,
         n_traj=1,
         n_actions=8):
    """Measure the rollout accuracy of a trained VIN on random grid maps.

    For each domain a random goal and obstacle map are generated, reference
    trajectories are sampled, and the network is rolled out greedily from
    each trajectory's first state for at most twice the reference length.
    A rollout counts as correct when its last position equals the goal.

    Args:
        config: Configuration object. This function reads ``config.weights``,
            ``config.imsize`` and ``config.plot``; the whole object is also
            passed to ``VIN`` and to the network's forward call.
        n_domains: Number of random domains to try. Domains whose obstacle
            generation fails are skipped, not retried.
        max_obs: Passed to ``obstacles.add_n_rand_obs``.
        max_obs_size: Passed to the ``obstacles`` constructor.
        n_traj: Number of trajectories sampled per domain.
        n_actions: Unused; kept for interface compatibility.

    Returns:
        None. Progress is written to stdout and the final accuracy is
        printed. If no trajectory was evaluated, a "n/a" line is printed
        (this used to raise ``ZeroDivisionError``).
    """
    # Correct vs total:
    correct, total = 0.0, 0.0
    # Automatic switch of GPU mode if available
    use_GPU = torch.cuda.is_available()
    # Instantiate a VIN model
    vin = VIN(config)
    # Load model parameters
    vin.load_state_dict(torch.load(config.weights))
    # Use GPU if available
    if use_GPU:
        vin = vin.cuda()

    for dom in range(n_domains):
        # Randomly select goal position
        goal = [
            np.random.randint(config.imsize),
            np.random.randint(config.imsize)
        ]
        # Generate obstacle map
        obs = obstacles([config.imsize, config.imsize], goal, max_obs_size)
        # Add obstacles to map
        n_obs = obs.add_n_rand_obs(max_obs)
        # Add border to map
        border_res = obs.add_border()
        # Ensure we have valid map
        if n_obs == 0 or not border_res:
            continue
        # Get final map
        im = obs.get_final()

        # Generate gridworld from obstacle map
        G = gridworld(im, goal[0], goal[1])
        # Get value prior
        value_prior = G.get_reward_prior()
        # Sample random trajectories to our goal
        states_xy, states_one_hot = sample_trajectory(G, n_traj)

        for i in range(n_traj):
            if len(states_xy[i]) > 1:

                # Allow twice as many steps as the reference trajectory.
                L = len(states_xy[i]) * 2
                # Allocate space for predicted steps
                pred_traj = np.zeros((L, 2))
                # Set starting position
                pred_traj[0, :] = states_xy[i][0, :]

                for j in range(1, L):
                    # Transform current state data. The builtin int replaces
                    # the np.int alias, which was removed in NumPy 1.24.
                    state_data = pred_traj[j - 1, :]
                    state_data = state_data.astype(int)
                    # Transform domain to Networks expected input shape
                    im_data = G.image.astype(int)
                    im_data = 1 - im_data
                    im_data = im_data.reshape(1, 1, config.imsize,
                                              config.imsize)
                    # Transform value prior to Networks expected input shape
                    value_data = value_prior.astype(int)
                    value_data = value_data.reshape(1, 1, config.imsize,
                                                    config.imsize)
                    # Get inputs as expected by network
                    X_in = torch.from_numpy(
                        np.append(im_data, value_data, axis=1)).float()
                    S1_in = torch.from_numpy(state_data[0].reshape(
                        [1, 1])).float()
                    S2_in = torch.from_numpy(state_data[1].reshape(
                        [1, 1])).float()
                    # Send Tensors to GPU if available
                    if use_GPU:
                        X_in = X_in.cuda()
                        S1_in = S1_in.cuda()
                        S2_in = S2_in.cuda()
                    # Wrap to autograd.Variable
                    X_in, S1_in, S2_in = Variable(X_in), Variable(
                        S1_in), Variable(S2_in)
                    # Forward pass in our neural net
                    _, predictions = vin(X_in, S1_in, S2_in, config)
                    # Pick the highest-scoring action.
                    _, indices = torch.max(predictions.cpu(), 1, keepdim=True)
                    a = indices.data.numpy()[0][0]
                    # Transform prediction to indices
                    s = G.map_ind_to_state(pred_traj[j - 1, 0],
                                           pred_traj[j - 1, 1])
                    ns = G.sample_next_state(s, a)
                    nr, nc = G.get_coords(ns)
                    pred_traj[j, 0] = nr
                    pred_traj[j, 1] = nc
                    if nr == goal[0] and nc == goal[1]:
                        # We hit goal so fill remaining steps
                        pred_traj[j + 1:, 0] = nr
                        pred_traj[j + 1:, 1] = nc
                        break
                # A rollout is correct when it ends on the goal.
                if pred_traj[-1, 0] == goal[0] and pred_traj[-1, 1] == goal[1]:
                    correct += 1
                total += 1
                # Plot optimal and predicted path (also start, end)
                if config.plot:
                    visualize(G.image.T, states_xy[i], pred_traj)
        sys.stdout.write("\r" + str(int(
            (float(dom) / n_domains) * 100.0)) + "%")
        sys.stdout.flush()
    sys.stdout.write("\n")
    if total > 0:
        print('Rollout Accuracy: {:.2f}%'.format(100 * (correct / total)))
    else:
        # Every domain was skipped or produced no usable trajectory.
        print('Rollout Accuracy: n/a (no trajectories were evaluated)')