def gen_L(grid_width, grid_height, path='L_expert_trajectories'):
    '''Generates trajectories of shape L, with right turn.'''
    t = 3
    n = 2
    num_traj = 50
    obstacles = create_obstacles(grid_width, grid_height)
    set_diff = list(
        set(
            product(tuple(range(3, grid_width - 3)),
                    tuple(range(3, grid_height - 3)))) - set(obstacles))
    T = TransitionFunction(grid_width, grid_height, obstacle_movement)

    expert_data_dict = {}
    # Number of goals is the same as number of actions.
    num_actions, num_goals = 4, 4
    env_data_dict = {'num_actions': num_actions, 'num_goals': num_goals}

    for i in range(num_traj):
        start_state = State(sample_start(set_diff), obstacles)
        for action_idx in range(num_actions):
            path_key = str(i) + '_' + str(action_idx)
            expert_data_dict[path_key] = {
                'state': [],
                'action': [],
                'goal': []
            }

            state = start_state
            for j in range(n):
                if j == 0:
                    # Set initial direction.
                    action = Action(action_idx)
                else:
                    # Take a right turn relative to the current direction.
                    if action.delta == 0:
                        action = Action(3)
                    elif action.delta == 1:
                        action = Action(2)
                    elif action.delta == 2:
                        action = Action(0)
                    elif action.delta == 3:
                        action = Action(1)
                    else:
                        raise ValueError("Invalid action delta {}".format(
                            action.delta))

                for k in range(t):
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(action.delta)
                    state = T(state, action, j)

    return env_data_dict, expert_data_dict, obstacles, set_diff
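# Usage sketch for gen_L above (this demo function is an assumption, not
# part of the original repo). Each trajectory key is '<i>_<action_idx>' and
# every trajectory holds t * n = 3 * 2 = 6 steps: three steps in the initial
# direction, a right turn, then three more.
def _demo_gen_L():
    env_data, expert_data, obstacles, set_diff = gen_L(12, 12)
    traj = expert_data['0_0']  # first start state, initial action 0
    assert len(traj['state']) == 6
    print(env_data, traj['action'], traj['goal'])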
def gen_L(grid_width, grid_height, path='L_expert_trajectories'):
    '''Generates trajectories of shape L, with right turn.'''
    t = 3
    n = 2
    N = 200
    obstacles = create_obstacles(grid_width, grid_height)
    set_diff = list(
        set(
            product(tuple(range(3, grid_width - 3)),
                    tuple(range(3, grid_height - 3)))) - set(obstacles))

    if not os.path.exists(path):
        os.makedirs(path)

    T = TransitionFunction(grid_width, grid_height, obstacle_movement)

    for i in range(N):
        filename = os.path.join(path, str(i) + '.txt')
        with open(filename, 'w') as f:
            for j in range(n):
                if j == 0:
                    action = Action(random.choice(range(0, 4)))
                    state = State(sample_start(set_diff), obstacles)
                else:
                    # Take a right turn.
                    if action.delta == 0:
                        action = Action(3)
                    elif action.delta == 1:
                        action = Action(2)
                    elif action.delta == 2:
                        action = Action(0)
                    elif action.delta == 3:
                        action = Action(1)

                for k in range(t):
                    # Write state, one-hot action, and one-hot latent c[t].
                    f.write(' '.join([str(e) for e in state.state]) + '\n')
                    f.write(' '.join(
                        [str(e)
                         for e in oned_to_onehot(action.delta, 4)]) + '\n')
                    f.write(' '.join(
                        [str(e)
                         for e in oned_to_onehot(action.delta, 4)]) + '\n')
                    state = T(state, action, j)
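# Both gen_L variants (and gen_sq_rec below) call a oned_to_onehot helper
# defined elsewhere in the repo. A minimal sketch of the assumed behaviour
# follows; the real helper may return a numpy array rather than a list.
def _oned_to_onehot_sketch(action_delta, n):
    onehot = [0.0] * n
    onehot[int(action_delta)] = 1.0  # e.g. (2, 4) -> [0.0, 0.0, 1.0, 0.0]
    return onehot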
def gen_sq_rec(grid_width, grid_height, path='SR_expert_trajectories'):
    '''Generates squares if starting in quadrants 1 and 4, and rectangles
    if starting in quadrants 2 and 3.'''
    N = 200
    obstacles = create_obstacles(grid_width, grid_height)

    if not os.path.exists(path):
        os.makedirs(path)

    T = TransitionFunction(grid_width, grid_height, obstacle_movement)

    for i in range(N):
        filename = os.path.join(path, str(i) + '.txt')
        with open(filename, 'w') as f:
            half = random.choice(range(0, 2))
            if half == 0:  # left half
                set_diff = list(
                    set(
                        product(tuple(range(0, (grid_width // 2) - 3)),
                                tuple(range(1, grid_height)))) -
                    set(obstacles))
            elif half == 1:  # right half
                set_diff = list(
                    set(
                        product(tuple(range(grid_width // 2, grid_width - 2)),
                                tuple(range(2, grid_height)))) -
                    set(obstacles))
            start_loc = sample_start(set_diff)
            state = State(start_loc, obstacles)

            if start_loc[0] >= grid_width // 2:
                # Quadrants 1 and 4: generate a 2x2 square, clockwise.
                t = 2
                n = 4
                delta = 3
                for j in range(n):
                    for k in range(t):
                        action = Action(delta)
                        # Write state, one-hot action, one-hot latent c[t].
                        f.write(' '.join([str(e)
                                          for e in state.state]) + '\n')
                        f.write(' '.join(
                            [str(e)
                             for e in oned_to_onehot(action.delta, 4)]) +
                                '\n')
                        f.write(' '.join(
                            [str(e)
                             for e in oned_to_onehot(action.delta, 4)]) +
                                '\n')
                        state = T(state, action, j * 2 + k)
                    if delta == 3:
                        delta = 1
                    elif delta == 1:
                        delta = 2
                    elif delta == 2:
                        delta = 0
            else:
                # Quadrants 2 and 3: generate a 3x1 rectangle, anti-clockwise.
                t = [1, 3, 1, 3]
                delta = 1
                for j in range(len(t)):
                    for k in range(t[j]):
                        action = Action(delta)
                        # Write state, one-hot action, one-hot latent c[t].
                        f.write(' '.join([str(e)
                                          for e in state.state]) + '\n')
                        f.write(' '.join(
                            [str(e)
                             for e in oned_to_onehot(action.delta, 4)]) +
                                '\n')
                        f.write(' '.join(
                            [str(e)
                             for e in oned_to_onehot(action.delta, 4)]) +
                                '\n')
                        state = T(state, action, sum(t[0:j]) + k)
                    if delta == 1:
                        delta = 3
                    elif delta == 3:
                        delta = 0
                    elif delta == 0:
                        delta = 2
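# Usage sketch for gen_sq_rec (an assumption, not part of the original
# repo). Both branches emit trajectories of equal length: the square writes
# t * n = 2 * 4 = 8 steps, the rectangle sum([1, 3, 1, 3]) = 8 steps, and
# each step writes 3 lines (state, action, latent), so every file holds 24
# lines.
def _demo_gen_sq_rec():
    gen_sq_rec(12, 12, path='SR_expert_trajectories')
    with open(os.path.join('SR_expert_trajectories', '0.txt')) as f:
        assert len(f.readlines()) == 24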
def gen_room_trajs(grid_width, grid_height, room_size):
    N = 300
    T = 50
    num_goals = 4
    expert_data_dict = {}
    # 4 discrete actions; one goal per room.
    env_data_dict = {
        'num_actions': num_goals,
        'num_goals': 4,
    }
    obstacles, rooms, room_centres = create_obstacles(grid_width,
                                                      grid_height,
                                                      env_name='room',
                                                      room_size=room_size)
    set_diff = list(
        set(
            product(tuple(range(0, grid_width)),
                    tuple(range(0, grid_height)))) - set(obstacles))
    room_set = set(rooms)
    room_centre_set = set(room_centres)

    # Build a graph over free cells; edges connect 4-neighbours.
    graph = Graph()
    deltas = {(0, 1): 0, (0, -1): 1, (-1, 0): 2, (1, 0): 3}
    for node in set_diff:
        for a in deltas:
            neigh = (node[0] + a[0], node[1] + a[1])
            if 0 <= neigh[0] < grid_width and 0 <= neigh[1] < grid_height:
                if neigh not in obstacles:
                    graph.add_edge(node, neigh, 1)
                    graph.add_edge(neigh, node, 1)

    for n in range(N):
        states, actions, goals = [], [], []
        rem_len, path_key = T, str(n)
        expert_data_dict[path_key] = {'state': [], 'action': [], 'goal': []}

        # The initial start state is never at the centre of any room.
        start_state = State(
            sample_start(list(set(set_diff) - room_centre_set)), obstacles)

        while rem_len > 0:
            # Randomly select one room (the goal) and place the apple at
            # its centre; resample if it coincides with the agent.
            goal = random.choice(range(len(room_centres)))
            while room_centres[goal] == start_state.coordinates:
                goal = random.choice(range(len(room_centres)))
            apple_state = State(room_centres[goal], obstacles)

            # Follow the shortest path from agent to apple, recovered by
            # walking the predecessor map returned by Dijkstra.
            source = start_state.coordinates
            destination = apple_state.coordinates
            p = graph.Dijkstra(source)
            node = destination
            path = []
            while node != source:
                path.append(node)
                node = p[node]
            path.append(source)
            path.reverse()

            path_len = min(len(path) - 1, rem_len)
            for i in range(path_len):
                s = path[i]
                next_s = path[i + 1]
                state = np.array(s)
                action = (next_s[0] - s[0], next_s[1] - s[1])
                action_delta = deltas[action]
                states.append(state)
                actions.append(action_delta)
                goal_onehot = np.zeros((num_goals, ))
                goal_onehot[goal] = 1.0
                goals.append(goal_onehot)

            rem_len = rem_len - path_len
            start_state.coordinates = destination

        expert_data_dict[path_key]['state'] = states
        expert_data_dict[path_key]['action'] = actions
        expert_data_dict[path_key]['goal'] = goals

    return env_data_dict, expert_data_dict, obstacles, set_diff
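# gen_room_trajs assumes a Graph class with add_edge(u, v, w) and a
# Dijkstra(source) method returning a predecessor map, defined elsewhere in
# the repo. A minimal sketch under those assumed semantics (heapq-based;
# with unit edge weights this reduces to BFS):
import heapq

class _GraphSketch(object):
    def __init__(self):
        self.adj = {}

    def add_edge(self, u, v, w):
        self.adj.setdefault(u, []).append((v, w))

    def Dijkstra(self, source):
        dist = {source: 0}
        prev = {}
        heap = [(0, source)]
        while heap:
            d, u = heapq.heappop(heap)
            if d > dist.get(u, float('inf')):
                continue  # stale queue entry
            for v, w in self.adj.get(u, []):
                nd = d + w
                if nd < dist.get(v, float('inf')):
                    dist[v] = nd
                    prev[v] = u
                    heapq.heappush(heap, (nd, v))
        return prev  # walk prev[] back from any destination to source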
def gen_diverse_trajs(grid_width, grid_height):
    '''Generate diverse trajectories in a 21x21 grid with 4 goals.

    Return: Dictionary with keys as text filenames and values as
    dictionaries. Each value dictionary contains three keys: 'state' with a
    list of states, 'action' with a list of actions, and 'goal' with a list
    of goal indices.
    '''
    assert grid_width == 21 and grid_height == 21, \
        "Incorrect grid width/height"
    N = 20
    goals = [(0, 0), (20, 20), (20, 0), (0, 20)]
    n_goals = len(goals)
    obstacles = create_obstacles(21, 21, 'diverse')
    T = TransitionFunction(grid_width, grid_height, obstacle_movement)
    set_diff = list(
        set(product(tuple(range(7, 13)), tuple(range(7, 13)))) -
        set(obstacles))

    expert_data_dict = {}
    env_data_dict = {
        'num_actions': 8,
        'num_goals': n_goals,
        'goals': np.array(goals),
    }

    for n in range(N):
        start_state = State(sample_start(set_diff), obstacles)

        for g in range(n_goals):  # loop over goals
            # Path 1: go up/down till the boundary, then move right/left.
            if g == 0 or g == 2:  # do path 1 only for goals 0 and 2
                state = start_state
                path_key = str(n) + '_' + str(g) + '_' + str(1) + '.txt'
                expert_data_dict[path_key] = {
                    'state': [],
                    'action': [],
                    'goal': []
                }

                delta = 0 if g < 2 else 1
                action = Action(delta)
                while state.state[1] != grid_height - 1 \
                        and state.state[1] != 0:
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = T(state, action, 0)

                delta = 3 if g == 0 or g == 3 else 2
                action = Action(delta)
                while state.state[0] != grid_width - 1 \
                        and state.state[0] != 0:
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = T(state, action, 0)

                assert state.coordinates in goals

            # Path 2: go right/left till the boundary, then move up/down.
            if g == 1:  # do path 2 only for goal 1
                state = start_state
                path_key = str(n) + '_' + str(g) + '_' + str(2) + '.txt'
                expert_data_dict[path_key] = {
                    'state': [],
                    'action': [],
                    'goal': []
                }

                delta = 3 if g == 0 or g == 3 else 2
                action = Action(delta)
                while state.state[0] != grid_width - 1 \
                        and state.state[0] != 0:
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = T(state, action, 0)

                delta = 0 if g < 2 else 1
                action = Action(delta)
                while state.state[1] != grid_height - 1 \
                        and state.state[1] != 0:
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = T(state, action, 0)

                assert state.coordinates in goals

            # Path 3: go diagonally till an obstacle, then move up/down if
            # x > 10 or right/left if y > 10, and then move right/left or
            # up/down till the goal.
            if g == 3:  # do path 3 only for goal 3
                state = start_state
                path_key = str(n) + '_' + str(g) + '_' + str(3) + '.txt'
                expert_data_dict[path_key] = {
                    'state': [],
                    'action': [],
                    'goal': []
                }

                delta = g + 4
                action = Action(delta)
                while True:
                    new_state = T(state, action, 0)
                    if new_state.coordinates == state.coordinates:
                        break  # diagonal move is blocked
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = new_state

                if T(state, Action(2), 0).coordinates == state.coordinates \
                        or T(state, Action(3), 0).coordinates \
                        == state.coordinates:
                    # Horizontal movement is blocked: go vertical first.
                    delta = 0 if g < 2 else 1
                    action = Action(delta)
                    while state.state[1] != grid_height - 1 \
                            and state.state[1] != 0:
                        expert_data_dict[path_key]['state'].append(
                            state.state)
                        expert_data_dict[path_key]['action'].append(
                            action.delta)
                        expert_data_dict[path_key]['goal'].append(g)
                        state = T(state, action, 0)

                    delta = 3 if g == 0 or g == 3 else 2
                    action = Action(delta)
                    while state.state[0] != grid_width - 1 \
                            and state.state[0] != 0:
                        expert_data_dict[path_key]['state'].append(
                            state.state)
                        expert_data_dict[path_key]['action'].append(
                            action.delta)
                        expert_data_dict[path_key]['goal'].append(g)
                        state = T(state, action, 0)
                else:
                    # Otherwise go horizontal first, then vertical.
                    delta = 3 if g == 0 or g == 3 else 2
                    action = Action(delta)
                    while state.state[0] != grid_width - 1 \
                            and state.state[0] != 0:
                        expert_data_dict[path_key]['state'].append(
                            state.state)
                        expert_data_dict[path_key]['action'].append(
                            action.delta)
                        expert_data_dict[path_key]['goal'].append(g)
                        state = T(state, action, 0)

                    delta = 0 if g < 2 else 1
                    action = Action(delta)
                    while state.state[1] != grid_height - 1 \
                            and state.state[1] != 0:
                        expert_data_dict[path_key]['state'].append(
                            state.state)
                        expert_data_dict[path_key]['action'].append(
                            action.delta)
                        expert_data_dict[path_key]['goal'].append(g)
                        state = T(state, action, 0)

                assert state.coordinates in goals

    return env_data_dict, expert_data_dict, obstacles, set_diff
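# Usage sketch for gen_diverse_trajs (an assumption, not part of the
# original repo). Each start state yields one trajectory per goal (path 1
# for goals 0 and 2, path 2 for goal 1, path 3 for goal 3), keyed
# '<n>_<g>_<path>.txt', so N = 20 starts produce 80 trajectories.
def _demo_gen_diverse_trajs():
    env_data, expert_data, obstacles, set_diff = gen_diverse_trajs(21, 21)
    assert len(expert_data) == 20 * 4
    print(sorted(expert_data.keys())[:2])  # ['0_0_1.txt', '0_1_2.txt']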
                    help='interval between saving policy weights (default: 100)')
parser.add_argument('--entropy-coeff',
                    type=float,
                    default=0.0,
                    metavar='N',
                    help='coefficient for entropy cost')
parser.add_argument('--clip-epsilon',
                    type=float,
                    default=0.2,
                    metavar='N',
                    help='Clipping for PPO grad')
parser.add_argument('--checkpoint',
                    type=str,
                    required=True,
                    help='path to checkpoint')
args = parser.parse_args()

#-----Environment-----#
width = height = 12
obstacles = create_obstacles(width, height)
set_diff = list(
    set(product(tuple(range(3, width - 3)), repeat=2)) - set(obstacles))
start_loc = sample_start(set_diff)
s = State(start_loc, obstacles)
T = TransitionFunction(width, height, obstacle_movement)

if args.expert_path == 'SR2_expert_trajectories/':
    R = RewardFunction_SR2(-1.0, 1.0, width)
else:
    R = RewardFunction(-1.0, 1.0)

num_inputs = s.state.shape[0]
num_actions = 4
if args.expert_path == 'SR2_expert_trajectories/':
    num_c = 2
else:
    num_c = 4
    def sample_start_state(self):
        '''Randomly sample a start state.'''
        start_loc = sample_start(self.set_diff)
        return State(start_loc, self.obstacles)
def test(Transition):
    model.eval()
    for _ in range(20):
        c = expert.sample_c()
        N = c.shape[0]
        c = np.argmax(c[0, :])

        if args.expert_path == 'SR_expert_trajectories/':
            if c == 1:
                half = 0
            elif c == 3:
                half = 1
        elif args.expert_path == 'SR2_expert_trajectories/':
            half = c

        if args.expert_path == 'SR_expert_trajectories/' \
                or args.expert_path == 'SR2_expert_trajectories/':
            if half == 0:  # left half
                set_diff = list(
                    set(
                        product(tuple(range(0, (width // 2) - 3)),
                                tuple(range(1, height)))) - set(obstacles))
            elif half == 1:  # right half
                set_diff = list(
                    set(
                        product(tuple(range(width // 2, width - 2)),
                                tuple(range(2, height)))) - set(obstacles))
        else:
            set_diff = list(
                set(product(tuple(range(3, width - 3)), repeat=2)) -
                set(obstacles))

        start_loc = sample_start(set_diff)
        s = State(start_loc, obstacles)
        R.reset()

        c = torch.from_numpy(np.array([-1.0, c])).unsqueeze(0).float()
        print('c is', c[0, 1])
        c = Variable(c)

        # Rolling window of the last 4 states, initialised to -1.
        x = -1 * torch.ones(1, 4, 2)
        if args.cuda:
            x = x.cuda()
            c = c.cuda()

        for t in range(N):
            # Shift the window and append the current state.
            x[:, :3, :] = x[:, 1:, :]
            curr_x = torch.from_numpy(s.state).unsqueeze(0)
            if args.cuda:
                curr_x = curr_x.cuda()
            x[:, 3:, :] = curr_x

            x_t0 = Variable(x[:, 0, :])
            x_t1 = Variable(x[:, 1, :])
            x_t2 = Variable(x[:, 2, :])
            x_t3 = Variable(x[:, 3, :])

            mu, logvar = model.encode(
                torch.cat((x_t0, x_t1, x_t2, x_t3), 1), c)
            c[:, 0] = model.reparameterize(mu, logvar)
            pred_a = model.decode(torch.cat((x_t0, x_t1, x_t2, x_t3), 1),
                                  c).data.cpu().numpy()
            pred_a = np.argmax(pred_a)
            print(pred_a)

            next_s = Transition(s, Action(pred_a), R.t)
            s = next_s
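# The rolling window in test() keeps the last four (x, y) states. A
# self-contained sketch of the same shift-and-append update (an assumption
# for illustration; .clone() is used here because the overlapping in-place
# copy x[:, :3, :] = x[:, 1:, :] above has no guaranteed semantics):
def _demo_state_window():
    x = -1 * torch.ones(1, 4, 2)  # history initialised to -1, as above
    for step in range(6):
        curr = torch.full((1, 2), float(step))
        x[:, :3, :] = x[:, 1:, :].clone()  # drop the oldest frame
        x[:, 3, :] = curr                  # append the newest state
    print(x[0])  # rows hold the last four steps: 2, 3, 4, 5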