예제 #1
0
def gen_L(grid_width, grid_height, path='L_expert_trajectories'):
    ''' Generates trajectories of shape L, with right turn '''
    t = 3
    n = 2
    num_traj = 50

    obstacles = create_obstacles(grid_width, grid_height)
    set_diff = list(set(product(tuple(range(3, grid_width-3)),
                                tuple(range(3, grid_height-3)))) \
                                        - set(obstacles))

    T = TransitionFunction(grid_width, grid_height, obstacle_movement)
    expert_data_dict = {}
    # Number of goals is the same as number of actions
    num_actions, num_goals = 4, 4
    env_data_dict = {'num_actions': num_actions, 'num_goals': num_goals}

    for i in range(num_traj):
        start_state = State(sample_start(set_diff), obstacles)
        for action_idx in range(num_actions):

            path_key = str(i) + '_' + str(action_idx)
            expert_data_dict[path_key] = {
                'state': [],
                'action': [],
                'goal': []
            }

            state = start_state

            for j in range(n):
                # Set initial direction
                if j == 0:
                    action = Action(action_idx)
                else:
                    if action.delta == 0:
                        action = Action(3)
                    elif action.delta == 1:
                        action = Action(2)
                    elif action.delta == 2:
                        action = Action(0)
                    elif action.delta == 3:
                        action = Action(1)
                    else:
                        raise ValueError("Invalid action delta {}".format(
                            action.delta))

                for k in range(t):
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(action.delta)
                    state = T(state, action, j)
        # print(expert_data_dict[path_key]['goal'])

    return env_data_dict, expert_data_dict, obstacles, set_diff
예제 #2
0
def gen_L(grid_width, grid_height, path='L_expert_trajectories'):
    ''' Generates trajectories of shape L, with right turn '''
    t = 3
    n = 2
    N = 200

    obstacles = create_obstacles(grid_width, grid_height)
    set_diff = list(
        set(
            product(tuple(range(3, grid_width -
                                3)), tuple(range(3, grid_height - 3)))) -
        set(obstacles))

    if not os.path.exists(path):
        os.makedirs(path)

    T = TransitionFunction(grid_width, grid_height, obstacle_movement)

    for i in range(N):
        filename = os.path.join(path, str(i) + '.txt')
        f = open(filename, 'w')
        for j in range(n):
            if j == 0:
                action = Action(random.choice(range(0, 4)))
                state = State(sample_start(set_diff), obstacles)
            else:  # take right turn
                if action.delta == 0:
                    action = Action(3)
                elif action.delta == 1:
                    action = Action(2)
                elif action.delta == 2:
                    action = Action(0)
                elif action.delta == 3:
                    action = Action(1)
            for k in range(t):
                f.write(' '.join([str(e)
                                  for e in state.state]) + '\n')  # write state
                f.write(
                    ' '.join([str(e)
                              for e in oned_to_onehot(action.delta, 4)]) +
                    '\n')  # write action
                f.write(
                    ' '.join([str(e)
                              for e in oned_to_onehot(action.delta, 4)]) +
                    '\n')  # write c[t]s
                state = T(state, action, j)

        f.close()
예제 #3
0
def gen_sq_rec(grid_width, grid_height, path='SR_expert_trajectories'):
    ''' Generates squares if starting in quadrants 1 and 4, and rectangles if starting in quadransts 2 and 3 '''
    N = 200

    obstacles = create_obstacles(grid_width, grid_height)

    if not os.path.exists(path):
        os.makedirs(path)

    T = TransitionFunction(grid_width, grid_height, obstacle_movement)

    for i in range(N):
        filename = os.path.join(path, str(i) + '.txt')
        f = open(filename, 'w')
        half = random.choice(range(0, 2))
        if half == 0:  # left half
            set_diff = list(
                set(
                    product(tuple(range(0, (grid_width / 2) -
                                        3)), tuple(range(1, grid_height)))) -
                set(obstacles))
            start_loc = sample_start(set_diff)
        elif half == 1:  # right half
            set_diff = list(
                set(
                    product(tuple(range(grid_width / 2, grid_width -
                                        2)), tuple(range(2, grid_height)))) -
                set(obstacles))
            start_loc = sample_start(set_diff)

        state = State(start_loc, obstacles)

        if start_loc[0] >= grid_width / 2:  # quadrants 1 and 4
            # generate 2x2 square clockwise
            t = 2
            n = 4
            delta = 3

            for j in range(n):
                for k in range(t):
                    action = Action(delta)
                    f.write(' '.join([str(e) for e in state.state]) +
                            '\n')  # write state
                    f.write(' '.join(
                        [str(e) for e in oned_to_onehot(action.delta, 4)]) +
                            '\n')  # write action
                    f.write(' '.join(
                        [str(e) for e in oned_to_onehot(action.delta, 4)]) +
                            '\n')  # write c[t]s
                    state = T(state, action, j * 2 + k)

                if delta == 3:
                    delta = 1
                elif delta == 1:
                    delta = 2
                elif delta == 2:
                    delta = 0

        else:  # quadrants 2 and 3
            # generate 3x1 rectangle anti-clockwise
            t = [1, 3, 1, 3]
            delta = 1

            for j in range(len(t)):
                for k in range(t[j]):
                    action = Action(delta)
                    f.write(' '.join([str(e) for e in state.state]) +
                            '\n')  # write state
                    f.write(' '.join(
                        [str(e) for e in oned_to_onehot(action.delta, 4)]) +
                            '\n')  # write action
                    f.write(' '.join(
                        [str(e) for e in oned_to_onehot(action.delta, 4)]) +
                            '\n')  # write c[t]s
                    state = T(state, action, sum(t[0:j]) + k)

                if delta == 1:
                    delta = 3
                elif delta == 3:
                    delta = 0
                elif delta == 0:
                    delta = 2
예제 #4
0
def gen_room_trajs(grid_width, grid_height, room_size):

    N = 300
    T = 50

    num_goals = 4

    expert_data_dict = {}
    env_data_dict = {
        'num_actions': num_goals,
        'num_goals': 4,
    }

    obstacles, rooms, room_centres = create_obstacles(grid_width,
                                                      grid_height,
                                                      env_name='room',
                                                      room_size=room_size)
    #T = TransitionFunction(grid_width, grid_height, obstacle_movement)
    set_diff = list(set(product(tuple(range(0, grid_width)),tuple(range(0, grid_height)))) \
                    - set(obstacles))

    room_set = set(rooms)
    room_centre_set = set(room_centres)

    graph = Graph()
    deltas = {(0, 1): 0, (0, -1): 1, (-1, 0): 2, (1, 0): 3}

    for node in set_diff:
        for a in deltas:
            neigh = (node[0] + a[0], node[1] + a[1])
            if neigh[0] >= 0 and neigh[0] < grid_width and \
                    neigh[1] >= 0 and neigh[1] < grid_height:
                if neigh not in obstacles:
                    graph.add_edge(node, neigh, 1)
                    graph.add_edge(neigh, node, 1)

    for n in range(N):
        states, actions, goals = [], [], []

        rem_len, path_key = T, str(n)
        expert_data_dict[path_key] = {'state': [], 'action': [], 'goal': []}

        #start_state = State(sample_start(set_diff), obstacles)

        # initial start state will never be at centre of any room
        start_state = State(
            sample_start(list(set(set_diff) - room_centre_set)), obstacles)
        while rem_len > 0:

            #apple_state = State(sample_start(
            #    list(room_set-set(start_state.coordinates))), obstacles)

            # randomly select one room (goal) and place apple at its centre
            goal = random.choice(range(len(room_centres)))
            while room_centres[goal] == start_state.coordinates:
                goal = random.choice(range(len(room_centres)))
            apple_state = State(room_centres[goal], obstacles)
            # randomly spawn agent in a room, but not at same location as apple
            #start_state = State(sample_start(list(set(set_diff) - set(room_centres[goal]))), obstacles)

            source = start_state.coordinates
            destination = apple_state.coordinates
            p = graph.Dijkstra(source)
            node = destination

            path = []
            while node != source:
                path.append(node)
                node = p[node]

            path.append(source)
            path.reverse()

            path_len = min(len(path) - 1, rem_len)

            for i in range(path_len):
                s = path[i]
                next_s = path[i + 1]

                #state = np.array(s + destination)
                state = np.array(s)
                action = (next_s[0] - s[0], next_s[1] - s[1])
                action_delta = deltas[action]

                states.append(state)
                actions.append(action_delta)
                #goals.append(destination)
                goal_onehot = np.zeros((num_goals, ))
                goal_onehot[goal] = 1.0
                goals.append(goal_onehot)

            rem_len = rem_len - path_len
            start_state.coordinates = destination

        expert_data_dict[path_key]['state'] = states
        expert_data_dict[path_key]['action'] = actions
        expert_data_dict[path_key]['goal'] = goals

    return env_data_dict, expert_data_dict, obstacles, set_diff
예제 #5
0
def gen_diverse_trajs(grid_width, grid_height):
    '''Generate diverse trajectories in a 21x21 grid with 4 goals.

    Return: Dictionary with keys as text filenames and values as dictionary.
        Each value dictionary contains two keys, 'states' with a list of states
        as value, and 'actions' with list of actions as value.
    '''

    assert grid_width == 21 and grid_height == 21, "Incorrect grid width height"
    N = 20
    goals = [(0, 0), (20, 20), (20, 0), (0, 20)]
    n_goals = len(goals)

    obstacles = create_obstacles(21, 21, 'diverse')

    T = TransitionFunction(grid_width, grid_height, obstacle_movement)

    set_diff = list(set(product(tuple(range(7,13)),tuple(range(7,13)))) \
            - set(obstacles))
    expert_data_dict = {}
    env_data_dict = {
        'num_actions': 8,
        'num_goals': n_goals,
        'goals': np.array(goals),
    }

    for n in range(N):

        start_state = State(sample_start(set_diff), obstacles)

        for g in range(n_goals):  # loop over goals
            # path 1 - go up/down till boundary and then move right/left

            if g == 0 or g == 2:  # do path 1 only for goal 0 and goal 2

                state = start_state
                path_key = str(n) + '_' + str(g) + '_' + str(1) + '.txt'
                expert_data_dict[path_key] = {
                    'state': [],
                    'action': [],
                    'goal': []
                }

                delta = 0 if g < 2 else 1
                action = Action(delta)

                while state.state[1] != grid_height - 1 and state.state[1] != 0:
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = T(state, action, 0)

                delta = 3 if g == 0 or g == 3 else 2
                action = Action(delta)

                while state.state[0] != grid_width - 1 and state.state[0] != 0:
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = T(state, action, 0)

                assert (state.coordinates in goals)

            # path 2 - go right/left till boundary and then move up/down

            if g == 1:  # do path 2 only for goal 1

                state = start_state
                path_key = str(n) + '_' + str(g) + '_' + str(2) + '.txt'
                expert_data_dict[path_key] = {
                    'state': [],
                    'action': [],
                    'goal': []
                }

                delta = 3 if g == 0 or g == 3 else 2
                action = Action(delta)

                while state.state[0] != grid_width - 1 and state.state[0] != 0:
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = T(state, action, 0)

                delta = 0 if g < 2 else 1
                action = Action(delta)

                while state.state[1] != grid_height - 1 and state.state[1] != 0:
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = T(state, action, 0)

                assert (state.coordinates in goals)

            # path 3 - go diagonally till obstacle and then
            #          move up/down if x > 10 or right/left if y > 10
            #          and then move right/left or up/down till goal

            if g == 3:  # do path 3 only for goal 3

                state = start_state
                path_key = str(n) + '_' + str(g) + '_' + str(3) + '.txt'
                expert_data_dict[path_key] = {
                    'state': [],
                    'action': [],
                    'goal': []
                }

                delta = g + 4
                action = Action(delta)

                while True:
                    new_state = T(state, action, 0)
                    if new_state.coordinates == state.coordinates:
                        break
                    expert_data_dict[path_key]['state'].append(state.state)
                    expert_data_dict[path_key]['action'].append(action.delta)
                    expert_data_dict[path_key]['goal'].append(g)
                    state = new_state

                if T(state, Action(2), 0).coordinates == state.coordinates \
                    or T(state, Action(3), 0).coordinates == state.coordinates:

                    delta = 0 if g < 2 else 1
                    action = Action(delta)

                    while state.state[1] != grid_height - 1 and state.state[
                            1] != 0:
                        expert_data_dict[path_key]['state'].append(state.state)
                        expert_data_dict[path_key]['action'].append(
                            action.delta)
                        expert_data_dict[path_key]['goal'].append(g)
                        state = T(state, action, 0)

                    delta = 3 if g == 0 or g == 3 else 2
                    action = Action(delta)

                    while state.state[0] != grid_width - 1 and state.state[
                            0] != 0:
                        expert_data_dict[path_key]['state'].append(state.state)
                        expert_data_dict[path_key]['action'].append(
                            action.delta)
                        expert_data_dict[path_key]['goal'].append(g)
                        state = T(state, action, 0)

                else:

                    delta = 3 if g == 0 or g == 3 else 2
                    action = Action(delta)

                    while state.state[0] != grid_width - 1 and state.state[
                            0] != 0:
                        expert_data_dict[path_key]['state'].append(state.state)
                        expert_data_dict[path_key]['action'].append(
                            action.delta)
                        expert_data_dict[path_key]['goal'].append(g)
                        state = T(state, action, 0)

                    delta = 0 if g < 2 else 1
                    action = Action(delta)

                    while state.state[1] != grid_height - 1 and state.state[
                            1] != 0:
                        expert_data_dict[path_key]['state'].append(state.state)
                        expert_data_dict[path_key]['action'].append(
                            action.delta)
                        expert_data_dict[path_key]['goal'].append(g)
                        state = T(state, action, 0)

                assert (state.coordinates in goals)

    return env_data_dict, expert_data_dict, obstacles, set_diff
예제 #6
0
                    help='interval between training status logs (default: 10)')
parser.add_argument('--save-interval', type=int, default=100, metavar='N',
                    help='interval between saving policy weights (default: 100)')
parser.add_argument('--entropy-coeff', type=float, default=0.0, metavar='N',
                    help='coefficient for entropy cost')
parser.add_argument('--clip-epsilon', type=float, default=0.2, metavar='N',
                    help='Clipping for PPO grad')
parser.add_argument('--checkpoint', type=str, required=True,
                    help='path to checkpoint')

args = parser.parse_args()


#-----Environment-----#
width = height = 12
obstacles = create_obstacles(width, height)
set_diff = list(set(product(tuple(range(3, width-3)), repeat=2)) - set(obstacles))
start_loc = sample_start(set_diff)

s = State(start_loc, obstacles)
T = TransitionFunction(width, height, obstacle_movement)

if args.expert_path == 'SR2_expert_trajectories/':
    R = RewardFunction_SR2(-1.0,1.0,width)
else:
    R = RewardFunction(-1.0,1.0)

num_inputs = s.state.shape[0]
num_actions = 4
if args.expert_path == 'SR2_expert_trajectories/':
    num_c = 2