def get_next_states(self, state, action):
    """Returns a distribution over next states, as (probability, state, reward) tuples."""
    action = int(action)
    orientation, x, y = state.agent_pos
    new_orientation, new_x, new_y = state.agent_pos
    new_tree_states = deepcopy(state.tree_states)
    new_bucket_states = deepcopy(state.bucket_states)
    new_carrying_apple = state.carrying_apple

    if action == Direction.get_number_from_direction(Direction.STAY):
        pass
    elif action < len(Direction.ALL_DIRECTIONS):
        new_orientation = action
        move_x, move_y = Direction.move_in_direction_number((x, y), action)
        # New position is legal
        if (0 <= move_x < self.width and 0 <= move_y < self.height
                and (move_x, move_y) in self.possible_agent_locations):
            new_x, new_y = move_x, move_y
        else:
            # Move only changes orientation, which we already handled
            pass
    elif action == 5:
        obj_pos = Direction.move_in_direction_number((x, y), orientation)
        if state.carrying_apple:
            # We always drop the apple
            new_carrying_apple = False
            # If we're facing a bucket, it goes there
            if obj_pos in new_bucket_states:
                prev_apples = new_bucket_states[obj_pos]
                new_bucket_states[obj_pos] = min(prev_apples + 1,
                                                 self.bucket_capacity)
        elif obj_pos in new_tree_states and new_tree_states[obj_pos]:
            new_carrying_apple = True
            new_tree_states[obj_pos] = False
        else:
            # Interact while holding nothing and not facing a tree: no-op.
            pass
    else:
        raise ValueError("Invalid action {}".format(action))

    new_pos = new_orientation, new_x, new_y

    def make_state(prob_apples_tuple):
        prob, tree_apples = prob_apples_tuple
        trees = dict(zip(self.tree_locations, tree_apples))
        s = ApplesState(new_pos, trees, new_bucket_states, new_carrying_apple)
        return (prob, s, 0)

    # For apple regeneration, don't regenerate apples that were just picked,
    # so use the apple booleans from the original state.
    old_tree_apples = [state.tree_states[loc] for loc in self.tree_locations]
    new_tree_apples = [new_tree_states[loc] for loc in self.tree_locations]
    return list(
        map(make_state, self.regen_apples(old_tree_apples, new_tree_apples)))
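# A minimal sketch of the regen_apples helper that get_next_states relies on.
# This is a hypothetical reconstruction, assuming each tree that was empty in
# the *original* state independently regrows an apple with probability
# self.apple_regen_probability, while trees that held an apple before the
# action keep whatever the action left (so a just-picked apple cannot regrow).
def regen_apples(self, old_tree_apples, new_tree_apples):
    """Yields (probability, tree_apples) pairs over regeneration outcomes."""
    if not old_tree_apples:
        yield (1.0, ())
        return
    p = self.apple_regen_probability
    old_first, new_first = old_tree_apples[0], new_tree_apples[0]
    for prob, rest in self.regen_apples(old_tree_apples[1:],
                                        new_tree_apples[1:]):
        if old_first:
            # Tree had an apple before the action: no regeneration here.
            yield (prob, (new_first,) + rest)
        else:
            # Tree was empty before the action: it may regrow an apple.
            yield (prob * p, (True,) + rest)
            yield (prob * (1 - p), (False,) + rest)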
def test_direction_number_conversion(self):
    all_directions = Direction.ALL_DIRECTIONS
    all_numbers = []

    for direction in all_directions:
        number = Direction.get_number_from_direction(direction)
        direction_again = Direction.get_direction_from_number(number)
        self.assertEqual(direction, direction_again)
        all_numbers.append(number)

    # Check that all directions are distinct
    num_directions = len(all_directions)
    self.assertEqual(len(set(all_directions)), num_directions)

    # Check that the numbers are 0, 1, ..., num_directions - 1
    self.assertEqual(set(all_numbers), set(range(num_directions)))
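# A minimal sketch of the Direction helpers this test exercises, assuming
# directions are (dx, dy) unit offsets numbered by their index in
# ALL_DIRECTIONS. This is a hypothetical reconstruction for illustration;
# the actual ordering and representation in the real class may differ.
class Direction:
    NORTH = (0, -1)
    SOUTH = (0, 1)
    EAST = (1, 0)
    WEST = (-1, 0)
    STAY = (0, 0)
    ALL_DIRECTIONS = [NORTH, SOUTH, EAST, WEST, STAY]

    @staticmethod
    def get_number_from_direction(direction):
        return Direction.ALL_DIRECTIONS.index(direction)

    @staticmethod
    def get_direction_from_number(number):
        return Direction.ALL_DIRECTIONS[number]

    @staticmethod
    def move_in_direction_number(pos, number):
        x, y = pos
        dx, dy = Direction.get_direction_from_number(number)
        return x + dx, y + dy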
def __init__(self, spec):
    self.init_state = deepcopy(spec.init_state)
    self.height = spec.height
    self.width = spec.width
    self.battery_locations = sorted(
        list(self.init_state.battery_present.keys()))
    self.num_batteries = len(self.battery_locations)
    self.feature_locations = list(spec.feature_locations)
    self.train_transition = spec.train_transition
    self.train_locations = list(self.train_transition.keys())
    assert set(self.train_locations) == set(self.train_transition.values())
    self.nA = 5

    super().__init__(10)

    self.default_action = Direction.get_number_from_direction(Direction.STAY)
    self.num_features = len(self.s_to_f(self.init_state))

    self.reset()
    states = self.enumerate_states()
    self.make_transition_matrices(states, range(self.nA), self.nS, self.nA)
    self.make_f_matrix(self.nS, self.num_features)
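# A minimal sketch of the matrix-building helpers used by this constructor
# (and the other environment constructors below). Hypothetical reconstruction:
# the get_num_from_state index mapping and the handling of deterministic
# environments are assumptions, not the base class's confirmed API.
import numpy as np

def make_transition_matrices(self, states, actions, nS, nA):
    """Builds self.T of shape (nS, nA, nS) with T[s, a, s'] = P(s' | s, a)."""
    self.T = np.zeros((nS, nA, nS))
    for state in states:
        s = self.get_num_from_state(state)  # assumed state-to-index mapping
        for a in actions:
            if hasattr(self, "get_next_states"):
                # Stochastic case: (probability, next state, reward) tuples.
                transitions = self.get_next_states(state, a)
            else:
                # Deterministic case: a point distribution on the next state.
                transitions = [(1.0, self.get_next_state(state, a), 0)]
            for prob, next_state, _ in transitions:
                self.T[s, a, self.get_num_from_state(next_state)] += prob

def make_f_matrix(self, nS, num_features):
    """Builds self.f_matrix with one feature vector per state."""
    self.f_matrix = np.zeros((nS, num_features))
    for state in self.enumerate_states():
        self.f_matrix[self.get_num_from_state(state)] = self.s_to_f(state)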
def get_next_state(self, state, action):
    """Returns the next state given a state and an action."""
    action = int(action)
    new_x, new_y = Direction.move_in_direction_number(state.agent_pos, action)
    # New position must still be in bounds; otherwise stay in place.
    if not (0 <= new_x < self.width and 0 <= new_y < self.height):
        new_x, new_y = state.agent_pos
    new_agent_pos = new_x, new_y

    new_train_pos, new_train_life = state.train_pos, state.train_life
    new_battery_present = deepcopy(state.battery_present)
    new_carrying_battery = state.carrying_battery

    # Recharge the train if we walk onto it while carrying a battery.
    if new_agent_pos == state.train_pos and state.carrying_battery:
        new_train_life = 10
        new_carrying_battery = False
    if new_train_life > 0:
        new_train_pos = self.train_transition[state.train_pos]
        new_train_life -= 1
    # Pick up a battery if we walk onto one while not already carrying one.
    if (new_agent_pos in state.battery_present
            and state.battery_present[new_agent_pos]
            and not state.carrying_battery):
        new_carrying_battery = True
        new_battery_present[new_agent_pos] = False

    return BatteriesState(
        new_agent_pos,
        new_train_pos,
        new_train_life,
        new_battery_present,
        new_carrying_battery,
    )
def __init__(self, spec):
    self.height = spec.height
    self.width = spec.width
    self.init_state = deepcopy(spec.init_state)
    self.apple_regen_probability = spec.apple_regen_probability
    self.bucket_capacity = spec.bucket_capacity
    self.include_location_features = spec.include_location_features

    self.tree_locations = list(self.init_state.tree_states.keys())
    self.num_trees = len(self.tree_locations)
    self.bucket_locations = list(self.init_state.bucket_states.keys())
    self.num_buckets = len(self.bucket_locations)
    used_locations = set(self.tree_locations + self.bucket_locations)
    self.possible_agent_locations = list(
        filter(
            lambda pos: pos not in used_locations,
            product(range(self.width), range(self.height)),
        ))
    self.nA = 6

    super().__init__(max(5, self.bucket_capacity))

    self.default_action = Direction.get_number_from_direction(Direction.STAY)
    self.num_features = len(self.s_to_f(self.init_state))

    self.reset()
    states = self.enumerate_states()
    self.make_transition_matrices(states, range(self.nA), self.nS, self.nA)
    self.make_f_matrix(self.nS, self.num_features)
def get_next_state(self, state, action):
    """Returns the next state given a state and an action."""
    action = int(action)
    if action == Direction.get_number_from_direction(Direction.STAY):
        pass
    elif action < len(Direction.ALL_DIRECTIONS):
        move_x, move_y = Direction.move_in_direction_number(state, action)
        # New position is legal
        if 0 <= move_x < self.width and 0 <= move_y < self.height:
            state = move_x, move_y
        else:
            # Illegal move off the grid: the state is unchanged.
            pass
    else:
        raise ValueError("Invalid action {}".format(action))
    return state
def get_orientation_char(orientation):
    direction_to_char = {
        Direction.NORTH: "↑",
        Direction.SOUTH: "↓",
        Direction.WEST: "←",
        Direction.EAST: "→",
        Direction.STAY: "*",
    }
    direction = Direction.get_direction_from_number(orientation)
    return direction_to_char[direction]
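# Example usage (hypothetical), e.g. when rendering the Apples agent, whose
# agent_pos stores (orientation, x, y) with the orientation as a direction
# number:
#
#     orientation, x, y = state.agent_pos
#     grid[y][x] = get_orientation_char(orientation)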
def get_next_state(self, state, action):
    """Returns the next state given a state and an action."""
    action = int(action)
    new_x, new_y = Direction.move_in_direction_number(state.agent_pos, action)
    # New position must still be in bounds; otherwise stay in place.
    if not (0 <= new_x < self.width and 0 <= new_y < self.height):
        new_x, new_y = state.agent_pos
    new_agent_pos = new_x, new_y

    new_vase_states = deepcopy(state.vase_states)
    if new_agent_pos in new_vase_states:
        new_vase_states[new_agent_pos] = False  # Break the vase

    return RoomState(new_agent_pos, new_vase_states)
def get_next_state(self, state, action):
    """Returns the next state given a state and an action."""
    action = int(action)
    new_x, new_y = Direction.move_in_direction_number(state.agent_pos, action)
    # New position must still be in bounds; otherwise stay in place.
    if not (0 <= new_x < self.width and 0 <= new_y < self.height):
        new_x, new_y = state.agent_pos
    new_agent_pos = new_x, new_y

    new_vase_states = deepcopy(state.vase_states)
    new_train_pos, new_train_intact = state.train_pos, state.train_intact
    if state.train_intact:
        new_train_pos = self.train_transition[state.train_pos]

    # Break the vase and train if appropriate
    if new_agent_pos in new_vase_states:
        new_vase_states[new_agent_pos] = False
    if new_agent_pos == new_train_pos:
        new_train_intact = False

    return TrainState(new_agent_pos, new_vase_states, new_train_pos,
                      new_train_intact)
def __init__(self, prob, use_pixels_as_observations=True):
    self.height = 3
    self.width = 3
    self.init_state = (1, 1)
    self.prob = prob
    self.nS = self.height * self.width
    self.nA = 5

    super().__init__(1, use_pixels_as_observations=use_pixels_as_observations)

    self.default_action = Direction.get_number_from_direction(Direction.STAY)
    self.num_features = len(self.s_to_f(self.init_state))

    self.reset()
    states = self.enumerate_states()
    self.make_transition_matrices(states, range(self.nA), self.nS, self.nA)
    self.make_f_matrix(self.nS, self.num_features)
def __init__(self, spec):
    self.height = spec.height
    self.width = spec.width
    self.init_state = deepcopy(spec.init_state)
    self.vase_locations = list(self.init_state.vase_states.keys())
    self.num_vases = len(self.vase_locations)
    self.carpet_locations = set(spec.carpet_locations)
    self.feature_locations = list(spec.feature_locations)
    self.nA = 5

    super().__init__(
        1, use_pixels_as_observations=spec.use_pixels_as_observations)

    self.default_action = Direction.get_number_from_direction(Direction.STAY)
    self.num_features = len(self.s_to_f(self.init_state))

    self.reset()
    states = self.enumerate_states()
    self.make_transition_matrices(states, range(self.nA), self.nS, self.nA)
    self.make_f_matrix(self.nS, self.num_features)
def _collect_data(self, n_rollouts, debug_only_stay=False):
    observations, actions = [], []
    for _ in range(n_rollouts):
        traj_len = 0
        # Resample until the trajectory is at least self.timesteps long.
        while traj_len < self.timesteps:
            obs = self.env.reset()
            traj_act = []
            traj_obs = [obs]
            done = False
            while not done:
                if debug_only_stay:
                    action = Direction.get_number_from_direction(
                        Direction.STAY)
                else:
                    action = self.env.action_space.sample()
                obs, _, done, _ = self.env.step(action)
                traj_obs.append(obs)
                traj_act.append(action)
            traj_len = len(traj_obs)
        # Pad the action sequence so it lines up with the observations.
        traj_act.append(np.zeros(self.action_space_shape))
        observations.append(traj_obs)
        actions.append(traj_act)
    return observations, actions
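# Hypothetical usage of _collect_data: truncate every rollout to a fixed
# window of self.timesteps steps and stack the results into arrays for
# training. The `model` name and the downstream pipeline are assumptions.
import numpy as np

obs, acts = model._collect_data(n_rollouts=100)
obs = np.array([traj[:model.timesteps] for traj in obs])
acts = np.array([traj[:model.timesteps] for traj in acts])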
def _get_eval_trajectory(self):
    eval_directions = [
        Direction.EAST,
        Direction.EAST,
        Direction.EAST,
        Direction.NORTH,
        Direction.NORTH,
        Direction.NORTH,
        Direction.WEST,
        Direction.SOUTH,
        Direction.WEST,
        Direction.WEST,
        Direction.WEST,
        Direction.WEST,
        Direction.SOUTH,
        Direction.SOUTH,
    ]
    eval_actions = [
        Direction.get_onehot_from_direction(d) for d in eval_directions
    ]
    assert len(eval_actions) < self.env.time_horizon

    trajectory = []
    obs1 = self.env.reset()
    for action in eval_actions:
        obs2, _, done, _ = self.env.step(action)
        state1 = self.latent_space.encoder(np.expand_dims(obs1, 0))
        state2 = self.latent_space.encoder(np.expand_dims(obs2, 0))
        trajectory.append((state1, action, state2))
        obs1 = obs2
    return trajectory
        3,
        ApplesState(
            agent_pos=(0, 0, 2),
            tree_states={
                (0, 0): True,
                (2, 0): True,
                (2, 4): True
            },
            bucket_states={(1, 2): 0},
            carrying_apple=False,
        ),
        apple_regen_probability=0.1,
        bucket_capacity=10,
        include_location_features=True,
    ),
    ApplesState(
        agent_pos=(Direction.get_number_from_direction(Direction.SOUTH), 1, 1),
        tree_states={
            (0, 0): True,
            (2, 0): False,
            (2, 4): True
        },
        bucket_states={(1, 2): 2},
        carrying_apple=False,
    ),
    np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
    np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
)
}