Example #1
    def reset(self,
              env: IEnvironment[State, Action, Reward],
              root_option: Node[Option[OptionData]],
              random_seed: Union[int, RandomState] = None) -> None:
        """
            Parameters
            ----------
            env: IEnvironment
            root_option: Node[Option]
            random_seed: Union[int, RandomState] = None
        """
        if random_seed is not None:
            self.random = optional_random(random_seed)
        # check that we haven't seen this root before
        assert root_option not in self._num_options
        if self.max_length is not None and len(self.list_roots) >= self.max_length:
            # evict the oldest root along with its bookkeeping
            to_remove: Node[Option[OptionData]] = self.list_roots.pop(0)
            del self._num_options[to_remove]
            del self._num_transitions[to_remove]
            del self._trajectory_for[to_remove]
        self.list_roots.append(root_option)
        self._num_options[root_option] = 0
        self._num_transitions[root_option] = 0
        self._trajectory_for[root_option] = []

        self.current_option = root_option
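
Every snippet in this listing funnels its seed through an optional_random helper whose definition is not shown. A minimal sketch of what it presumably does, inferred from how it is called (an assumption, not the actual implementation):

from typing import Union
from numpy.random import RandomState

def optional_random(seed: Union[int, RandomState] = None) -> RandomState:
    # Assumed behavior: pass an existing RandomState through unchanged,
    # build one from an int seed, and fall back to an unseeded generator
    # when seed is None (some docstrings below mention "global numpy random",
    # so the real helper may return the shared global state instead).
    if isinstance(seed, RandomState):
        return seed
    return RandomState(seed)  # RandomState(None) seeds itself from OS entropy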
Example #2
 def _random_tile_of_type(
         self,
         required_type: str = None,
         rand_seed: Union[int, RandomState] = None) -> Point:
     random: RandomState = optional_random(rand_seed)
     possibilities: List[Point] = self._all_tiles_of_type(required_type)
     return array_random_choice(possibilities, random=random)
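
Example #2 also calls an array_random_choice helper that is not shown. Presumably it exists because np.random.choice cannot sample directly from a list of array-like Points; a hypothetical sketch under that assumption:

from typing import List, TypeVar
from numpy.random import RandomState

T = TypeVar("T")

def array_random_choice(items: List[T], random: RandomState) -> T:
    # Assumed behavior: draw a uniform random index and return that element,
    # sidestepping np.random.choice's requirement of a 1-D array input.
    return items[random.randint(len(items))]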
Example #3
    def reset(self,
              env: IEnvironment[State, Action, Reward],
              root_option: Option[OptionData],
              random_seed: Union[int, RandomState] = None) -> None:
        """
            Reset the agent to function in a new environment/episode.
            Parameters
            ----------
            env: IEnvironment[State, Action, Reward]
                the environment the agent is about to act in
            root_option: Option
                the base option that the agent begins executing
        """
        self.root_option_node = Node(root_option)
        self.current_option_node = self.root_option_node
        self.prev_option_node = None
        if random_seed is not None:
            self.random = optional_random(random_seed)

        self.evaluator.reset(env, random_seed)
        self.generator.reset(env, random_seed)
        self.planning_terminator.reset(env, random_seed)
        self.policy_terminator.reset(env, random_seed)
        self.low_level.reset(env, root_option, random_seed)
        self.memory.reset(env, self.root_option_node, random_seed)
Example #4
 def __init__(self):
     self.target_path: Optional[List[DirectedPoint]] = None
     self.waypoints: PriorityQueue[List[DirectedPoint]] = PriorityQueue()
     self.history: List[DirectedPoint] = []
     self.visited: NumPyDict[Point, bool] = NumPyDict(dtype=np.int8)
     self.current_goal: Optional[Point] = None
     self.random: RandomState = optional_random()
     self.backstep: int = 0  # used if we are in the process of backing up
Example #5
 def reset(self, env: MazeWorld = None, random_seed: Union[int, RandomState] = None):
     # keep the previously stored env when no new one is provided
     if env is not None:
         self.env = env
     assert self.env is not None
     self.eps = 0.03
     self.eps_max = 0.7
     self.history = []
     self.n_repeats = 0
     self.random = optional_random(random_seed)
Example #6
 def _create_random_problem(
         self,
         rand_seed: Union[int, RandomState] = None) -> Tuple[Point, Point]:
     random = optional_random(rand_seed)
     start = self._random_tile_of_type("Empty", random)
     end = self._random_tile_of_type("Empty", random)
     while np.array_equal(start, end):
         end = self._random_tile_of_type("Empty", random)
     return start, end
Example #7
    def __init__(self,
                 v_model: IVModel[State, Reward, OptionData],
                 q_model: IQModel[State, Reward, OptionData],
                 settings: Dict[str, Any],
                 get_beta: Callable[[int], float],
                 gamma: float):

        self.v_model: IVModel[State, Reward, OptionData] = v_model
        self.q_model: IQModel[State, Reward, OptionData] = q_model
        self.get_beta: Callable[[int], float] = get_beta
        self.step: int = 0
        self.gamma: float = gamma
        self.random: RandomState = optional_random(settings['random'])
Example #8
 def reset(self,
           env: Optional[IEnvironment[OneHotImg, Action, Reward]],
           root_option: Option[Point],
           random_seed: Union[int, RandomState] = None) -> None:
     self.target_path = None
     self.waypoints = PriorityQueue()
     self.history = []
     self.visited = NumPyDict(dtype=np.int8)
     self.current_goal = root_option.value
     self.backstep = 0
     if random_seed is not None:
         self.random = optional_random(random_seed)
Example #9
    def __init__(self,
                 source_network: keras.Model,
                 target_network: keras.Model,
                 action_space: gym.spaces.Discrete,
                 batch_size: int,
                 replay_size: int,
                 device: tf.device = None,
                 gamma: float = 0.99,
                 learning_rate: float = 3e-4,
                 random: Union[int, RandomState] = None,
                 target_update_freq: int = 1000,
                 **kwargs):
        """
        :param source_network: keras.Model : State* => [batch, n_stations]
            Network used for scoring.
            State should be in the expected state format, with the first
            dimension reserved for batches
        :param target_network: Should basically be a copy of source_network
        :param action_space: gym.spaces.Discrete
        :param batch_size: int
            batch_size for training
        :param replay_size: int
            number of instances to save in memory
        :param (optional) device: tf.device
            defaults to first gpu if available
        :param (optional) gamma: float in (0, 1)
            defaults to 0.99
        :param (optional) learning_rate: float
            defaults to 3e-4
        :param (optional) random: Union[int, RandomState] initial seed
            defaults to using global random
        :param kwargs: used for ReplayBuffer
        """
        kwargs = kwargify(locals())
        self.device: tf.device = optional_device(device)
        self.n_actions: int = action_space.n
        self.random: RandomState = optional_random(random)
        self.gamma = gamma
        self.learning_rate: float = learning_rate
        self.batch_size: int = batch_size
        self.target_update_freq: int = target_update_freq
        assert self.batch_size > 0

        self.q_network: keras.Model = source_network
        self.target_q_network: keras.Model = target_network

        self.replay_buffer = ReplayBuffer(capacity=replay_size, **kwargs)
        self.optimizer = optimizers.Adam(learning_rate=learning_rate)
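
Example #9 stores a target_update_freq, but the corresponding update step is not part of the snippet. A hedged sketch of how that counter is typically consumed with the two keras models above (the method name and the self.step counter are assumptions):

    def _maybe_sync_target(self) -> None:
        # Hypothetical helper: every target_update_freq training steps, copy
        # the online network's weights into the frozen target network.
        if self.step % self.target_update_freq == 0:
            self.target_q_network.set_weights(self.q_network.get_weights())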
Example #10
    def __init__(self,
                 capacity: int,
                 random: Union[int, RandomState] = None,
                 **kwargs):
        """
        :param capacity: int
            number of replay transitions to hold
        :param (optional) random: Union[int, RandomState]
            random seed to use
            defaults to global numpy random
        :param kwargs: for compatibility
        """
        self.capacity: int = capacity
        self.buffer: deque = deque(maxlen=self.capacity)

        self.random: RandomState = optional_random(random)
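
Example #10 only shows construction of the buffer; a hedged sketch of how its RandomState would typically drive sampling (the sample method is an assumption, not part of the original class):

    def sample(self, batch_size: int) -> list:
        # Hypothetical method: draw distinct indices with the buffer's own
        # RandomState so results are reproducible under a fixed seed.
        indices = self.random.choice(len(self.buffer), size=batch_size, replace=False)
        return [self.buffer[i] for i in indices]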
Example #11
 def reset(self,
           env: IEnvironment[State, Action, Reward],
           random_seed: Union[int, RandomState] = None) -> None:
     """
         Prepares the agent to begin functioning in the new environment.
         Should be called each time there is a new episode or a new env.
         random_seed may be passed in as well to set the random seed
         Parameters
         ----------
          env: IEnvironment[State, Action, Reward]
             the new environment (may be the same as the old)
          random_seed: Union[int, RandomState] = None
             the new random seed to be used. If None, no changes
             are made
     """
     if random_seed is not None:
         self.random = optional_random(random_seed)
     self.q_model.reset(env, random_seed)
     self.v_model.reset(env, random_seed)
Example #12
    def __init__(self,
                 max_length: int = None,
                 random_seed: Union[int, RandomState] = None):
        """
            Parameters
            ----------
            max_length: int > 0 = None
                maximum number of root options to remember; None means no limit
            random_seed: Union[int, RandomState] = None
        """
        assert max_length is None or max_length > 0
        self.max_length = max_length
        self.random: RandomState = optional_random(random_seed)

        self._num_options: Dict[Node[Option[OptionData]], int] = {}
        self._num_transitions: Dict[Node[Option[OptionData]], int] = {}
        self._trajectory_for: Dict[Node[Option[OptionData]],
                                   Trajectory[State, Action, Reward]] = {}
        self.list_roots: List[Node[Option]] = []

        self.current_option: Node[Option] = None
Example #13
    def __init__(self, arrivals: Arrivals, departures: List[List[int]],
                 initial_station_state: np.ndarray, station_info: np.ndarray,
                 sample_distance: float, sample_amount: Union[float, int],
                 max_cars: int, car_speed: float, **kwargs):
        """

        :param arrivals: List[List[ArrivalEvents]]: [max_t, ]
            list of arrivals at each timestep. Note: some of these will
            become queries
        :param departures: List[List[int]]
            list of indices of each station with departing cars at each timestep
            Note: multiple cars may depart from the same station at the same timestep
            Note: this only lists departures that are not associated with an arrival
        :param initial_station_state:  np.ndarray[int8] : [n_stations, 1]
            initial number of occupied slots at stations
        :param station_info: np.ndarray[float] : [n_stations, 3]
            idx => (x, y, max_occupancy)
            information about each station, indexed by rows
        :param sample_distance: float
            samples are generated around arrival event
                with range Normal(0, sample_distance)
        :param sample_amount: Union[float, int]
                int => generates this many queries
                float => turns this percentage of arrivals into queries
        :param max_cars: int
                the number of slots to allocate for holding car data
        :param car_speed: float
            how far each car moves towards destination at each timestep
        :param kwargs: for compatibility
        """
        kwargs = kwargify(locals())
        self.arrivals: Arrivals = arrivals
        self.departures: List[List[int]] = departures
        self.initial_station_state: np.ndarray = initial_station_state
        self.station_info: np.ndarray = station_info
        self.sample_distance: float = sample_distance
        self.sample_amount: Union[int, float] = sample_amount
        self.car_speed: float = car_speed
        self.max_cars: int = max_cars
        self.random: RandomState = optional_random()
        self.kwargs = kwargs
Example #14
    def __init__(self,
            evaluator: IEvaluator[State, Action, Reward, OptionData],
            generator: IGenerator[State, Action, Reward, OptionData],
            planning_terminator: IPlanningTerminator[State, Action, Reward, OptionData],
            policy_terminator: IPolicyTerminator[State, Action, Reward, OptionData],
            low_level: IOptionBasedAgent[State, Action, Reward, OptionData],
            memory: IMemory[State, Action, Reward, OptionData],
            settings: Dict[str, Any]):
        self.evaluator: IEvaluator[State, Action, Reward, OptionData] = evaluator
        self.generator: IGenerator[State, Action, Reward, OptionData] = generator
        self.planning_terminator: IPlanningTerminator[State, Action, Reward, OptionData] = \
            planning_terminator
        self.policy_terminator: IPolicyTerminator[State, Action, Reward, OptionData] = \
            policy_terminator
        self.low_level: IOptionBasedAgent[State, Action, Reward, OptionData] = low_level
        self.memory: IMemory[State, Action, Reward, OptionData] = memory 

        self.random: RandomState = optional_random(settings['random'])

        self.current_option_node: Optional[Node[Option[OptionData]]] = None
        self.prev_option_node: Optional[Node[Option[OptionData]]] = None
        self.actionable_option_node: Optional[Node[Option[OptionData]]] = None
        self.root_option_node: Optional[Node[Option[OptionData]]] = None
Example #15
 def seed(self, random: Union[int, RandomState] = None) -> None:
     self.random = optional_random(random)
Example #16
 def reset(self,
           rand_seed: Union[int, RandomState] = None) -> Tuple[State, Goal]:
     random = optional_random(rand_seed)
     self._location, self._goal = self._create_random_problem(random)
     return self.state(), self._goal