Example #1
def test_flatten_space_boxes(space):
    flat_space = utils.flatten_space(space)
    assert isinstance(flat_space,
                      Box), f"Expected {type(flat_space)} to equal {Box}"
    flatdim = utils.flatdim(space)
    (single_dim, ) = flat_space.shape
    assert single_dim == flatdim, f"Expected {single_dim} to equal {flatdim}"
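
As a standalone illustration of the pattern this test checks, here is a minimal sketch assuming gym's public spaces API (the Dict layout below is made up for illustration):

from gym import spaces
from gym.spaces import utils

# Any nested space flattens into a single 1-D Box whose length equals flatdim().
space = spaces.Dict({
    "position": spaces.Box(low=-1.0, high=1.0, shape=(3,)),
    "velocity": spaces.Box(low=-1.0, high=1.0, shape=(2,)),
})
flat_space = utils.flatten_space(space)
assert isinstance(flat_space, spaces.Box)
assert flat_space.shape == (utils.flatdim(space),)  # (5,)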
Example #2
def test_flat_space_contains_flat_points(space):
    some_samples = [space.sample() for _ in range(10)]
    flattened_samples = [utils.flatten(space, sample) for sample in some_samples]
    flat_space = utils.flatten_space(space)
    for i, flat_sample in enumerate(flattened_samples):
        assert (
            flat_sample in flat_space
        ), f"Expected sample #{i} {flat_sample} to be in {flat_space}"
Example #3
def test_flatten_space_boxes(space):
    flat_space = utils.flatten_space(space)
    assert isinstance(flat_space, Box), "Expected {} to equal {}".format(
        type(flat_space), Box)
    flatdim = utils.flatdim(space)
    (single_dim, ) = flat_space.shape
    assert single_dim == flatdim, "Expected {} to equal {}".format(
        single_dim, flatdim)
Example #4
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        self.obs_sizes = _get_size(obs_space.spaces[0])
        self.n_players = len(obs_space.spaces)
        self.n_actions = action_space.spaces[0].n

        os = []
        ms = []
        intermediate_space = Box(low=0, high=2, shape=(8, ), dtype=np.float32)
        for pl in range(self.n_players):
            os.append(flatten_space(obs_space.spaces[pl].spaces['obs']))
            mid_space = flatten_space(
                Dict({
                    'obs': intermediate_space,
                    'signal': obs_space.spaces[pl]['signal']
                }))
            ms.append(mid_space)

        assert self.n_players <= 2, "Not yet supported for more than 2 players"  # TODO: make it support n_players > 2
        embed_config = {
            "fcnet_hiddens": [128, 128],
            "fcnet_activation": 'relu',
            "max_seq_len": 20
        }
        self.embed_pl_models = {
            pl: FullyConnectedNetwork(os[pl], intermediate_space, 8,
                                      embed_config, "{}_embeding".format(pl))
            for pl in range(self.n_players)
        }

        self.pl_models = {
            pl: FullyConnectedNetwork(ms[pl], action_space.spaces[pl],
                                      action_space.spaces[pl].n, model_config,
                                      name)
            for pl in range(self.n_players)
        }

        # Set models as attributes to obtain parameters
        for pl in range(self.n_players):
            setattr(self, "embed_model_{}".format(pl),
                    self.embed_pl_models[pl])
            setattr(self, "model_{}".format(pl), self.pl_models[pl])
Example #5
def test_flat_space_contains_flat_points(space):
    some_samples = [space.sample() for _ in range(10)]
    flattened_samples = [
        utils.flatten(space, sample) for sample in some_samples
    ]
    flat_space = utils.flatten_space(space)
    for i, flat_sample in enumerate(flattened_samples):
        assert flat_sample in flat_space,\
            'Expected sample #{} {} to be in {}'.format(i, flat_sample, flat_space)
Example #6
    def __init__(self, config, width, height, seeker, hiding, walls):
        self.default_cfg = config

        self.map_path = config['game']['map']
        self.fps = config['game']['fps']
        self.clock = pygame.time.Clock()
        self.screen = None

        self.dt = self.clock.tick_busy_loop(self.fps)
        self.cfg = config['game']
        self.duration = config['game']['duration']

        self.width = width
        self.height = height

        self.walls_group = pygame.sprite.Group()
        self.env_walls = walls
        self.walls_group.add(walls)

        self.player_seek = seeker
        self.player_hide = hiding
        self.players_group = pygame.sprite.Group()
        self.players_group.add(self.player_seek)
        self.players_group.add(self.player_hide)

        self.screen_lite = pygame.Surface((self.width, self.height))
        if self.walls_group:
            for wall in walls:
                wall_p = [(p.x, p.y) for p in wall.get_abs_vertices()]
                pygame.draw.polygon(
                    self.screen_lite, (255, 255, 255), wall_p)

        self.p_hide_cfg = config['hiding']
        self.p_seek_cfg = config['seeker']
        self.agent_env = {}
        self.action_space = spaces.Discrete(6)  # for both agents
        '''
        0 - NOOP 
        1 - FORWARD MOVEMENT
        2 - BACKWARD MOVEMENT
        3 - ROTATE RIGHT (clockwise)
        4 - ROTATE LEFT (counter-clockwise)
        5 - SPECIAL (ADD/DELETE WALL)
        '''

        self.observation_space_n = [
            spaces.Box(low=0, high=1, shape=(self.width, self.height)),
            spaces.Box(low=0, high=1, shape=(self.width, self.height)),
        ]

        self.flatten_observation_space_n = [flatten_space(
            space) for space in self.observation_space_n]
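
Each observation space above is a (width, height) Box, so its flattened counterpart is a 1-D Box with width * height entries; a quick standalone check with illustrative sizes:

from gym import spaces
from gym.spaces import flatten_space

width, height = 64, 48
flat = flatten_space(spaces.Box(low=0, high=1, shape=(width, height)))
assert flat.shape == (width * height,)  # 3072 entries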
Example #7
def test_dtypes(original_space, expected_flattened_dtype):
    flattened_space = utils.flatten_space(original_space)

    original_sample = original_space.sample()
    flattened_sample = utils.flatten(original_space, original_sample)
    unflattened_sample = utils.unflatten(original_space, flattened_sample)

    assert flattened_space.contains(
        flattened_sample
    ), "Expected flattened_space to contain flattened_sample"
    assert flattened_space.dtype == expected_flattened_dtype, "Expected flattened_space's dtype to equal " \
                                                              "{}".format(expected_flattened_dtype)

    assert flattened_sample.dtype == flattened_space.dtype, "Expected flattened_space's dtype to equal " \
                                                            "flattened_sample's dtype "

    compare_sample_types(original_space, original_sample, unflattened_sample)
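
A concrete instance of the dtype contract this test enforces, sketched under the assumption that gym preserves the source Box dtype through flatten and flatten_space:

from gym import spaces
from gym.spaces import utils
import numpy as np

space = spaces.Box(low=0, high=255, shape=(4,), dtype=np.uint8)
flattened_space = utils.flatten_space(space)
flattened_sample = utils.flatten(space, space.sample())
assert flattened_space.dtype == np.uint8
assert flattened_sample.dtype == flattened_space.dtype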
Example #8
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)

        self.obs_sizes = _get_size(obs_space.spaces[0])
        self.n_players = len(obs_space.spaces)
        self.n_actions = action_space.spaces[0].n

        os = []
        for pl in range(self.n_players):
            os.append(flatten_space(obs_space.spaces[pl]))

        self.pl_models = {
            pl: FullyConnectedNetwork(os[pl], action_space.spaces[pl],
                                      action_space.spaces[pl].n, model_config,
                                      name)
            for pl in range(self.n_players)
        }

        # Set models as attributes to obtain parameters
        for pl in range(self.n_players):
            setattr(self, "model_{}".format(pl), self.pl_models[pl])
Example #9
def test_flatten_space(space, expected_flattened_space):
    flattened_space = utils.flatten_space(space)
    assert flattened_space == expected_flattened_space
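
One pairing this parametrized test might receive, assuming gym's documented rule that a Discrete space flattens to a one-hot Box (the int64 dtype follows Discrete's dtype in recent gym versions):

from gym import spaces
from gym.spaces import utils
import numpy as np

flattened = utils.flatten_space(spaces.Discrete(3))
assert flattened == spaces.Box(low=0, high=1, shape=(3,), dtype=np.int64)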
Example #10
    def configure(self, setting: ContinualRLSetting):
        super().configure(setting)

        # The default value for the buffer size in the DQN model is WAY too
        # large, so we re-size it depending on the size of the observations.

        flattened_observation_space = flatten_space(setting.observation_space)
        observation_size_bytes = flattened_observation_space.sample().nbytes

        # IF there are more than a few dimensions per observation, then we
        # should probably reduce the size of the replay buffer according to
        # the size of the observations.
        max_buffer_size_bytes = self.max_buffer_size_megabytes * 1024 * 1024
        max_buffer_length = max_buffer_size_bytes // observation_size_bytes

        if max_buffer_length == 0:
            raise RuntimeError(
                f"Couldn't even fit a single observation in the buffer, "
                f"given the  specified max_buffer_size_megabytes "
                f"({self.max_buffer_size_megabytes}) and the size of a "
                f"single observation ({observation_size_bytes} bytes)!")

        if self.hparams.buffer_size > max_buffer_length:
            calculated_size_bytes = observation_size_bytes * self.hparams.buffer_size
            calculated_size_gb = calculated_size_bytes / 1024**3
            warnings.warn(
                RuntimeWarning(
                    f"The selected buffer size ({self.hparams.buffer_size} is "
                    f"too large! (It would take roughly around "
                    f"{calculated_size_gb:.3f}Gb to hold  many observations alone! "
                    f"The buffer size will be capped at {max_buffer_length} "
                    f"entries."))

            self.hparams.buffer_size = int(max_buffer_length)

        # Don't use up too many of the observations from the task to fill up the buffer.
        # Truth is, we should probably get this to work first.

        # NOTE: Need to change some attributes depending on the maximal number of steps
        # in the environment allowed in the given Setting.
        if setting.max_steps:
            logger.info(
                f"Total training steps are limited to {setting.steps_per_task} steps "
                f"per task, {setting.max_steps} steps in total.")
            ten_percent_of_step_budget = setting.steps_per_task // 10

            if self.hparams.buffer_size > ten_percent_of_step_budget:
                warnings.warn(
                    RuntimeWarning(
                        "Reducing max buffer size to ten percent of the step budget."
                    ))
                self.hparams.buffer_size = ten_percent_of_step_budget

            if self.hparams.learning_starts > ten_percent_of_step_budget:
                logger.info(
                    f"The model was originally going to use the first "
                    f"{self.hparams.learning_starts} steps for pure random "
                    f"exploration, but the setting has a max number of steps set to "
                    f"{setting.max_steps}, therefore we will limit the number of "
                    f"exploration steps to 10% of that 'step budget' = "
                    f"{ten_percent_of_step_budget} steps.")
                self.hparams.learning_starts = ten_percent_of_step_budget

            if self.hparams.target_update_interval > ten_percent_of_step_budget:
                # Same for the 'update target network' interval.
                self.hparams.target_update_interval = ten_percent_of_step_budget // 2
                logger.info(
                    f"Reducing the target network update interval to "
                    f"{self.hparams.target_update_interval}, because of the limit on "
                    f"training steps imposed by the Setting.")

        logger.info(
            f"Will use a Replay buffer of size {self.hparams.buffer_size}.")
Example #11
    def configure(self, setting: ContinualRLSetting):
        super().configure(setting)
        # The default value for the buffer size in the DQN model is WAY too
        # large, so we re-size it depending on the size of the observations.
        # NOTE: (issue #156) Only consider the images, not the task labels for these
        # buffer size calculations (since the task labels might be None and have the
        # np.object dtype).
        x_space = setting.observation_space.x
        flattened_observation_space = flatten_space(x_space)
        observation_size_bytes = flattened_observation_space.sample().nbytes

        # IF there are more than a few dimensions per observation, then we
        # should probably reduce the size of the replay buffer according to
        # the size of the observations.
        max_buffer_size_bytes = self.max_buffer_size_megabytes * 1024 * 1024
        max_buffer_length = max_buffer_size_bytes // observation_size_bytes

        if max_buffer_length == 0:
            raise RuntimeError(
                f"Couldn't even fit a single observation in the buffer, "
                f"given the  specified max_buffer_size_megabytes "
                f"({self.max_buffer_size_megabytes}) and the size of a "
                f"single observation ({observation_size_bytes} bytes)!"
            )

        if self.hparams.buffer_size > max_buffer_length:
            calculated_size_bytes = observation_size_bytes * self.hparams.buffer_size
            calculated_size_gb = calculated_size_bytes / 1024 ** 3
            warnings.warn(
                RuntimeWarning(
                    f"The selected buffer size ({self.hparams.buffer_size} is "
                    f"too large! (It would take roughly around "
                    f"{calculated_size_gb:.3f}Gb to hold  many observations alone! "
                    f"The buffer size will be capped at {max_buffer_length} "
                    f"entries."
                )
            )

            self.hparams.buffer_size = int(max_buffer_length)

        # NOTE: Need to change some attributes depending on the maximal number of steps
        # in the environment allowed in the given Setting.
        if setting.max_steps:
            logger.info(
                f"Total training steps are limited to {setting.steps_per_task} steps "
                f"per task, {setting.max_steps} steps in total."
            )
            ten_percent_of_step_budget = setting.steps_per_phase // 10

            if self.hparams.buffer_size > ten_percent_of_step_budget:
                warnings.warn(
                    RuntimeWarning(
                        "Reducing max buffer size to ten percent of the step budget."
                    )
                )
                self.hparams.buffer_size = ten_percent_of_step_budget

            if self.hparams.learning_starts > ten_percent_of_step_budget:
                logger.info(
                    f"The model was originally going to use the first "
                    f"{self.hparams.learning_starts} steps for pure random "
                    f"exploration, but the setting has a max number of steps set to "
                    f"{setting.max_steps}, therefore we will limit the number of "
                    f"exploration steps to 10% of that 'step budget' = "
                    f"{ten_percent_of_step_budget} steps."
                )
                self.hparams.learning_starts = ten_percent_of_step_budget
                if self.hparams.train_freq != -1:
                    # Update the model at least 2 times during each task, and at most
                    # once per step.
                    self.hparams.train_freq = min(
                        self.hparams.train_freq, int(0.5 * ten_percent_of_step_budget),
                    )
                    self.hparams.train_freq = max(self.hparams.train_freq, 1)

                logger.info(f"Training frequency: {self.hparams.train_freq}")

        logger.info(f"Will use a Replay buffer of size {self.hparams.buffer_size}.")

        if setting.steps_per_phase:
            if not isinstance(self.hparams.train_freq, int):
                if self.hparams.train_freq[1] == "step":
                    self.hparams.train_freq = self.hparams.train_freq[0]
                else:
                    assert self.hparams.train_freq[1] == "episode"

                    # Use some value based of the maximum episode length if available,
                    # else use a "reasonable" default value.
                    # TODO: Double-check that this makes sense.
                    if setting.max_episode_steps:
                        self.hparams.train_freq = setting.max_episode_steps
                    else:
                        self.hparams.train_freq = 10

                    warnings.warn(
                        RuntimeWarning(
                            f"Need the training frequency units to be steps for now! "
                            f"(Train freq has been changed to every "
                            f"{self.hparams.train_freq} steps)."
                        )
                    )

            # NOTE: We limit the number of training steps per task, such that we never
            # attempt to fill the buffer using more samples than the environment allows.
            if self.hparams.train_freq > setting.steps_per_phase:
                self.hparams.n_steps = math.ceil(0.1 * setting.steps_per_phase)
                logger.info(
                    f"Capping the n_steps to 10% of step budget length: "
                    f"{self.hparams.n_steps}"
                )

            self.train_steps_per_task = min(
                self.train_steps_per_task,
                setting.steps_per_phase - self.hparams.train_freq - 1,
            )
            logger.info(
                f"Limitting training steps per task to {self.train_steps_per_task}"
            )
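
The train_freq handling at the end follows Stable-Baselines3's (n, "step"/"episode") convention; a minimal standalone sketch of the same conversion (the function name and fallback default are hypothetical):

def normalize_train_freq(train_freq, max_episode_steps=None):
    """Collapse an SB3-style (n, unit) train_freq into a plain step count."""
    if isinstance(train_freq, int):
        return train_freq
    n, unit = train_freq
    if unit == "step":
        return n
    assert unit == "episode"
    # Fall back to the episode length when known, else a "reasonable" default.
    return max_episode_steps if max_episode_steps else 10

assert normalize_train_freq((4, "step")) == 4
assert normalize_train_freq((1, "episode"), max_episode_steps=200) == 200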