Esempio n. 1
0
    def init_env_and_agent(env_config: DictConfig,
                           wrappers_config: CollectionOfConfigType,
                           max_episode_steps: int, agent_config: DictConfig,
                           input_dir: str, env_instance_seed: int,
                           agent_instance_seed: int) -> (BaseEnv, Policy):
        """Instantiate a seeded, step-limited environment together with a seeded agent.

        Both objects are created while ``SwitchWorkingDirectoryToInput(input_dir)`` is active. The environment
        is wrapped in a ``TimeLimitWrapper`` unless the env factory already returned one.

        :param env_config: Environment config.
        :param wrappers_config: Wrapper config.
        :param max_episode_steps: Step limit handed to ``set_max_episode_steps`` of the time limit wrapper.
        :param agent_config: Config the policy is instantiated from.
        :param input_dir: Directory handed to ``SwitchWorkingDirectoryToInput`` (directory to load the model from).
        :param env_instance_seed: The seed for this particular env.
        :param agent_instance_seed: The seed for this particular agent.

        :return: Tuple of (instantiated environment, instantiated agent).
        """
        with SwitchWorkingDirectoryToInput(input_dir):
            # environment: build, make sure it is step-limited, then seed
            build_env = EnvFactory(env_config, wrappers_config)
            env = build_env()
            needs_time_limit = not isinstance(env, TimeLimitWrapper)
            if needs_time_limit:
                env = TimeLimitWrapper.wrap(env)
            env.set_max_episode_steps(max_episode_steps)
            env.seed(env_instance_seed)

            # agent: build from config, then seed
            policy_factory = Factory(base_type=Policy)
            agent = policy_factory.instantiate(agent_config)
            agent.seed(agent_instance_seed)

        return env, agent
Esempio n. 2
0
    def _generate_runners(self, run_mode: RunMode) -> List[TrainingRunner]:
        """
        Generates one training or rollout runner per loaded configuration.

        As a side effect ``self._workdirs`` and ``self._configs[run_mode]`` are overwritten with the values
        produced by the configuration loader.

        :param run_mode: Run mode. See :py:class:`~maze.maze.api.RunMode`.
        :return: List of instantiated runners; ``setup()`` has already been called on each of them.
        """
        auditor = self._auditors[run_mode]
        loader = ConfigurationLoader(_run_mode=run_mode,
                                     _kwargs=auditor.kwargs,
                                     _overrides=auditor.overrides,
                                     _ephemeral_init_kwargs=auditor.ephemeral_init_kwargs)
        loader.load()

        self._workdirs = loader.workdirs
        self._configs[run_mode] = loader.configs

        # Training mode yields training runners, every other mode yields rollout runners.
        runner_base_type = TrainingRunner if run_mode == RunMode.TRAINING else RolloutRunner

        runners: List[TrainingRunner] = []
        for workdir, config in zip(self._workdirs, self._configs[run_mode]):
            # Change to correct working directory (necessary due to being outside of Hydra scope) and
            # allow non-primitives in the Hydra config while the runner is being built.
            with working_directory(workdir), omegaconf.flag_override(config, "allow_objects", True) as cfg:
                runner = Factory(base_type=runner_base_type).instantiate(cfg.runner)
                runner.setup(cfg)
                runners.append(runner)

        return runners
Esempio n. 3
0
    def __init__(
        self, algorithm_config: ESAlgorithmConfig, torch_policy: TorchPolicy,
        shared_noise: SharedNoiseTable,
        normalization_stats: Optional[Dict[str, Tuple[np.ndarray, np.ndarray]]]
    ) -> None:
        """Store the training components and prepare optimizer and statistics collection.

        :param algorithm_config: Algorithm config; passed to the parent constructor, its ``optimizer`` entry
            is used to instantiate the optimizer.
        :param torch_policy: The policy handed to the optimizer's ``setup``.
        :param shared_noise: Shared noise table, stored on the instance.
        :param normalization_stats: Optional normalization statistics, stored on the instance unchanged.
        """
        super().__init__(algorithm_config)

        # --- training setup ---
        self.model_selection: Optional[ModelSelectionBase] = None
        self.policy: Union[Policy, TorchModel] = torch_policy
        self.shared_noise = shared_noise
        self.normalization_stats = normalization_stats

        # the optimizer can only be set up once the policy is known
        optimizer_factory = Factory(Optimizer)
        self.optimizer = optimizer_factory.instantiate(algorithm_config.optimizer)
        self.optimizer.setup(self.policy)

        # epoch-level statistics aggregators for evaluation and training
        eval_logger = get_stats_logger("eval")
        self.eval_stats = LogStatsAggregator(LogStatsLevel.EPOCH, eval_logger)
        train_logger = get_stats_logger("train")
        self.train_stats = LogStatsAggregator(LogStatsLevel.EPOCH, train_logger)

        # ES-specific events are published through the training statistics
        self.es_events = self.train_stats.create_event_topic(ESEvents)
Esempio n. 4
0
    def __init__(self, theta_threshold_radians: float, x_threshold: float,
                 reward_aggregator: RewardAggregatorInterface):
        """Create the core environment: event routing, KPI calculator, reward aggregation, state and renderer.

        :param theta_threshold_radians: Pole angle threshold, stored on the instance.
        :param x_threshold: Cart position threshold, stored on the instance and passed to the renderer.
        :param reward_aggregator: Reward aggregator (or its config); resolved through the factory and
            registered as pubsub subscriber.
        """
        super().__init__()

        self.theta_threshold_radians = theta_threshold_radians
        self.x_threshold = x_threshold

        # pubsub routes events to the reward aggregator
        self.pubsub = Pubsub(self.context.event_service)

        # KPIs calculation
        self.kpi_calculator = CartPoleKpiCalculator()

        # build the reward aggregator and subscribe it
        aggregator_factory = Factory(RewardAggregatorInterface)
        self.reward_aggregator = aggregator_factory.instantiate(reward_aggregator)
        self.pubsub.register_subscriber(self.reward_aggregator)

        # environment state starts out undefined
        self.cart_position = self.cart_velocity = self.pole_angle = self.pole_velocity = None

        # random number generator, (re-)created by seeding with None, followed by the env setup
        self.env_rng: Optional[np.random.RandomState] = None
        self.seed(None)
        self._setup_env()

        # initialize rendering
        # NOTE(review): self.length is not assigned in this method; presumably _setup_env() provides it - confirm.
        self.renderer = CartPoleRenderer(pole_length=self.length, x_threshold=self.x_threshold)
    def __init__(self, observation_spaces_dict: Dict[Union[str, int],
                                                     spaces.Dict],
                 action_spaces_dict: Dict[Union[str, int], spaces.Dict],
                 networks: CollectionOfConfigType):
        """Build a single state-action critic network over the flattened observation and action spaces.

        :param observation_spaces_dict: Observation spaces per step key, passed to the parent constructor.
        :param action_spaces_dict: Action spaces per step key, passed to the parent constructor.
        :param networks: Collection holding exactly one network config (asserted).
        """
        super().__init__(observation_spaces_dict, action_spaces_dict)
        assert len(networks) == 1
        network = networks[0]

        flat_action_space = flat_structured_space(self._action_spaces_dict)
        obs_shapes_flat = flat_structured_shapes(self._obs_shapes)
        action_heads = flat_action_space.spaces

        # Infer the critic out shapes. If every action head is discrete, the discrete variant of the state-action
        #   critic is used: one output per action head holding a value for each possible action. Otherwise the
        #   general variant is used: the actions become additional inputs and a single value is returned.
        if all(self._only_discrete_spaces.values()):
            critic_output_shapes = {
                head + '_q_values': (head_space.n, )
                for head, head_space in action_heads.items()
            }
        else:
            for head, head_space in action_heads.items():
                is_discrete = isinstance(head_space, spaces.Discrete)
                obs_shapes_flat[head] = (head_space.n, ) if is_discrete else head_space.sample().shape
            critic_output_shapes = {'q_value': (1, )}

        # initialize critic (stored under key 0)
        model_registry = Factory(base_type=nn.Module)
        critic_net = model_registry.instantiate(network,
                                                obs_shapes=obs_shapes_flat,
                                                output_shapes=critic_output_shapes)
        self._critics = {0: critic_net}
Esempio n. 6
0
def test_init_cartpole_rllib_model():
    """Set up the RLlib runner with the 'rllib' model config and check the resulting configuration dicts."""
    cfg = load_hydra_config('maze.conf', 'conf_rllib', {'rllib/runner': 'dev', 'model': 'rllib'})

    runner = Factory(base_type=MazeRLlibRunner).instantiate(cfg.runner)
    runner.setup(cfg)
    ray_config = runner.ray_config
    rllib_config = runner.rllib_config
    tune_config = runner.tune_config

    assert isinstance(runner.env_factory(), CartPoleEnv)

    for config in (ray_config, rllib_config, tune_config):
        assert isinstance(config, dict)

    assert rllib_config['env'] == 'maze_env'
    assert rllib_config['framework'] == 'torch'
    assert rllib_config['num_workers'] == 1

    # every model parameter defined by maze has to exist in RLlib's defaults with the same value
    for key, value in rllib_config['model'].items():
        # the string placeholder stands for RLlib's DEPRECATED_VALUE constant
        expected = DEPRECATED_VALUE if value == "DEPRECATED_VALUE" else value
        assert key in MODEL_DEFAULTS, f"Maze RLlib model parameter '{key}' not in RLlib MODEL_DEFAULTS (rllib version: " \
                                      f"{ray.__version__})"
        assert MODEL_DEFAULTS[key] == expected, f"Rllib key:'{key}',value:'{MODEL_DEFAULTS[key]}' does not match with the " \
                                                f"maze defined config '{expected}' with rllib version: {ray.__version__}"

    # clean up the statistics dump if the normalization wrapper is configured
    if 'ObservationNormalizationWrapper' in cfg.wrappers:
        stats_dump = cfg.wrappers.ObservationNormalizationWrapper.statistics_dump
        assert os.path.exists(stats_dump)
        os.remove(cfg.wrappers.ObservationNormalizationWrapper.statistics_dump)
    def _initialize_normalization_strategies(self) -> None:
        """Initialize normalization strategies for all sub steps and all dictionary observations.
        """
        # iterate sub steps
        for sub_step_key, sub_space in self._original_observation_spaces_dict.items(
        ):
            assert isinstance(
                sub_space, gym.spaces.Dict
            ), "Only gym.spaces.Dict are supported as of now!"

            # iterate keys of dict observation space
            for obs_key in sub_space.spaces.keys():

                if obs_key in self.exclude:
                    continue

                # start out with default values
                normalization_strategy = self.default_strategy
                strategy_config = copy.copy(self.default_strategy_config)
                statistics = self.default_statistics

                # check if statistics have been computed and dumped
                if obs_key in self.loaded_stats:
                    statistics = self.loaded_stats[obs_key]

                # check if a manual config is specified
                if self._has_manual_config(obs_key):
                    manual_obs_config = self.manual_config[obs_key]

                    normalization_strategy = manual_obs_config.get(
                        "strategy", normalization_strategy)
                    statistics = manual_obs_config.get("statistics",
                                                       statistics)
                    strategy_config.update(
                        manual_obs_config.get("strategy_config", dict()))

                # build normalization strategy
                strategy = Factory(
                    ObservationNormalizationStrategy).instantiate({
                        "_target_":
                        normalization_strategy,
                        "observation_space":
                        sub_space[obs_key],
                        **strategy_config
                    })

                # update the observation space accordingly
                if statistics is not None and obs_key not in self.exclude:
                    strategy.set_statistics(statistics)
                    self.observation_spaces_dict[sub_step_key].spaces[
                        obs_key] = strategy.normalized_space()

                self._normalization_strategies[obs_key] = strategy

        # make sure that everything has been applied properly
        if self.manual_config is not None:
            self._check_manual_config()
Esempio n. 8
0
 def __init__(self,
              core_env: Union[CoreEnv, dict],
              action_conversion: CollectionOfConfigType,
              observation_conversion: CollectionOfConfigType):
     """Resolve core env and conversion interfaces through the factory and pass them to the parent.

     :param core_env: Core environment instance or its config.
     :param action_conversion: Collection of action conversion interfaces (or their configs).
     :param observation_conversion: Collection of observation conversion interfaces (or their configs).
     """
     cartpole_core_env = Factory(CartPoleCoreEnvironment).instantiate(core_env)
     action_interfaces = Factory(ActionConversionInterface).instantiate_collection(action_conversion)
     observation_interfaces = Factory(ObservationConversionInterface).instantiate_collection(
         observation_conversion)

     super().__init__(core_env=cartpole_core_env,
                      action_conversion_dict=action_interfaces,
                      observation_conversion_dict=observation_interfaces)
Esempio n. 9
0
class DefaultPolicy(Policy):
    """Encapsulates one or more policies identified by policy IDs.

    Actions are computed by delegating to the flat policy registered for the step key of the given actor.

    :param policies: Dict of policy IDs and corresponding policies.
    """
    def __init__(self, policies: CollectionOfConfigType):
        flat_policy_factory = Factory(FlatPolicy)
        self.policies = flat_policy_factory.instantiate_collection(policies)

    @override(Policy)
    def needs_state(self) -> bool:
        """This policy does not require the state() object to compute the action."""
        return False

    @override(Policy)
    def seed(self, seed: int) -> None:
        """Not applicable since Global seed should already be set before initializing the models"""

    @override(Policy)
    def compute_action(self,
                       observation: ObservationType,
                       maze_state: Optional[MazeStateType] = None,
                       env: Optional[BaseEnv] = None,
                       actor_id: Optional[ActorID] = None,
                       deterministic: bool = False) -> ActionType:
        """implementation of :class:`~maze.core.agent.policy.Policy` interface

        Only ``observation`` and ``deterministic`` are forwarded to the selected flat policy.
        """
        selected_policy = self.policy_for(actor_id)
        return selected_policy.compute_action(observation, deterministic=deterministic)

    def policy_for(self, actor_id: Optional[ActorID]) -> FlatPolicy:
        """Return policy corresponding to the given actor ID (or the single available policy if no actor ID is provided)

        :param actor_id: Actor ID to get policy for
        :return: Flat policy corresponding to the actor ID
        """
        if actor_id is not None:
            # policies are looked up by the step key of the actor
            return self.policies[actor_id.step_key]

        # without an actor ID the choice is only unambiguous if there is exactly one policy
        assert len(self.policies.items()) == 1, "no policy ID provided but multiple policies are available"
        available_policies = list(self.policies.values())
        return available_policies[0]

    @override(Policy)
    def compute_top_action_candidates(self, observation: ObservationType, num_candidates: Optional[int],
                                      maze_state: Optional[MazeStateType], env: Optional[BaseEnv],
                                      actor_id: Optional[ActorID] = None) \
            -> Tuple[Sequence[ActionType], Sequence[float]]:
        """implementation of :class:`~maze.core.agent.policy.Policy` interface

        Not supported by this policy.
        """
        raise NotImplementedError
Esempio n. 10
0
    def setup(self, cfg: DictConfig) -> None:
        """
        See :py:meth:`~maze.train.trainers.common.training_runner.TrainingRunner.setup`.

        After the parent setup this creates the seeded training envs, the actor-critic model, the best model
        selection, a rollout evaluator (only if ``cfg.algorithm.rollout_evaluator.n_episodes > 0``) and the
        trainer, which is finally initialized from the input directory.
        """

        super().setup(cfg)

        # distributed training env, one instance seed per concurrent env
        train_envs = self.create_distributed_env(self.env_factory, self.concurrency, logging_prefix="train")
        train_seeds = [self.maze_seeding.generate_env_instance_seed() for _ in range(self.concurrency)]
        train_envs.seed(train_seeds)

        # actor critic model assembled from the composed policy and critic
        actor_critic = TorchActorCritic(policy=self._model_composer.policy,
                                        critic=self._model_composer.critic,
                                        device=cfg.algorithm.device)

        # best model selection
        self._model_selection = BestModelSelection(dump_file=self.state_dict_dump_file,
                                                   model=actor_critic,
                                                   dump_interval=self.dump_interval)

        # evaluation is optional: without evaluation episodes no evaluator (and no eval env) is created
        evaluator = None
        evaluator_config = cfg.algorithm.rollout_evaluator
        if evaluator_config.n_episodes > 0:
            eval_envs = self.create_distributed_env(self.env_factory, self.eval_concurrency, logging_prefix="eval")
            eval_seeds = [self.maze_seeding.generate_env_instance_seed() for _ in range(self.eval_concurrency)]
            eval_envs.seed(eval_seeds)

            evaluator_factory = Factory(base_type=RolloutEvaluator)
            evaluator = evaluator_factory.instantiate(cfg.algorithm.rollout_evaluator,
                                                      eval_env=eval_envs,
                                                      model_selection=self._model_selection)

        # resolve the trainer class from its configured name
        trainer_type = Factory(base_type=ActorCritic).type_from_name(self.trainer_class)

        # initialize trainer (from input directory)
        self._trainer = trainer_type(algorithm_config=cfg.algorithm,
                                     rollout_generator=RolloutGenerator(env=train_envs),
                                     evaluator=evaluator,
                                     model=actor_critic,
                                     model_selection=self._model_selection)

        self._init_trainer_from_input_dir(trainer=self._trainer,
                                          state_dict_dump_file=self.state_dict_dump_file,
                                          input_dir=cfg.input_dir)
Esempio n. 11
0
    def __init__(self, action_spaces_dict: Dict[StepKeyType, gym.spaces.Dict],
                 observation_spaces_dict: Dict[StepKeyType, gym.spaces.Dict],
                 agent_counts_dict: Dict[StepKeyType, int],
                 distribution_mapper_config: ConfigType, policy: ConfigType,
                 critic: ConfigType):
        """Compose the policy and (optionally) the critic from their configs and save the model graphs.

        :param action_spaces_dict: Action spaces per step key.
        :param observation_spaces_dict: Observation spaces per step key.
        :param agent_counts_dict: Agent counts per step key.
        :param distribution_mapper_config: Distribution mapper config, passed to the parent constructor.
        :param policy: Policy composer (config).
        :param critic: Critic composer (config) or None if no critic is used.
        :raises ValueError: If the critic is neither a state critic nor a state-action critic composer.
        """
        super().__init__(action_spaces_dict, observation_spaces_dict,
                         agent_counts_dict, distribution_mapper_config)

        # init policy composer
        policy_composer_factory = Factory(BasePolicyComposer)
        self._policy_composer = policy_composer_factory.instantiate(
            policy,
            action_spaces_dict=self.action_spaces_dict,
            observation_spaces_dict=self.observation_spaces_dict,
            agent_counts_dict=self.agent_counts_dict,
            distribution_mapper=self._distribution_mapper)

        self.critic_input_spaces_dict = self._build_critic_input_space_dict()

        # init critic composer
        self._critics_composer = None
        if critic is not None:
            # configs carry the type in '_target_', already instantiated composers carry it themselves
            if isinstance(critic, Mapping):
                critic_type = Factory(CriticComposerInterface).type_from_name(critic['_target_'])
            else:
                critic_type = type(critic)

            # critics that do not support a shared embedding need the plain observation spaces as input
            spaces_unchanged = 'Shared embedding is not yet supported for {}'
            if issubclass(critic_type, SharedStateCriticComposer):
                assert self.critic_input_spaces_dict == self.observation_spaces_dict, \
                    spaces_unchanged.format('shared state critics')

            if issubclass(critic_type, BaseStateCriticComposer):
                state_critic_factory = Factory(BaseStateCriticComposer)
                self._critics_composer = state_critic_factory.instantiate(
                    critic,
                    observation_spaces_dict=self.critic_input_spaces_dict,
                    agent_counts_dict=self.agent_counts_dict)
            elif issubclass(critic_type, BaseStateActionCriticComposer):
                assert self.critic_input_spaces_dict == self.observation_spaces_dict, \
                    spaces_unchanged.format('state-action critics')
                state_action_critic_factory = Factory(BaseStateActionCriticComposer)
                self._critics_composer = state_action_critic_factory.instantiate(
                    critic,
                    observation_spaces_dict=self.critic_input_spaces_dict,
                    action_spaces_dict=self.action_spaces_dict)
            else:
                raise ValueError(
                    f"Critic of type {critic_type} not supported!")

        # save model graphs to pdf
        self.save_models()
Esempio n. 12
0
    def load_replay_buffer(self, replay_buffer: BaseReplayBuffer,
                           cfg: DictConfig) -> None:
        """Load the given trajectories as a dataset and fill the buffer with these trajectories.

        Depending on ``cfg.algorithm.split_rollouts_into_transitions`` either every single step record or one
        record per trajectory is added to the buffer.

        :param replay_buffer: The replay buffer to fill.
        :param cfg: The dict config of the experiment.
        """
        demonstrations_config = self.initial_demonstration_trajectories
        print(f'******* Starting to fill the replay buffer with trajectories from path: '
              f'{demonstrations_config.input_data} *******')

        # the dataset is instantiated relative to the input directory
        with SwitchWorkingDirectoryToInput(cfg.input_dir):
            dataset_factory = Factory(base_type=Dataset)
            dataset = dataset_factory.instantiate(self.initial_demonstration_trajectories,
                                                  conversion_env_factory=self.env_factory)
        assert isinstance(dataset, InMemoryDataset), 'Only in memory dataset supported at this point'

        if cfg.algorithm.split_rollouts_into_transitions:
            # transition mode: each step record has to carry complete next observations
            for step_record in dataset.step_records:
                next_observations = step_record.next_observations
                assert next_observations is not None, "Next observations are required for sac"
                assert all(next_obs is not None for next_obs in next_observations), \
                    "Next observations are required for sac"
                replay_buffer.add_transition(step_record)
            return

        # trajectory mode: re-assemble one trajectory record per trajectory reference
        for idx, trajectory_reference in enumerate(dataset.trajectory_references):
            trajectory = SpacesTrajectoryRecord(id=idx)
            trajectory.step_records = dataset.step_records[trajectory_reference]
            replay_buffer.add_transition(trajectory)
Esempio n. 13
0
def test_parallel_data_load_from_directory_clipped_from_hydra():
    """Test loading trajectories of multiple episodes in parallel into an in-memory dataset. (Each
    data-loader process reads the files assigned to it.)

    The dataset is instantiated from a Hydra-style config dict that also configures a dead end clipping
    trajectory processor.
    """
    # Step 1: record trajectories with a seeded random-policy rollout
    run_maze_job(
        {
            "configuration": "test",
            "env": "gym_env",
            "env.name": "CartPole-v0",
            "policy": "random_policy",
            "runner": "sequential",
            "runner.n_episodes": 2,
            "runner.max_episode_steps": 20,
            "seeding.env_base_seed": 12345,
            "seeding.agent_base_seed": 12345,
        },
        config_module="maze.conf",
        config_name="conf_rollout")

    # Step 2: load the recorded trajectories with two workers, clipping via the trajectory processor
    trajectory_processor_config = {
        '_target_': 'maze.core.trajectory_recording.datasets.trajectory_processor.DeadEndClippingTrajectoryProcessor',
        'clip_k': 2
    }
    dataset_config = {
        '_target_': 'maze.core.trajectory_recording.datasets.in_memory_dataset.InMemoryDataset',
        'n_workers': 2,
        'conversion_env_factory': lambda: make_gym_maze_env("CartPole-v0"),
        'input_data': 'trajectory_data',
        'deserialize_in_main_thread': False,
        'trajectory_processor': trajectory_processor_config
    }
    dataset = Factory(InMemoryDataset).instantiate(dataset_config)

    # presumably the number of steps kept from each of the two episodes - TODO confirm
    expected_size = 11 + 17
    assert len(dataset) == expected_size
Esempio n. 14
0
def check_env_and_model_instantiation(config_module: str, config: str,
                                      overrides: Dict[str, str]) -> None:
    """Check that the env and, where configured, the model composer can be instantiated.

    The model check is skipped for the 'rllib' model override and for configs without a 'model' entry.

    :param config_module: Module the config is composed relative to.
    :param config: Name of the config to compose.
    :param overrides: Hydra overrides as key/value pairs.
    """
    override_args = ["=".join((key, value)) for key, value in overrides.items()]
    with initialize_config_module(config_module):
        # config is relative to a module
        cfg = compose(config, overrides=override_args)

    # --- environment ---
    wrappers_config = cfg.wrappers if "wrappers" in cfg else {}
    env = EnvFactory(cfg.env, wrappers_config)()
    assert env is not None
    assert isinstance(env, (StructuredEnv, StructuredEnvSpacesMixin))

    # --- model ---
    if overrides.get('model') == 'rllib':
        return
    if 'model' not in cfg:
        return

    composer_factory = Factory(BaseModelComposer)
    model_composer = composer_factory.instantiate(
        cfg.model,
        action_spaces_dict=env.action_spaces_dict,
        observation_spaces_dict=env.observation_spaces_dict,
        agent_counts_dict=env.agent_counts_dict)

    for policy_net in model_composer.policy.networks.values():
        assert isinstance(policy_net, nn.Module)

    # critics are optional
    if model_composer.critic:
        for critic_net in model_composer.critic.networks.values():
            assert isinstance(critic_net, nn.Module)
Esempio n. 15
0
    def __init__(self, in_keys: Union[str,
                                      List[str]], out_keys: Union[str,
                                                                  List[str]],
                 in_shapes: Union[Sequence[int], List[Sequence[int]]],
                 hidden_size: int, num_layers: int, bidirectional: bool,
                 non_lin: Union[str, type(nn.Module)]):
        """Build a batch-first LSTM followed by a dense layer with non-linearity.

        :param in_keys: Forwarded to the parent constructor.
        :param out_keys: Forwarded to the parent constructor.
        :param in_shapes: Forwarded to the parent constructor; the last dimension of the first shape is
            used as LSTM input size.
        :param hidden_size: Hidden size of the LSTM.
        :param num_layers: Number of LSTM layers.
        :param bidirectional: If True a bidirectional LSTM is built and the output has twice the hidden size.
        :param non_lin: Non-linearity (type or name) applied after the final dense layer.
        """
        super().__init__(in_keys=in_keys,
                         out_keys=out_keys,
                         in_shapes=in_shapes,
                         in_num_dims=3,
                         out_num_dims=3)

        # hyper parameters
        self.input_units = self.in_shapes[0][-1]
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.non_lin = Factory(base_type=nn.Module).type_from_name(non_lin)

        # both directions contribute hidden_size units each
        if self.bidirectional:
            self.output_units = 2 * self.hidden_size
        else:
            self.output_units = self.hidden_size

        # compile network: recurrent part first, dense head second
        self.net = nn.LSTM(input_size=self.input_units,
                           hidden_size=self.hidden_size,
                           num_layers=self.num_layers,
                           bidirectional=self.bidirectional,
                           batch_first=True)
        dense_layer = nn.Linear(in_features=self.output_units, out_features=self.output_units)
        activation = self.non_lin()
        self.final_dense = nn.Sequential(dense_layer, activation)
Esempio n. 16
0
    def __init__(self,
                 in_keys: Union[str, List[str]],
                 out_keys: Union[str, List[str]],
                 in_shapes: Union[Sequence[int], List[Sequence[int]]],
                 hidden_channels: List[int],
                 hidden_kernels: List[Union[int, Tuple[int, ...]]],
                 non_lin: Union[str, type(nn.Module)],
                 convolution_dimension: int,
                 hidden_strides: Optional[List[Union[int, Tuple[int, ...]]]],
                 hidden_dilations: Optional[List[Union[int, Tuple[int, ...]]]],
                 hidden_padding: Optional[List[Union[int, Tuple[int, ...]]]],
                 padding_mode: Optional[str]):
        """Build a stack of 1d, 2d or 3d convolution layers.

        :param in_keys: Forwarded to the parent constructor.
        :param out_keys: Forwarded to the parent constructor.
        :param in_shapes: Forwarded to the parent constructor; the number of input channels is read from the
            first shape.
        :param hidden_channels: Output channels per layer; its length defines the number of layers.
        :param hidden_kernels: Kernel per layer: an int, or (for 2d/3d) a tuple with one entry per
            convolution dimension. For 1d convolutions only ints are accepted.
        :param non_lin: Non-linearity (type or name).
        :param convolution_dimension: Dimensionality of the convolution, one of 1, 2, 3.
        :param hidden_strides: Stride per layer; None means 1 for every layer.
        :param hidden_dilations: Dilation per layer; None means 1 for every layer.
        :param hidden_padding: Padding per layer; None means 0 for every layer.
        :param padding_mode: One of 'zeros', 'reflect', 'replicate', 'circular'; None means 'zeros'.
        """

        # select the convolution type, the expected tensor rank and validate the kernel specification
        assert convolution_dimension in [1, 2, 3], \
            f'convolution_dimension has to be 1, 2 or 3 but is {convolution_dimension}'
        if convolution_dimension == 1:
            self.convolution_nn = nn.Conv1d
            in_out_num_dims = 3
            for hk in hidden_kernels:
                assert isinstance(hk, int)
        elif convolution_dimension == 2:
            self.convolution_nn = nn.Conv2d
            in_out_num_dims = 4
            for hk in hidden_kernels:
                assert isinstance(hk, int) or len(hk) == 2
        else:
            self.convolution_nn = nn.Conv3d
            in_out_num_dims = 5
            for hk in hidden_kernels:
                assert isinstance(hk, int) or len(hk) == 3

        super().__init__(in_keys=in_keys, out_keys=out_keys, in_shapes=in_shapes, in_num_dims=in_out_num_dims,
                         out_num_dims=in_out_num_dims)

        self.convolutional_dim = convolution_dimension
        # channels are located in front of the (in_out_num_dims - 2) convolution dimensions
        self.input_channels = self.in_shapes[0][-(in_out_num_dims - 1)]

        self.hidden_channels = hidden_channels
        self.hidden_kernels = hidden_kernels
        self.non_lin = Factory(base_type=nn.Module).type_from_name(non_lin)
        self.output_channels = self.hidden_channels[-1]

        # Optional arguments fall back to per-layer defaults
        num_layers = len(self.hidden_channels)
        self.hidden_strides = hidden_strides if hidden_strides is not None else [1] * num_layers
        self.hidden_dilations = hidden_dilations if hidden_dilations is not None else [1] * num_layers
        self.hidden_padding = hidden_padding if hidden_padding is not None else [0] * num_layers
        self.padding_mode = padding_mode if padding_mode is not None else 'zeros'

        # checks: every per-layer list has to provide exactly one entry per layer
        assert self.padding_mode in ['zeros', 'reflect', 'replicate', 'circular']
        assert num_layers == len(self.hidden_kernels), 'hidden_kernels needs one entry per layer'
        assert num_layers == len(self.hidden_strides), 'hidden_strides needs one entry per layer'
        assert num_layers == len(self.hidden_dilations), 'hidden_dilations needs one entry per layer'
        # the padding list was the only per-layer argument without a length check
        assert num_layers == len(self.hidden_padding), 'hidden_padding needs one entry per layer'

        # compile layer dictionary
        layer_dict = self.build_layer_dict()

        # compile network
        self.net = nn.Sequential(layer_dict)
Esempio n. 17
0
    def __call__(self, *args, **kwargs) -> MazeEnv:
        """Create a new environment from the stored env config and apply the stored wrappers.

        Positional and keyword arguments are accepted but not used.

        :return: Newly created environment instance.
        """
        base_env = Factory(MazeEnv).instantiate(self.env)
        return WrapperFactory.wrap_from_config(base_env, self.wrappers)
Esempio n. 18
0
def test_cartpole_model_composer():
    """Instantiate the model composer from the custom complex net config using mocked spaces."""
    # Iterate the package path instead of reaching into its private ``_path`` attribute; this works for
    # namespace packages as well as for regular packages (where ``__path__`` is a plain list).
    path_to_model_config = list(code_snippets.__path__)[0] + '/custom_complex_net.yaml'

    # Close the file deterministically and name the loader explicitly: calling yaml.load() without a Loader
    # is deprecated since PyYAML 5.1 and a TypeError as of PyYAML 6.
    # NOTE(review): safe_load assumes the config only holds plain YAML types - if it relies on Python-specific
    # tags use yaml.load(config_file, Loader=yaml.FullLoader) instead.
    with open(path_to_model_config, 'r') as config_file:
        model_config = yaml.safe_load(config_file)

    model_composer = Factory(base_type=BaseModelComposer).instantiate(
        model_config,
        action_spaces_dict=_mock_action_spaces_dict(),
        observation_spaces_dict=_mock_observation_spaces_dict(),
        agent_counts_dict=_mock_agent_counts_dict())
Esempio n. 19
0
def test_init_cartpole_maze_model():
    """Set up the RLlib runner with a maze model and check registry entries and resulting configs."""
    hydra_overrides = dict([
        ('rllib/runner', 'dev'),
        ('configuration', 'test'),
        ('env', 'gym_env'),
        ('model', 'vector_obs'),
        ('wrappers', 'vector_obs'),
        ('critic', 'template_state'),
    ])
    cfg = load_hydra_config('maze.conf', 'conf_rllib', hydra_overrides)

    runner = Factory(base_type=MazeRLlibRunner).instantiate(cfg.runner)
    runner.setup(cfg)
    ray_config = runner.ray_config
    rllib_config = runner.rllib_config
    tune_config = runner.tune_config

    assert isinstance(runner.env_factory(), CartPoleEnv)

    # the maze action distribution and model have to be registered with RLlib
    registered_dist = _global_registry.get(RLLIB_ACTION_DIST, 'maze_dist')
    assert issubclass(registered_dist, MazeRLlibActionDistribution)
    registered_model = _global_registry.get(RLLIB_MODEL, 'maze_model')
    assert issubclass(registered_model, MazeRLlibPolicyModel)

    for config in (ray_config, rllib_config, tune_config):
        assert isinstance(config, dict)

    assert rllib_config['env'] == 'maze_env'
    assert rllib_config['framework'] == 'torch'
    assert rllib_config['num_workers'] == 1

    # the model section has to point to the maze model and carry the maze specific config
    model_config = rllib_config['model']
    assert model_config['custom_action_dist'] == 'maze_dist'
    assert model_config['custom_model'] == 'maze_model'
    assert model_config['vf_share_layers'] is False
    custom_model_config = model_config['custom_model_config']
    assert custom_model_config['maze_model_composer_config'] == cfg.model
    assert custom_model_config['spaces_config_dump_file'] == cfg.runner.spaces_config_dump_file

    # clean up the statistics dump if the normalization wrapper is configured
    if 'ObservationNormalizationWrapper' in cfg.wrappers:
        stats_dump = cfg.wrappers.ObservationNormalizationWrapper.statistics_dump
        assert os.path.exists(stats_dump)
        os.remove(cfg.wrappers.ObservationNormalizationWrapper.statistics_dump)
Esempio n. 20
0
    def __init__(self, action_space: spaces.Dict,
                 distribution_mapper_config: CollectionOfConfigType):
        """Map every action head to a distribution type and its arguments.

        All heads start out with the default distribution of their space type (``self.default_mapping``)
        and no arguments. Afterwards the custom mapping entries are applied in the given order.

        :param action_space: Dict action space whose sub spaces are the action heads.
        :param distribution_mapper_config: Custom mapping entries. Each entry needs a "distribution", exactly
            one of "action_space" / "action_head" and may hold additional "args".
        """
        self.action_space = action_space

        # first: default mapping of action heads to (distribution type, distribution arguments)
        self._action_head_to_distribution: Dict[str, Tuple[
            type(TorchProbabilityDistribution), Dict[str, Any]]] = dict()
        for head, head_space in action_space.spaces.items():
            default_dist_type = self.default_mapping[type(head_space)]
            self._action_head_to_distribution[head] = (default_dist_type, {})

        # second: parse custom mappings
        for entry_dict in distribution_mapper_config:
            assert "distribution" in entry_dict
            # exactly one of the two selectors has to be given
            assert ("action_space" in entry_dict) != ("action_head" in entry_dict)

            # get the distribution type
            dist_factory = Factory(TorchProbabilityDistribution)
            distribution_type = dist_factory.type_from_name(entry_dict["distribution"])

            # get additional distribution arguments
            if "args" in entry_dict:
                args = entry_dict["args"]
            else:
                args = {}

            if "action_head" in entry_dict:
                # the mapping addresses one specific action head
                selected_head = entry_dict["action_head"]
                self._action_head_to_distribution[selected_head] = (distribution_type, args)

            elif "action_space" in entry_dict:
                # the mapping addresses every action head of the given space type
                selected_space_type = Factory(spaces.Space).type_from_name(entry_dict["action_space"])

                for action_head in self.action_space.spaces:
                    if not isinstance(self.action_space[action_head], selected_space_type):
                        continue
                    self._action_head_to_distribution[action_head] = (distribution_type, args)
Esempio n. 21
0
def test_cartpole_model_composer():
    """Instantiate a model composer for CartPole from the YAML config shipped with the code snippets."""
    env = GymMazeEnv(env='CartPole-v0')
    # NOTE(review): `_path` is a private attribute of the package path object; kept as in the original.
    path_to_model_config = code_snippets.__path__._path[
        0] + '/custom_plain_cartpole_net.yaml'

    # Read the config inside a context manager so the file handle is closed deterministically.
    # `safe_load` is used because `yaml.load` without an explicit Loader is rejected by PyYAML >= 6;
    # this assumes the snippet contains only plain YAML data (no python-specific tags) - TODO confirm.
    with open(path_to_model_config, 'r') as config_file:
        model_config = yaml.safe_load(config_file)

    model_composer = Factory(base_type=BaseModelComposer).instantiate(
        model_config,
        action_spaces_dict=env.action_spaces_dict,
        observation_spaces_dict=env.observation_spaces_dict,
        agent_counts_dict=env.agent_counts_dict)
Esempio n. 22
0
    def __init__(self, observation_spaces_dict: Dict[Union[str, int],
                                                     spaces.Dict],
                 agent_counts_dict: Dict[StepKeyType, int],
                 networks: CollectionOfConfigType):
        """Instantiate one critic network per entry of ``networks``.

        Every critic except the first one additionally receives the shape stored in
        ``self.prev_value_shape`` under the key ``self.prev_value_key`` as input.

        :param observation_spaces_dict: Observation spaces, forwarded to the base class.
        :param agent_counts_dict: Agent counts, forwarded to the base class.
        :param networks: Critic network configs; converted with ``list_to_dict`` before use.
        """
        super().__init__(observation_spaces_dict, agent_counts_dict)

        networks = list_to_dict(networks)
        critic_factory = Factory(base_type=nn.Module)

        # build the critics in the iteration order of the network configs
        self._critics = dict()
        for position, step_key in enumerate(networks):
            critic_obs_shapes = self._obs_shapes[step_key]

            if position > 0:
                # extend a copy of the shapes, the original shape dict stays untouched
                critic_obs_shapes = dict(critic_obs_shapes)
                critic_obs_shapes[self.prev_value_key] = self.prev_value_shape

            self._critics[step_key] = critic_factory.instantiate(
                networks[step_key], obs_shapes=critic_obs_shapes)
Esempio n. 23
0
    def __init__(self, observation_spaces_dict: Dict[StepKeyType, spaces.Dict],
                 agent_counts_dict: Dict[StepKeyType, int],
                 networks: ConfigType, stack_observations: bool):
        """Instantiate a single critic operating on the flattened observation shapes.

        :param observation_spaces_dict: Observation spaces, forwarded to the base class.
        :param agent_counts_dict: Agent counts, forwarded to the base class.
        :param networks: Collection holding exactly one critic network config.
        :param stack_observations: If True, the observation shapes are passed through
            ``stacked_shapes`` (together with the agent counts) before being flattened.
        """
        super().__init__(observation_spaces_dict, agent_counts_dict)
        assert len(networks) == 1
        self.stack_observations = stack_observations
        critic_config = networks[0]

        # optionally stack, then flatten the shapes of all steps
        if self.stack_observations:
            shapes_to_flatten = stacked_shapes(self._obs_shapes,
                                               self._agent_counts_dict)
        else:
            shapes_to_flatten = self._obs_shapes
        flat_shapes = flat_structured_shapes(shapes_to_flatten)

        # the flattened shapes are registered under the single key 0
        self._obs_shapes = {0: flat_shapes}

        # instantiate the one critic, also registered under key 0
        critic = Factory(base_type=nn.Module).instantiate(
            critic_config, obs_shapes=flat_shapes)
        self._critics = {0: critic}
Esempio n. 24
0
    def __init__(self, input_data: Optional[Union[str, Path,
                                                  List[Union[str, Path]]]],
                 conversion_env_factory: Optional[Callable], n_workers: int,
                 trajectory_processor: Union[TrajectoryProcessor, ConfigType],
                 deserialize_in_main_thread: bool):
        """Set up the data set and, if input data is given, load it right away.

        :param input_data: Path(s) handed to ``self.load_data``; nothing is loaded if None.
        :param conversion_env_factory: Optional callable building the conversion env; it is
            called once here if given.
        :param n_workers: Number of workers, stored as ``self.n_workers``.
        :param trajectory_processor: Trajectory processor instance or its config, resolved through
            the ``Factory`` for ``TrajectoryProcessor``.
        :param deserialize_in_main_thread: Flag stored as ``self._deserialize_in_main_thread``.
        """
        # build the conversion env only if a factory was provided
        self._conversion_env_factory = conversion_env_factory
        if self._conversion_env_factory:
            self._conversion_env = self._conversion_env_factory()
        else:
            self._conversion_env = None

        self.n_workers = n_workers

        processor_factory = Factory(TrajectoryProcessor)
        self._trajectory_processor = processor_factory.instantiate(
            trajectory_processor)

        self._deserialize_in_main_thread = deserialize_in_main_thread

        # containers start out empty; no reporting queue is attached yet
        self.step_records = []
        self.trajectory_references = []
        self.reporting_queue = None

        if input_data is None:
            return
        self.load_data(input_data)
Esempio n. 25
0
def _run_job(cfg: DictConfig) -> None:
    """Run a regular maze job: fix the seeds, log the config, then set up and run the runner.

    :param cfg: Hydra configuration of the job.
    """
    set_matplotlib_backend()

    # Base seeds that were not specified are drawn randomly and written back into the config,
    # so that the dumped config below contains the seeds that were actually used.
    for seed_name in ("env_base_seed", "agent_base_seed"):
        if getattr(cfg.seeding, seed_name) is None:
            random_seed = MazeSeeding.generate_seed_from_random_state(
                np.random.RandomState(None))
            setattr(cfg.seeding, seed_name, random_seed)

    # dump the resolved config to file and echo it to the console
    resolved_cfg = OmegaConf.to_container(cfg, resolve=True)
    config_str = yaml.dump(resolved_cfg, sort_keys=False)
    with open("hydra_config.yaml", "w") as fp:
        fp.write("\n" + config_str)
    BColors.print_colored(config_str, color=BColors.HEADER)
    print("Output directory: {}\n".format(os.path.abspath(".")))

    # instantiate the configured runner and hand over control
    runner = Factory(base_type=Runner).instantiate(cfg.runner)
    runner.setup(cfg)
    runner.run()
Esempio n. 26
0
    def __init__(self, in_keys: Union[str, List[str]], out_keys: Union[str, List[str]],
                 in_shapes: Union[Sequence[int], List[Sequence[int]]], hidden_features: List[int],
                 non_lins: Union[str, type(nn.Module), List[str], List[type(nn.Module)]],
                 n_heads: Union[int, List[int]], attention_alpha: Union[List[float], float],
                 avg_last_head_attentions: bool,
                 attention_dropout: Union[float, List[float]]):
        """Set up the block from a feature matrix input and an adjacency matrix input.

        ``non_lins``, ``n_heads``, ``attention_alpha`` and ``attention_dropout`` may each be given
        as a list or as a single value; a single value is repeated once per entry of ``hidden_features``.

        :param in_keys: Input keys; two are required (feature matrix, adjacency matrix).
        :param out_keys: Output keys, forwarded to the base class.
        :param in_shapes: Input shapes matching the input keys.
        :param hidden_features: Hidden feature sizes, one entry per layer.
        :param non_lins: Non-linearity (name or type) per layer.
        :param n_heads: Number of heads per layer.
        :param attention_alpha: Attention alpha per layer.
        :param avg_last_head_attentions: If True the output feature size equals the last hidden feature
            size, otherwise it is the last hidden feature size times the last number of heads.
        :param attention_dropout: Attention dropout per layer.
        """
        super().__init__(in_keys=in_keys, out_keys=out_keys, in_shapes=in_shapes, in_num_dims=[3, 3],
                         out_num_dims=3)

        # Validate the inputs
        assert len(self.in_keys) == 2, 'There should be two input keys, feature matrix + adjacency matrix'
        feature_shape, adjacency_shape = self.in_shapes[0], self.in_shapes[1]
        assert feature_shape[-2] == adjacency_shape[-1], (
            'The node dimension of the feature matrix should be '
            'the same as the adjacency matrix\'s rows and '
            f'columns {self.in_shapes}')
        assert adjacency_shape[-1] == adjacency_shape[-2], 'The adjacency matrix has to be a square matrix'
        self.avg_last_head_attentions = avg_last_head_attentions

        # The adjacency matrix input gets its own dummy dict creation function
        self.dummy_dict_creators[1] = self._dummy_symmetric_adj_tensor_factory(self.in_shapes[1])

        # Feature sizes
        self.input_features = self.in_shapes[0][-1]
        self.hidden_features = hidden_features

        def _per_layer(value):
            """Return lists unchanged, repeat any other value once per hidden layer."""
            if isinstance(value, list):
                return value
            return [value] * len(self.hidden_features)

        # Number of heads for each layer
        self.n_heads: List[int] = _per_layer(n_heads)

        # Averaging the last heads keeps the last hidden feature size, concatenating them
        #   multiplies it by the last number of heads
        if self.avg_last_head_attentions:
            self.output_features = self.hidden_features[-1]
        else:
            self.output_features = self.hidden_features[-1] * self.n_heads[-1]

        # Non-linearity type for each layer, resolved from name or type
        self.non_lins: List[type(nn.Module)] = [
            Factory(base_type=nn.Module).type_from_name(layer_non_lin)
            for layer_non_lin in _per_layer(non_lins)
        ]

        # Dropout for each layer
        self.attention_dropout = _per_layer(attention_dropout)

        # Alpha for each layer
        self.attention_alpha = _per_layer(attention_alpha)

        # Compile the layer dictionary into the network
        self.net = nn.Sequential(self.build_layer_dict())
Esempio n. 27
0
    def __init__(self, observation_spaces_dict: Dict[Union[str, int],
                                                     spaces.Dict],
                 agent_counts_dict: Dict[StepKeyType, int],
                 networks: CollectionOfConfigType):
        """Instantiate one critic network per entry of ``networks``.

        :param observation_spaces_dict: Observation spaces, forwarded to the base class.
        :param agent_counts_dict: Agent counts, forwarded to the base class.
        :param networks: Critic network configs; converted with ``list_to_dict`` before use.
        """
        super().__init__(observation_spaces_dict, agent_counts_dict)

        networks = list_to_dict(networks)

        # each critic is built with the observation shapes stored under its own key
        critics = dict()
        for step_key, critic_config in networks.items():
            critics[step_key] = Factory(base_type=nn.Module).instantiate(
                critic_config, obs_shapes=self._obs_shapes[step_key])
        self._critics = critics
    def __init__(self, env: MazeEnv, plot_function: Optional[str]):
        """Avoid calling this constructor directly, use :method:`wrap` instead.

        :param env: Environment to wrap.
        :param plot_function: Optional name of the plot function; if given it is resolved to a
            callable and registered as the tensorboard render function of the observation event.
        """
        super().__init__(env)

        # register the event topic for observation visualization
        event_service = self.core_env.context.event_service
        self.observation_events = event_service.create_event_topic(
            ObservationVisualizationEvents)

        # nothing to update if no plot function is specified
        if plot_function is None:
            return

        # resolve the plot function and register it under the key None
        render_function = Factory(Callable).type_from_name(plot_function)
        render_figure_dict = ObservationVisualizationEvents.observation_to_visualize \
            .tensorboard_render_figure_dict
        render_figure_dict[None] = render_function
Esempio n. 29
0
    def __init__(self, in_keys: Union[str, List[str]], out_keys: Union[str, List[str]],
                 in_shapes: Union[Sequence[int], List[Sequence[int]]], hidden_channels: List[int],
                 non_lin: Union[str, type(nn.Module)]):
        """Set up the block and compile its layers into a sequential network.

        :param in_keys: Input keys, forwarded to the base class.
        :param out_keys: Output keys, forwarded to the base class.
        :param in_shapes: Input shapes; the third-last dimension of the first shape is used
            as the number of input channels.
        :param hidden_channels: Hidden channel sizes; the last entry is the number of output channels.
        :param non_lin: Non-linearity given as name or type.
        """
        super().__init__(
            in_keys=in_keys,
            out_keys=out_keys,
            in_shapes=in_shapes,
            in_num_dims=4,
            out_num_dims=4,
        )

        # channel configuration
        first_in_shape = self.in_shapes[0]
        self.input_channels = first_in_shape[-3]
        self.hidden_channels = hidden_channels

        # resolve the non-linearity to its type
        module_factory = Factory(base_type=nn.Module)
        self.non_lin = module_factory.type_from_name(non_lin)

        self.output_channels = self.hidden_channels[-1]

        # compile the layer dictionary into the network
        self.net = nn.Sequential(self.build_layer_dict())
Esempio n. 30
0
    def _initialize_preprocessors(self) -> None:
        """Initialize pre-processors for all sub steps and all dictionary observations.

        For every entry of ``self.pre_processor_mapping`` the pre-processor class is resolved and
        instantiated, registered in ``self._preprocessors`` and its processed space is added to the
        observation spaces under the tag ``"<observation>-<processor tag>"``. Since the tag is also added
        to the flat observation space, later mappings can refer to the output of earlier ones.
        Original observations that should not be kept are removed only after all mappings have been
        processed, so several pre-processors can be applied to the same observation.
        """

        # get full flat observation space
        observation_spaces = flat_structured_space(
            self.observation_spaces_dict).spaces

        # (sub step key, observation key) pairs to remove once all mappings are processed
        temporary_spaces = []

        # iterate pre-processor config
        for mapping in self.pre_processor_mapping:
            obs_key = mapping["observation"]
            assert obs_key in observation_spaces, f"Observation {obs_key} not contained in observation space."

            pre_processor_cls = Factory(PreProcessor).type_from_name(
                mapping["_target_"])
            assert isinstance(mapping["config"], Mapping), \
                f"Make sure that the config for {pre_processor_cls.__name__} of observation {obs_key} is a dict!"
            processor = pre_processor_cls(
                observation_space=observation_spaces[obs_key],
                **mapping["config"])

            self._preprocessors.append(
                (obs_key, processor, mapping["keep_original"]))

            # append processed space
            tag = f"{obs_key}-{processor.tag()}"
            observation_spaces[tag] = processor.processed_space()

            # iterate all structured env sub steps and update observation spaces accordingly
            for sub_step_key, sub_space in self.observation_spaces_dict.items(
            ):

                # check if the subspace is contained
                if obs_key in sub_space.spaces:

                    # add new key to observation space
                    self.observation_spaces_dict[sub_step_key].spaces[
                        tag] = processor.processed_space()

                    # schedule the original key for removal from the observation space
                    if not mapping["keep_original"]:
                        temporary_spaces.append((sub_step_key, obs_key))

        # remove temporary spaces; the same (sub step, observation) pair is scheduled more than once
        # when several mappings drop the same original observation, hence the removal has to be
        # idempotent instead of raising a KeyError on the second attempt
        for sub_step_key, obs_key in temporary_spaces:
            self.observation_spaces_dict[sub_step_key].spaces.pop(
                obs_key, None)