Example #1
    def __init__(self, env_factory: Callable[[],
                                             Union[StructuredEnv,
                                                   StructuredEnvSpacesMixin,
                                                   LogStatsEnv]],
                 policy: TorchPolicy, n_rollout_steps: int, n_actors: int,
                 batch_size: int, actor_env_seeds: List[int]):
        super().__init__(env_factory, policy, n_rollout_steps, n_actors,
                         batch_size)

        self.broadcasting_container = BroadcastingContainer()
        self.current_actor_idx = 0

        self.actors: List[RolloutGenerator] = []
        self.policy_version_counter = 0

        for env_seed in actor_env_seeds:
            env = env_factory()
            env.seed(env_seed)
            actor = RolloutGenerator(env=env,
                                     record_logits=True,
                                     record_episode_stats=True)
            self.actors.append(actor)

        if self.n_actors > self.batch_size:
            BColors.print_colored(
                f'It does not make much sense to have more actors (given value: {n_actors}) than '
                f'the actor_batch_size (given value: {batch_size}) when using the DummyMultiprocessingModule.',
                color=BColors.WARNING)
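All of the examples on this page exercise BColors.print_colored from maze.utils.bcolors. A minimal, self-contained sketch of that call pattern (using only the color constants visible in the snippets, e.g. OKBLUE, OKGREEN, WARNING, FAIL, HEADER):

from maze.utils.bcolors import BColors

# Print an informational message in blue and a warning in yellow, mirroring the
# calls used throughout the examples below.
BColors.print_colored("INFO: starting rollout", color=BColors.OKBLUE)
BColors.print_colored("Something looks off with the configuration.", color=BColors.WARNING)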
Example #2
File: maze_cli.py Project: enlite-ai/maze
def _run_job(cfg: DictConfig) -> None:
    """Runs a regular maze job.

    :param cfg: Hydra configuration for the rollout.
    """
    set_matplotlib_backend()

    # If no env or agent base seed is given, generate the seeds randomly and add them to the resolved Hydra config
    if cfg.seeding.env_base_seed is None:
        cfg.seeding.env_base_seed = MazeSeeding.generate_seed_from_random_state(
            np.random.RandomState(None))
    if cfg.seeding.agent_base_seed is None:
        cfg.seeding.agent_base_seed = MazeSeeding.generate_seed_from_random_state(
            np.random.RandomState(None))

    # print and log config
    config_str = yaml.dump(OmegaConf.to_container(cfg, resolve=True),
                           sort_keys=False)
    with open("hydra_config.yaml", "w") as fp:
        fp.write("\n" + config_str)
    BColors.print_colored(config_str, color=BColors.HEADER)
    print("Output directory: {}\n".format(os.path.abspath(".")))

    # run job
    runner = Factory(base_type=Runner).instantiate(cfg.runner)
    runner.setup(cfg)
    runner.run()
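The seeding fallback above can be reproduced in isolation; a short sketch, with the import path of MazeSeeding assumed (the helper call itself is taken verbatim from the snippet above):

import numpy as np
from maze.core.utils.seeding import MazeSeeding  # import path assumed

# Derive a base seed from a freshly initialized random state, exactly as
# _run_job does when cfg.seeding.env_base_seed is None.
env_base_seed = MazeSeeding.generate_seed_from_random_state(np.random.RandomState(None))
print(env_base_seed)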
Example #3
    def summarize_reward(self,
                         maze_state: Optional[MazeStateType] = None
                         ) -> List[float]:
        """implementation of :class:`~maze.core.env.reward.RewardAggregatorInterface` interface
        """

        terminal_events = list(
            self.query_events([
                CartPoleEvents.cart_moved_away, CartPoleEvents.pole_fell_over
            ]))
        done = len(terminal_events) > 0

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                BColors.print_colored(
                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior.",
                    BColors.WARNING)
            self.steps_beyond_done += 1
            reward = 0.0

        # in more complex scenarios (e.g., multi-agent or multi-objective) working with lists
        # is often convenient (even though not required for this simple example).
        return [reward]
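How the returned list is collapsed into a scalar is up to the consuming environment; a hedged sketch, assuming a plain sum over the components (aggregator is a hypothetical RewardAggregatorInterface instance):

# Hedged sketch: summation is an assumption here, not part of the snippet above.
reward_components = aggregator.summarize_reward(maze_state=None)  # e.g. [1.0]
scalar_reward = sum(reward_components)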
Example #4
 def plot_inference_graphs(nets_type, nets):
     """Draw inference graphs."""
     for net_name, net_model in nets.items():
         if not os.path.exists(f'{nets_type}_{net_name}'):
             if isinstance(net_model, InferenceBlock):
                 InferenceGraph(net_model).save(
                     f'{nets_type}_{net_name}', './')
             else:
                 children = net_model.children()
                 inference_blocks = list(
                     filter(lambda cc: isinstance(cc, InferenceBlock),
                            children))
                 if len(inference_blocks) == 1:
                     InferenceGraph(inference_blocks[0]).save(
                         f'{nets_type}_{net_name}', './')
                 elif len(inference_blocks) > 1:
                     BColors.print_colored(
                         f'More than one inference block was found for'
                         f' {nets_type}-{net_name}, please revisit the model and make '
                         f'sure only one is present', BColors.WARNING)
                 else:
                     BColors.print_colored(
                         f'No inference block could be found in '
                         f'{nets_type}-{net_name}, thus no visual representation '
                         f'(of the model) could be created or saved',
                         BColors.WARNING)
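A hedged usage sketch of the helper above (it mirrors the try/except block in example #11 below; policy and critic are assumed to expose a .networks dict, as the maze Torch policy/critic composers do):

try:
    plot_inference_graphs("policy", policy.networks)
    plot_inference_graphs("critic", critic.networks)
except ImportError as e:
    BColors.print_colored(f'Models graphical representation could not be saved: {e}',
                          BColors.WARNING)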
Example #5
File: maze_cli.py Project: enlite-ai/maze
def set_matplotlib_backend() -> None:
    """Switch matplotlib backend for maze runs on headless machines to Agg (non-interactive).
    """
    if not os.environ.get('MPLBACKEND') and not os.environ.get('DISPLAY'):
        BColors.print_colored(
            f"INFO: No display detected! Switching matplotlib to headless backend Agg!",
            color=BColors.OKBLUE)
        matplotlib.use('Agg')
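A short sketch of the intended call order (assumption: pyplot has not been imported yet when the backend is switched):

set_matplotlib_backend()
import matplotlib.pyplot as plt  # now uses the non-interactive Agg backend on headless machines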
Example #6
def setup_logging(job_config: Union[DictConfig, str, None],
                  log_dir: str = ".") -> None:
    """Setup tensorboard logging, derive the logging directory from the script name.

    :param job_config: Configuration written as text to tensorboard (experiment config).
    :param log_dir: log_dir for TensorBoard.
    """
    # hydra handles the working directory
    writer = LogStatsWriterTensorboard(log_dir=log_dir,
                                       tensorboard_render_figure=True)
    register_log_stats_writer(writer)
    # attach a console writer as well for immediate console feedback
    register_log_stats_writer(LogStatsWriterConsole())

    summary_writer = writer.summary_writer
    summary_writer.add_text("cmd", " ".join(sys.argv))

    if job_config is not None:
        # log run settings
        if isinstance(job_config, DictConfig):
            if job_config.__dict__["_metadata"].flags.get(
                    "allow_objects", False):
                # Hydra was instructed to allow objects. This was done by the Python training API, hence we might have
                # Python objects in our config, which makes it impossible to log the config to a file.
                # todo Making this work for the Python training API would require reversing the Hydra instantiation, i.e.
                #  generate a Hydra configuration from Python objects (at least partially).
                BColors.print_colored(
                    "Logging run configurations with injected non-primitives is not supported yet. For now please don't"
                    " inject non-primitives if you wish to log the configuration of your run.",
                    BColors.WARNING)
                return
            else:
                job_config = OmegaConf.to_yaml(job_config)

        # prepare config text for tensorboard
        job_config = job_config.replace("\n", "</br>")
        job_config = job_config.replace(" ", "&nbsp;")

        summary_writer.add_text("job_config", job_config)

    # Load the figures from the given files and add them to tensorboard.
    for net_image_path in glob.glob('*.figure.pkl'):
        network_description = net_image_path.split('/')[-1].replace(
            '.figure.pkl', '')
        fig = pickle.load(open(net_image_path, 'rb'))
        summary_writer.add_figure(f'{network_description}', fig, close=True)
        os.remove(net_image_path)
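A hedged usage sketch of setup_logging; cfg is an assumed resolved Hydra DictConfig and "." the Hydra-managed working directory:

# Register the TensorBoard and console writers once per run and log the config
# as text (passing job_config=None would skip the config text).
setup_logging(job_config=cfg, log_dir=".")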
Example #7
 def _init_trainer_from_input_dir(cls, trainer: Trainer,
                                  state_dict_dump_file: str,
                                  input_dir: str) -> None:
     """Initialize trainer from given state dict and input directory.
     :param trainer: The trainer to initialize.
     :param state_dict_dump_file: The state dict dump file relative to input_dir.
     :param input_dir: The directory to load the state dict from.
     """
     with SwitchWorkingDirectoryToInput(input_dir):
         if os.path.exists(state_dict_dump_file):
             BColors.print_colored(
                 f"Trainer and model initialized from '{state_dict_dump_file}' of run '{input_dir}'!",
                 BColors.OKGREEN)
             trainer.load_state(state_dict_dump_file)
         else:
             BColors.print_colored(
                 "Model initialized with random weights! ", BColors.OKGREEN)
Example #8
    def _generate_ephemeral_init_kwargs(self) -> Dict[str, str]:
        """
        Prevent inconsistencies of type 3: Instantiated objects not triggering Hydra to load the correct dependent
        config modules.
        To prevent incorrectly resolved config group names, we identify qualifiers set as instantiated objects and
        derive the corresponding module name for these. This module name is passed to the Hydra instantiation, after
        which it is removed from the config. In subsequent steps the instantiated qualifier is injected into the loaded
        Hydra configuration, after which it is deleted (hence "ephemeral initialization kwargs").
        :return: Ephemeral initialization keyword arguments.
        """

        ephemeral_init_kwargs: Dict[str, str] = {}
        # "configuration" is a qualifier too, but the API only accepts config module names for it, therefore there is
        # no need to adjust the module loading process.
        for qual_attr in ("algorithm", "model", "launcher", "env", "runner"):
            # Ignore string attributes, since those will specify a module name anyway and thus are not ephemeral.
            if self._kwargs.get(qual_attr) and not isinstance(
                    self._kwargs[qual_attr], str):
                try:
                    module_names = self._map_comp_spec_to_conf_module_names(
                        qual_attr, self._kwargs[qual_attr])
                    assert qual_attr in module_names
                    ambiguous_attrs: Set[str] = set()

                    for arg in module_names:
                        # Ignore if already explicitly set via config module name and hence no ephemeral kwarg
                        # necessary.
                        if type(self._kwargs.get(
                                arg)) in _PrimitiveType.__args__:
                            continue
                        if len(module_names[arg]) == 1:
                            ephemeral_init_kwargs[arg] = next(
                                iter(module_names[arg]))
                        else:
                            ambiguous_attrs.add(arg)

                    if len(ambiguous_attrs):
                        raise InvalidSpecificationError(
                            "Ambiguous mapping for attribute(s) {aa} while resolving qualifying argument {arg}."
                            .format(aa=ambiguous_attrs, arg=qual_attr))
                except InvalidSpecificationError as error:
                    BColors.print_colored(
                        "Warning: {e} Configuration groups derived from this argument may not be initialized correctly."
                        .format(e=error.args[0]), BColors.WARNING)

        return ephemeral_init_kwargs
Example #9
 def create_distributed_rollout_actors(
         self, env_factory: Callable[[], Union[StructuredEnv,
                                               StructuredEnvSpacesMixin,
                                               LogStatsEnv]],
         policy: TorchPolicy, n_rollout_steps: int, n_actors: int,
         batch_size: int, queue_out_of_sync_factor: float,
         env_instance_seeds: List[int],
         agent_instance_seeds: List[int]) -> SubprocDistributedActors:
     """Create dummy (sequentially-executed) actors."""
     BColors.print_colored(
         'Determinism by seeding of the IMPALA algorithm with the Local runner cannot be '
         'guaranteed due to the asynchronicity of the implementation.',
         BColors.WARNING)
     return SubprocDistributedActors(env_factory, policy, n_rollout_steps,
                                     n_actors, batch_size,
                                     queue_out_of_sync_factor,
                                     self.start_method, env_instance_seeds,
                                     agent_instance_seeds)
Example #10
 def re_init_networks(self) -> None:
     """Reinitialize all parameters of the network."""
     for key, critic in self.networks.items():
         # initialize model weights
         if isinstance(critic, InferenceBlock):
             critic.apply(make_module_init_normc(1.0))
             for block_key in critic.perception_dict:
                 if block_key == 'q_value' or block_key.endswith('_q_values'):
                     critic.perception_dict[block_key].apply(make_module_init_normc(0.01))
         else:
             inference_blocks = list(filter(lambda cc: isinstance(cc, InferenceBlock), critic.children()))
             if len(inference_blocks) == 1:
                 inference_blocks[0].apply(make_module_init_normc(1.0))
                 for block_key in inference_blocks[0].perception_dict:
                     if block_key == 'q_value' or block_key.endswith('_q_values'):
                         inference_blocks[0].perception_dict[block_key].apply(make_module_init_normc(0.01))
             else:
                 BColors.print_colored(f'More or less than one inference block was found for'
                                       f' {key}, therefore the model could not be reinitialized', BColors.WARNING)
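The normc initialization pattern used above can also be applied to a single block; a minimal sketch grounded in the calls shown here and in example #16 (critic and the 'q_value' key are assumptions):

# make_module_init_normc(std) returns an init function suitable for Module.apply,
# with std=1.0 for the network body and std=0.01 for value/Q heads as above.
module_init = make_module_init_normc(std=0.01)
critic.perception_dict['q_value'].apply(module_init)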
Example #11
    def save_models(self) -> None:
        """Save the policies and critics as pdfs."""
        def plot_inference_graphs(nets_type, nets):
            """Draw inference graphs."""
            for net_name, net_model in nets.items():
                if not os.path.exists(f'{nets_type}_{net_name}'):
                    if isinstance(net_model, InferenceBlock):
                        InferenceGraph(net_model).save(
                            f'{nets_type}_{net_name}', './')
                    else:
                        children = net_model.children()
                        inference_blocks = list(
                            filter(lambda cc: isinstance(cc, InferenceBlock),
                                   children))
                        if len(inference_blocks) == 1:
                            InferenceGraph(inference_blocks[0]).save(
                                f'{nets_type}_{net_name}', './')
                        elif len(inference_blocks) > 1:
                            BColors.print_colored(
                                f'More than one inference block was found for'
                                f' {nets_type}-{net_name}, please revisit the model and make '
                                f'sure only one is present', BColors.WARNING)
                        else:
                            BColors.print_colored(
                                f'No inference block could be found in '
                                f'{nets_type}-{net_name}, thus no visual representation '
                                f'(of the model) could be created or saved',
                                BColors.WARNING)

        try:
            if self.policy:
                plot_inference_graphs("policy", self.policy.networks)
            if self.critic:
                plot_inference_graphs("critic", self.critic.networks)
        except ImportError as e:
            BColors.print_colored(
                f'Models graphical representation could not be saved: {e}',
                BColors.WARNING)
Example #12
 def create_distributed_rollouts(
     self,
     env: Union[StructuredEnv, StructuredEnvSpacesMixin],
     shared_noise: SharedNoiseTable,
     agent_instance_seed: int,
 ) -> ESDistributedRollouts:
     """use multi-process rollout generation"""
     BColors.print_colored(
         'Determinism by seeding of the ES algorithm with the Local runner cannot be '
         'guaranteed due to the asynchronicity of the implementation.',
         BColors.WARNING)
     n_workers = self.n_train_workers + self.n_eval_workers
     return ESSubprocDistributedRollouts(
         env_factory=self.env_factory,
         n_training_workers=self.n_train_workers,
         n_eval_workers=self.n_eval_workers,
         shared_noise=self.shared_noise,
         env_seeds=[
             self.maze_seeding.generate_env_instance_seed()
             for _ in range(n_workers)
         ],
         agent_seed=agent_instance_seed,
         start_method=self.start_method)
Example #13
    def update(self, reward: float) -> None:
        """Implementation of ModelSelection.update().

        :param reward: Reward (score) used for best model selection.
        """
        self.last_improvement += 1

        if reward > self.best_reward:
            if self.verbose:
                BColors.print_colored(
                    f"-> new overall best model {reward:.5f}!",
                    color=BColors.OKBLUE)
            self.best_reward = reward
            self.last_improvement = 0

            # update best model so far
            if self.model:
                self.best_state_dict = self.model.state_dict()

            # save state to file
            if self.dump_file:
                if self.verbose:
                    BColors.print_colored(
                        f"-> dumping new best model to {self.dump_file}!",
                        color=BColors.OKBLUE)
                torch.save(self.best_state_dict, self.dump_file)

        # regularly dump model
        if self.dump_interval and self.update_count % self.dump_interval == 0:

            # update dump path
            filename, file_extension = os.path.splitext(self.dump_file)
            dump_file = f'{filename}-epoch_{self.update_count}{file_extension}'
            if dump_file == self.dump_file:
                BColors.print_colored(
                    "Best model dumps get overwritten by regular model dumps!",
                    color=BColors.WARNING)

            # save state to file
            if self.verbose:
                BColors.print_colored(f"-> regular model dump to {dump_file}!",
                                      color=BColors.OKBLUE)
            state_dict = self.model.state_dict()
            torch.save(state_dict, dump_file)

        self.update_count += 1
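A hedged sketch of how a trainer drives this selection (cf. example #19 below); evaluate_policy is a hypothetical stand-in for any reward source:

# The dump_file/model keyword names are taken from the constructors in examples #14 and #19.
model_selection = BestModelSelection(dump_file="state_dict.pt", model=policy)
for epoch in range(n_epochs):
    reward = evaluate_policy()  # hypothetical helper returning a scalar reward
    model_selection.update(reward)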
Example #14
    def setup(self, cfg: DictConfig) -> None:
        """
        See :py:meth:`~maze.train.trainers.common.training_runner.TrainingRunner.setup`.
        """

        super().setup(cfg)

        env = self.env_factory()

        with SwitchWorkingDirectoryToInput(cfg.input_dir):
            dataset = Factory(base_type=Dataset).instantiate(
                self.dataset, conversion_env_factory=self.env_factory)

        assert len(dataset) > 0, f"Expected to find trajectory data, but did not find any. Please check that " \
                                 f"the path you supplied is correct."
        size_in_byte, size_in_gbyte = getsize(dataset)
        BColors.print_colored(
            f'Size of loaded dataset: {size_in_byte} -> {size_in_gbyte} GB',
            BColors.OKBLUE)
        validation, train = self._split_dataset(
            dataset, cfg.algorithm.validation_percentage,
            self.maze_seeding.generate_env_instance_seed())

        # Create data loaders
        torch_generator = torch.Generator().manual_seed(
            self.maze_seeding.generate_env_instance_seed())
        train_data_loader = DataLoader(train,
                                       shuffle=True,
                                       batch_size=cfg.algorithm.batch_size,
                                       generator=torch_generator,
                                       num_workers=self.dataset.n_workers)

        policy = TorchPolicy(
            networks=self._model_composer.policy.networks,
            distribution_mapper=self._model_composer.distribution_mapper,
            device=cfg.algorithm.device,
            substeps_with_separate_agent_nets=self._model_composer.policy.
            substeps_with_separate_agent_nets)
        policy.seed(self.maze_seeding.agent_global_seed)

        self._model_selection = BestModelSelection(
            self.state_dict_dump_file,
            policy,
            dump_interval=self.dump_interval)
        optimizer = Factory(Optimizer).instantiate(cfg.algorithm.optimizer,
                                                   params=policy.parameters())
        loss = BCLoss(action_spaces_dict=env.action_spaces_dict,
                      entropy_coef=cfg.algorithm.entropy_coef)

        self._trainer = BCTrainer(algorithm_config=self._cfg.algorithm,
                                  data_loader=train_data_loader,
                                  policy=policy,
                                  optimizer=optimizer,
                                  loss=loss)

        # initialize model from input_dir
        self._init_trainer_from_input_dir(
            trainer=self._trainer,
            state_dict_dump_file=self.state_dict_dump_file,
            input_dir=cfg.input_dir)

        # evaluate using the validation set
        self.evaluators = []
        if len(validation) > 0:
            validation_data_loader = DataLoader(
                validation,
                shuffle=True,
                batch_size=cfg.algorithm.batch_size,
                generator=torch_generator,
                num_workers=self.dataset.n_workers)
            self.evaluators += [
                BCValidationEvaluator(
                    data_loader=validation_data_loader,
                    loss=loss,
                    logging_prefix="eval-validation",
                    model_selection=self.
                    _model_selection  # use the validation set evaluation to select the best model
                )
            ]

        # if evaluation episodes are set, perform additional evaluation by policy rollout
        if cfg.algorithm.n_eval_episodes > 0:
            eval_env = self.create_distributed_eval_env(
                self.env_factory,
                self.eval_concurrency,
                logging_prefix="eval-rollout")
            eval_env_instance_seeds = [
                self.maze_seeding.generate_env_instance_seed()
                for _ in range(self.eval_concurrency)
            ]
            eval_env.seed(eval_env_instance_seeds)
            self.evaluators += [
                RolloutEvaluator(eval_env,
                                 n_episodes=cfg.algorithm.n_eval_episodes,
                                 model_selection=None)
            ]
Example #15
File: __init__.py Project: enlite-ai/maze
""" MazeRL init """
import os

from maze.utils.bcolors import BColors

__version__ = "0.1.8"

# fixes this issue (https://github.com/pytorch/pytorch/issues/37377) when using conda
if "MKL_THREADING_LAYER" not in os.environ or os.environ[
        'MKL_THREADING_LAYER'] != 'GNU':
    BColors.print_colored(
        "INFO: Setting MKL_THREADING_LAYER=GNU to avoid PyTorch issues with conda!",
        color=BColors.OKBLUE)
    os.environ['MKL_THREADING_LAYER'] = 'GNU'

# set number of threads to 1 to avoid performance drop with distributed environments
if "OMP_NUM_THREADS" not in os.environ:
    BColors.print_colored(
        "INFO: Setting OMP_NUM_THREADS=1 to avoid performance drop when using distributed environments!",
        color=BColors.OKBLUE)
    os.environ["OMP_NUM_THREADS"] = "1"
Example #16
    def template_q_value_net(
            self,
            observation_space: Optional[spaces.Dict],
            action_space: spaces.Dict,
            only_discrete_spaces: bool,
            perception_net: Optional[InferenceBlock] = None) -> InferenceBlock:
        """Compiles a template state action (Q) value network.

        :param observation_space: The input observations for the perception network.
        :param action_space: The action space that defines the network action heads.
        :param perception_net: An initial network to continue from.
                               (e.g. useful for shared weights. Model building continues from the key 'latent'.)
        :param only_discrete_spaces: A dict specifying if the action spaces w.r.t. the step only hold discrete action
                                     spaces.
        :return: A q value network (critic) InferenceBlock.
        """
        assert all(map(lambda space: isinstance(space, (spaces.Discrete, spaces.Box)),
                       action_space.spaces.values())), 'Only discrete and box spaces supported thus far for q values ' \
                                                       'critic.'

        if not only_discrete_spaces:
            discrete_space = list(
                filter(
                    lambda kk: isinstance(action_space.spaces[kk], spaces.
                                          Discrete), action_space.spaces))
            if len(discrete_space) > 0:
                new_action_space = {}
                for key in action_space.spaces.keys():
                    if key in discrete_space:
                        new_action_space[key] = OneHotPreProcessor(
                            action_space.spaces[key]).processed_space()
                    else:
                        new_action_space[key] = action_space.spaces[key]
                action_space = spaces.Dict(new_action_space)
            observation_space = spaces.Dict({
                **observation_space.spaces,
                **action_space.spaces
            })
            value_heads = {'q_value': 1}
        else:
            value_heads = {
                f'{act_key}_q_values': act_space.n
                for act_key, act_space in action_space.spaces.items()
            }

        # check if actions are considered as observations for the state-action critic
        for action_head in action_space.spaces.keys():
            if action_head not in self.model_builder.observation_modality_mapping:
                BColors.print_colored(
                    f'TemplateModelComposer: The action \'{action_head}\' could not be found in the '
                    f'model_builder.observation_modality_mapping and will not be considered '
                    f'as an input to the state-action critic!', BColors.FAIL)

        # build perception net
        if perception_net is None:
            perception_net = self.template_perception_net(observation_space)

        perception_dict = perception_net.perception_dict
        for value_head, output_units in value_heads.items():
            # initialize action head
            value_net = LinearOutputBlock(
                in_keys="latent",
                out_keys=value_head,
                in_shapes=perception_dict["latent"].out_shapes(),
                output_units=output_units)

            module_init = make_module_init_normc(std=0.01)
            value_net.apply(module_init)

            # extend perception dictionary
            perception_dict[value_head] = value_net

        # compile inference model
        net = InferenceBlock(in_keys=perception_net.in_keys,
                             out_keys=list(value_heads.keys()),
                             in_shapes=perception_net.in_shapes,
                             perception_blocks=perception_dict)

        return net
Example #17
    def from_observation_space(
            self, observation_space: spaces.Dict) -> InferenceBlock:
        """implementation of :class:`~maze.perception.builders.base.BaseModelBuilder` interface
        """

        # get a sample observation
        sample = observation_space.sample()

        # init perception dict
        perception_dict = dict()
        in_keys = list()

        # --- iterate and process observations ---
        for obs_key in observation_space.spaces.keys():
            if obs_key not in self.observation_modality_mapping:
                BColors.print_colored(
                    f'ConcatModelBuilder: The observation \'{obs_key}\' could not be found in the '
                    f'model_builder.observation_modality_mapping and will not be considered as an input to the network.',
                    BColors.WARNING)
                continue
            in_keys.append(obs_key)
            modality = self.observation_modality_mapping[obs_key]
            block_type = self.obs_to_block[modality]

            # compile network block
            params = self.block_params[modality]
            net = block_type(in_keys=obs_key,
                             out_keys=f"{obs_key}_{block_type.__name__}",
                             in_shapes=sample[obs_key].shape,
                             **params)
            perception_dict[f"{obs_key}_{block_type.__name__}"] = net

        # --- merge latent space observations ---
        out_key = ConcatModelBuilderKeys.CONCAT
        if ConcatModelBuilderKeys.HIDDEN not in self.obs_to_block \
                and ConcatModelBuilderKeys.RECURRENCE not in self.obs_to_block:
            out_key = ConcatModelBuilderKeys.LATENT

        latent_keys = list(perception_dict.keys())
        latent_shapes = [
            net.out_shapes()[0] for net in perception_dict.values()
        ]
        net = ConcatenationBlock(in_keys=latent_keys,
                                 out_keys=out_key,
                                 in_shapes=latent_shapes,
                                 concat_dim=-1)
        perception_dict[out_key] = net

        # --- process with presets ---
        if ConcatModelBuilderKeys.HIDDEN in self.obs_to_block:
            in_key = out_key
            out_key = ConcatModelBuilderKeys.HIDDEN
            if ConcatModelBuilderKeys.RECURRENCE not in self.obs_to_block:
                out_key = ConcatModelBuilderKeys.LATENT

            block_type = self.obs_to_block[ConcatModelBuilderKeys.HIDDEN]
            net = block_type(
                in_keys=in_key,
                out_keys=out_key,
                in_shapes=perception_dict[in_key].out_shapes(),
                **self.block_params[ConcatModelBuilderKeys.HIDDEN])
            perception_dict[out_key] = net

        if ConcatModelBuilderKeys.RECURRENCE in self.obs_to_block:
            in_key = out_key
            out_key = ConcatModelBuilderKeys.LATENT

            block_type = self.obs_to_block[ConcatModelBuilderKeys.RECURRENCE]
            net = block_type(
                in_keys=in_key,
                out_keys=out_key,
                in_shapes=perception_dict[in_key].out_shapes(),
                **self.block_params[ConcatModelBuilderKeys.RECURRENCE])
            perception_dict[out_key] = net

        # compile inference block
        in_shapes = [sample[obs_key].shape for obs_key in in_keys]
        net = InferenceBlock(in_keys=in_keys,
                             out_keys=ConcatModelBuilderKeys.LATENT,
                             in_shapes=in_shapes,
                             perception_blocks=perception_dict)

        return net
Example #18
    def _train_async(self, n_epochs) -> None:
        """Train policy using the synchronous advantage actor critic.

        :param n_epochs: number of epochs to train.
        """

        # run training epochs
        if n_epochs <= 0:
            n_epochs = sys.maxsize
        epoch_length = self.algorithm_config.epoch_length
        patience = self.algorithm_config.patience

        # Perform a hard update on the critic
        self.learner_model.critic.update_target_weights(1.0)

        # run training epochs
        for epoch in range(n_epochs):
            start = time.time()
            print("Update epoch - {}".format(epoch))

            # compute evaluation reward
            reward = -np.inf
            if self.evaluator:
                self.evaluate()
            # take training reward and notify model selection
            else:
                if epoch > 0:
                    prev_reward = reward
                    try:
                        reward = self.distributed_workers.get_stats_value(
                            BaseEnvEvents.reward,
                            LogStatsLevel.EPOCH,
                            name="mean")
                    except KeyError:
                        reward = prev_reward

                # best model selection
                self.model_selection.update(reward)

            # time spent on evaluation
            time_evaluation = time.time() - start

            # early stopping
            if patience and self.model_selection.last_improvement > patience:
                BColors.print_colored(
                    "-> no improvement since {} epochs: EARLY STOPPING!".
                    format(patience),
                    color=BColors.WARNING)
                increment_log_step()
                break

            time_deq_actors = 0
            time_before_update = time.time()
            for epoch_step_idx in range(epoch_length):
                q_size_before, q_size_after, time_deq_actors = self.distributed_workers.collect_rollouts(
                )

                # Record the queue sizes
                self.events.estimated_queue_sizes(after=q_size_after,
                                                  before=q_size_before)

                # policy update
                for batch_updates in range(
                        self.algorithm_config.num_batches_per_iter):
                    self._update()
                    total_num_batch_updates =\
                        (batch_updates + epoch_step_idx * self.algorithm_config.num_batches_per_iter +
                         (epoch_length * self.algorithm_config.num_batches_per_iter) * epoch)
                    if total_num_batch_updates % self.algorithm_config.target_update_interval == 0:
                        self.learner_model.critic.update_target_weights(
                            self.algorithm_config.tau)

                    self.distributed_workers.broadcast_updated_policy(
                        self.learner_model.policy.state_dict())
            time_updating = time.time() - time_before_update

            total_time = time.time() - start
            self.events.time_dequeuing_actors(time=time_deq_actors,
                                              percent=time_deq_actors /
                                              total_time)

            # Buffer events
            self.events.buffer_size(len(
                self.distributed_workers.replay_buffer))
            self.events.buffer_avg_pick_per_transition(
                value=self.distributed_workers.replay_buffer.
                cum_moving_avg_num_picks)

            # increase step counter (which in turn triggers the log statistics writing)
            increment_log_step()

            print("Time required for epoch: {:.2f}s".format(total_time))
            print(
                ' - total ({} steps) updating: {:.2f}s ({:.2f}%), mean time/step: {:.2f}s'
                .format(
                    epoch_length * self.algorithm_config.num_batches_per_iter,
                    time_updating, time_updating / total_time, time_updating /
                    (epoch_length *
                     self.algorithm_config.num_batches_per_iter)))
            print(
                ' - total time evaluating the model: {:.2f}s ({:.2f}%)'.format(
                    time_evaluation, time_evaluation / total_time))
Example #19
    def train(self, n_epochs: Optional[int] = None) -> None:
        """Main train method of the actor critic trainer. This is used in order to do algorithm specific operations
        around this method in the main train method which is called by the runner. (e.g. this is used when it comes to
        multiprocessing)

        :param n_epochs: Number of epochs to train.
        """

        n_epochs = self.algorithm_config.n_epochs if n_epochs is None else n_epochs

        # init minimum best model selection for early stopping
        if self.model_selection is None:
            self.model_selection = BestModelSelection(dump_file=None, model=None)

        # preserve original training coef setting
        value_loss_coef = self.algorithm_config.value_loss_coef
        policy_loss_coef = self.algorithm_config.policy_loss_coef
        entropy_coef = self.algorithm_config.entropy_coef

        # run training epochs
        if n_epochs <= 0:
            n_epochs = sys.maxsize

        for epoch in range(n_epochs):
            start = time.time()
            print("Update epoch - {}".format(epoch))

            # check for critic burn in and reset coefficient to only update the critic
            if epoch < self.algorithm_config.critic_burn_in_epochs:
                self.algorithm_config.value_loss_coef = 1.0
                self.algorithm_config.policy_loss_coef = 0.0
                self.algorithm_config.entropy_coef = 0.0
            else:
                self.algorithm_config.value_loss_coef = value_loss_coef
                self.algorithm_config.policy_loss_coef = policy_loss_coef
                self.algorithm_config.entropy_coef = entropy_coef

            # compute evaluation reward
            reward = -np.inf
            if self.evaluator:
                self.evaluate()
            # take training reward and notify best model selection manually
            else:
                if epoch > 0:
                    prev_reward = reward
                    try:
                        reward = self.rollout_generator.get_stats_value(BaseEnvEvents.reward, LogStatsLevel.EPOCH,
                                                                        name="mean")
                    except KeyError:
                        reward = prev_reward

                self.model_selection.update(reward)

            # early stopping
            if self.algorithm_config.patience and \
                    self.model_selection.last_improvement > self.algorithm_config.patience:
                BColors.print_colored("-> no improvement since {} epochs: EARLY STOPPING!"
                                      .format(self.algorithm_config.patience), color=BColors.WARNING)
                increment_log_step()
                break

            # policy update
            for _ in tqdm(range(self.algorithm_config.epoch_length)):
                update_start = time.time()
                self._update()
                self.ac_events.time_update(time.time() - update_start)

            epoch_time = time.time() - start
            self.ac_events.time_epoch(epoch_time)

            # increase step counter (which in turn triggers the log statistics writing)
            increment_log_step()

            print("Time required for epoch: {:.2f}s".format(epoch_time))
Example #20
def manipulate_hparams_logging_for_exp(exp_dir: str, metrics: List[Tuple[
    str, Union[Callable[[np.ndarray], float], float], str]],
                                       clear_hparams: bool) -> None:
    """Manipulate the hparams logging for a given experiment directory.

    That is, either add hparams logging by writing a new tfevents file, replace an already present hparams events
    file, or simply remove all hparams event files.

    :param exp_dir: The experiment directory.
    :param metrics: A list of metrics to be added to tensorboard. Each tuple in the list should consist of the key to
        query the original events file, the function to aggregate the values to a single float value, and a short
        name describing the function, e.g. metrics=[('train_BaseEnvEvents/reward/mean', np.max, 'max')]
        Note: Instead of the callable a float value can be given as well, which will be used instead of querying the
        file.
    :param clear_hparams: Flag specifying whether the hparams event files should simply be deleted.
    """

    # Get hparams file if present and delete it
    tf_hparams_summary_files = glob.glob(
        f"{exp_dir}/*events.out.tfevents*_hparams")
    if len(tf_hparams_summary_files) > 0:
        for ff in tf_hparams_summary_files:
            os.remove(ff)

    # Assert that only one events file is present
    tf_summary_files = glob.glob(f"{exp_dir}/*events.out.tfevents*")
    hydra_config_file = os.path.join(exp_dir, '.hydra/config.yaml')
    if len(tf_summary_files) == 0 or not os.path.exists(hydra_config_file):
        return
    assert len(tf_summary_files) == 1

    if not clear_hparams:

        # Read the config.yaml file as hyperparameters
        assert os.path.exists(hydra_config_file)
        cfg = yaml.safe_load(open(hydra_config_file))
        hparam_dict = flatten(dict(cfg))

        # compute each given metric from the original events file (or take the provided float directly)
        metrics_dict = dict()
        for (metric_key, metric_func, metric_func_name) in metrics:
            try:
                if isinstance(metric_func, float):
                    new_metric_name = f'{metric_key}-{metric_func_name}'
                    metrics_dict[new_metric_name] = metric_func
                else:
                    events_df = tensorboard_to_pandas(tf_summary_files[0])
                    new_metric_name = f'{metric_key}-{metric_func_name}'
                    metrics_dict[new_metric_name] = metric_func(
                        np.asarray(events_df.loc[metric_key]))
            except KeyError:
                BColors.print_colored(
                    f'The given metric key: {metric_key} could not be found in the summary file for exp: '
                    f'{exp_dir}', BColors.WARNING)

        # Store all files and dirs present in the directory before creating a new summary file writer
        all_elems_in_exp_dir_before = set(os.listdir(exp_dir))

        # Create the summary file writer
        summary_writer = SummaryWriter(log_dir=exp_dir,
                                       filename_suffix='_hparams')

        # Add hparams to the summary writer and close
        print(
            f'- Adding Tensorflow hparams events for runs in directory: {exp_dir}'
        )
        summary_writer.add_hparams(hparam_dict, metrics_dict)
        summary_writer.close()

        # Check what new files and dirs have been created.
        new_elems = set(os.listdir(exp_dir)) - all_elems_in_exp_dir_before
        new_dirs = list(
            filter(lambda x: os.path.isdir(os.path.join(exp_dir, x)),
                   list(new_elems)))
        new_files = list(
            filter(lambda x: os.path.isfile(os.path.join(exp_dir, x)),
                   list(new_elems)))

        # There should be one new file and one new dir
        assert len(new_dirs) == 1, new_dirs
        assert len(new_files) == 1, new_files

        # Get the proper hparams events file
        proper_file = os.listdir(os.path.join(exp_dir, new_dirs[0]))
        assert len(proper_file) == 1

        # Remove the empty events file (created for an unknown reason)
        os.remove(os.path.join(exp_dir, new_files[0]))

        # Move the proper hparams events file into the same dir, for same naming in tensorboard
        os.rename(os.path.join(exp_dir, new_dirs[0], proper_file[0]),
                  os.path.join(exp_dir, new_files[0]))

        # Remove the now empty dir created by the summary writer
        os.rmdir(os.path.join(exp_dir, new_dirs[0]))