Esempio n. 1
0
 def initialize(self, env_spaces, share_memory=False, global_B=1,
                env_ranks=None):
     """Extend base initialization with the action and option distributions.

     The action distribution is categorical over the discrete action space;
     the option distribution is categorical over
     ``model_kwargs["option_size"]``.
     """
     super().initialize(
         env_spaces, share_memory, global_B=global_B, env_ranks=env_ranks)
     n_actions = env_spaces.action.n
     self.distribution = Categorical(dim=n_actions)
     option_size = self.model_kwargs["option_size"]
     self.distribution_omega = Categorical(dim=option_size)
Esempio n. 2
0
 def initialize(self, env_spaces, share_memory=False, global_B=1,
                env_ranks=None):
     """Build the twin Q networks and their targets, then the categorical
     action distribution.

     ``initial_model_state_dict`` is hidden from the base class during
     ``super().initialize`` so loading happens here, after every model
     exists.
     """
     saved_state_dict = self.initial_model_state_dict
     self.initial_model_state_dict = None  # Don't let base agent try to load.
     super().initialize(
         env_spaces, share_memory, global_B=global_B, env_ranks=env_ranks)
     self.initial_model_state_dict = saved_state_dict
     q_kwargs = dict(**self.env_model_kwargs, **self.q_model_kwargs)
     self.q1_model = self.QModelCls(**q_kwargs)
     self.q2_model = self.QModelCls(**q_kwargs)
     self.target_q1_model = self.QModelCls(**q_kwargs)
     self.target_q2_model = self.QModelCls(**q_kwargs)
     # Targets start as exact copies of the online networks.
     self.target_q1_model.load_state_dict(self.q1_model.state_dict())
     self.target_q2_model.load_state_dict(self.q2_model.state_dict())
     if self.initial_model_state_dict is not None:
         self.load_state_dict(self.initial_model_state_dict)
     self.distribution = Categorical(dim=env_spaces.action.n)
Esempio n. 3
0
    def initialize(self,
                   env_spaces,
                   share_memory=False,
                   global_B=1,
                   env_ranks=None):
        """Extend base initialization with a categorical action distribution
        and the requested RAD data-augmentation functions.

        ``self.data_augs`` is a '-'-separated string of augmentation names;
        an empty string selects no augmentation.
        """
        super().initialize(env_spaces, share_memory,
                           global_B=global_B, env_ranks=env_ranks)
        self.distribution = Categorical(dim=env_spaces.action.n)

        available = {
            'crop': rad.random_crop,
            'crop_horiz': rad.random_crop_horizontile,
            'grayscale': rad.random_grayscale,
            'cutout': rad.random_cutout,
            'cutout_color': rad.random_cutout_color,
            'flip': rad.random_flip,
            'rotate': rad.random_rotation,
            'rand_conv': rad.random_convolution,
            'color_jitter': rad.random_color_jitter,
            'no_aug': rad.no_aug,
        }

        names = self.data_augs.split('-') if self.data_augs != "" else []
        self.augs_funcs = OrderedDict()
        for name in names:
            assert name in available, 'invalid data aug string'
            self.augs_funcs[name] = available[name]
Esempio n. 4
0
    def __init__(
            self,
            observation_shape,
            hidden_sizes,
            action_size,
            n_tile=20,
    ):
        """Two-headed MLP model: a 4-way location head and a 3-D delta head
        (mean and std) conditioned on the tiled location.

        Args:
            observation_shape: shape whose summed size is the flat input dim.
            hidden_sizes: hidden layer sizes shared by both MLPs.
            action_size: stored action dimensionality.
            n_tile: times the 4-way location vector is tiled before being
                concatenated with the observation for the delta head.
        """
        super().__init__()
        self._obs_ndim = 1
        self._n_tile = n_tile
        self._action_size = action_size

        flat_obs_dim = int(np.sum(observation_shape))
        self.mlp_loc = MlpModel(input_size=flat_obs_dim,
                                hidden_sizes=hidden_sizes,
                                output_size=4)
        self.mlp_delta = MlpModel(input_size=flat_obs_dim + 4 * n_tile,
                                  hidden_sizes=hidden_sizes,
                                  output_size=3 * 2)  # mean and std per dim

        self.delta_distribution = Gaussian(
            dim=3,
            squash=True,
            min_std=np.exp(MIN_LOG_STD),
            max_std=np.exp(MAX_LOG_STD),
        )
        self.cat_distribution = Categorical(4)

        self._counter = 0
Esempio n. 5
0
    def __init__(
            self,
            observation_shape,
            hidden_sizes,
            action_size,
            all_corners=False
            ):
        """Single MLP emitting delta mean/std plus 4 location logits.

        When ``all_corners`` is set, the delta covers all four corners
        (12 dims) instead of a single 3-D delta.
        """
        super().__init__()
        self._obs_ndim = 1
        self._all_corners = all_corners
        print('all corners', self._all_corners)

        delta_dim = 12 if all_corners else 3
        self._delta_dim = delta_dim
        flat_obs_dim = int(np.sum(observation_shape))
        # Head output: mean and std per delta dim, plus 4 location probs.
        self.mlp = MlpModel(
            input_size=flat_obs_dim,
            hidden_sizes=hidden_sizes,
            output_size=2 * delta_dim + 4,
        )

        self.delta_distribution = Gaussian(
            dim=delta_dim,
            squash=True,
            min_std=np.exp(MIN_LOG_STD),
            max_std=np.exp(MAX_LOG_STD),
        )
        self.cat_distribution = Categorical(4)
Esempio n. 6
0
    def initialize(self, n_updates, cuda_idx=None):
        """Build encoder, VAE head, and decoder from a replay example, move
        them to the chosen device, set up optimization, and optionally load
        a saved state dict.

        Args:
            n_updates: total number of optimizer updates (for schedules).
            cuda_idx: GPU index, or None for CPU.
        """
        if cuda_idx is None:
            self.device = torch.device("cpu")
        else:
            self.device = torch.device("cuda", index=cuda_idx)

        examples = self.load_replay()
        self.encoder = self.EncoderCls(
            image_shape=examples.observation.shape,
            latent_size=self.latent_size,  # UNUSED
            **self.encoder_kwargs
        )
        if self.onehot_action:
            # Discrete actions only: count inferred from the replay buffer.
            act_dim = self.replay_buffer.samples.action.max() + 1
            self.distribution = Categorical(act_dim)
        else:
            act_shape = self.replay_buffer.samples.action.shape[2:]
            assert len(act_shape) == 1
            act_dim = act_shape[0]
        self.vae_head = self.VaeHeadCls(
            latent_size=self.latent_size,
            action_size=act_dim * self.delta_T,
            hidden_sizes=self.hidden_sizes,
        )
        self.decoder = self.DecoderCls(
            latent_size=self.latent_size,
            **self.decoder_kwargs
        )
        for module in (self.encoder, self.vae_head, self.decoder):
            module.to(self.device)

        self.optim_initialize(n_updates)

        if self.initial_state_dict is not None:
            self.load_state_dict(self.initial_state_dict)
Esempio n. 7
0
    def initialize(self, n_updates, cuda_idx=None):
        """Build the encoder and inverse model from a replay example, move
        them to the chosen device, set up optimization, and optionally load
        a saved state dict.

        Args:
            n_updates: total number of optimizer updates (for schedules).
            cuda_idx: GPU index, or None for CPU.
        """
        self.device = torch.device(
            "cpu") if cuda_idx is None else torch.device("cuda",
                                                         index=cuda_idx)

        examples = self.load_replay()
        self.encoder = self.EncoderCls(
            image_shape=examples.observation.shape,
            latent_size=10,  # UNUSED
            **self.encoder_kwargs)

        if self.onehot_actions:
            # Discrete case: action count inferred from the replay buffer.
            act_dim = self.replay_buffer.samples.action.max() + 1
            self.distribution = Categorical(act_dim)
        else:
            # Continuous case: expect [T, B, act_dim]-shaped action samples.
            # BUG FIX: was `assert len(...shape == 3)`, which takes len() of
            # a bool (TypeError) instead of checking the rank.
            assert len(self.replay_buffer.samples.action.shape) == 3
            act_dim = self.replay_buffer.samples.action.shape[2]
        self.inverse_model = self.InverseModelCls(
            input_size=self.encoder.conv_out_size,
            action_size=act_dim,
            num_actions=self.delta_T,
            use_input="conv",
            **self.inverse_model_kwargs)
        self.encoder.to(self.device)
        self.inverse_model.to(self.device)

        self.optim_initialize(n_updates)

        if self.initial_state_dict is not None:
            self.load_state_dict(self.initial_state_dict)
 def initialize(self, env_spaces, share_memory=False, global_B=1,
                env_ranks=None):
     """Extend base initialization with a categorical distribution sized
     to the discrete action space."""
     super().initialize(
         env_spaces, share_memory, global_B=global_B, env_ranks=env_ranks)
     self.distribution = Categorical(dim=env_spaces.action.n)
Esempio n. 9
0
 def initialize(self, env_spaces, share_memory=False,
                global_B=1, env_ranks=None):
     """Extend base initialization; optionally replace the model's policy
     and value heads from the ``override`` config, then build the
     categorical action distribution."""
     super().initialize(env_spaces, share_memory,
                        global_B=global_B, env_ranks=env_ranks)
     if self.override['override_policy_value']:
         self.model.override_policy_value(
             policy_layers=self.override["policy_layers"],
             value_layers=self.override["value_layers"])
     self.distribution = Categorical(dim=env_spaces.action.n)
Esempio n. 10
0
 def initialize(self, env_spaces, share_memory=False, global_B=1,
                env_ranks=None):
     """Extend base initialization with a Gaussian action distribution and
     a categorical option distribution.

     NOTE(review): ``global_B`` and ``env_ranks`` are accepted but not
     forwarded to the base class — confirm this is intentional.
     """
     super().initialize(env_spaces, share_memory)
     assert len(env_spaces.action.shape) == 1
     action_dim = env_spaces.action.shape[0]
     self.distribution = Gaussian(dim=action_dim)
     self.distribution_omega = Categorical(
         dim=self.model_kwargs["option_size"])
Esempio n. 11
0
 def initialize(self, env_spaces, share_memory=False,
                global_B=1, env_ranks=None):
     """Build twin Q models and their targets, then the categorical action
     distribution.

     ``initial_model_state_dict`` is hidden from the base class during
     ``super().initialize`` so loading happens here, once every model
     exists.
     """
     saved = self.initial_model_state_dict
     self.initial_model_state_dict = None  # Don't let base agent try to load.
     super().initialize(env_spaces, share_memory,
                        global_B=global_B, env_ranks=env_ranks)
     self.initial_model_state_dict = saved
     q_kwargs = dict(**self.env_model_kwargs, **self.q_model_kwargs)
     self.q1_model = self.QModelCls(**q_kwargs)
     self.q2_model = self.QModelCls(**q_kwargs)
     self.target_q1_model = self.QModelCls(**q_kwargs)
     self.target_q2_model = self.QModelCls(**q_kwargs)
     # Targets start as exact copies of the online networks.
     self.target_q1_model.load_state_dict(self.q1_model.state_dict())
     self.target_q2_model.load_state_dict(self.q2_model.state_dict())
     if self.initial_model_state_dict is not None:
         self.load_state_dict(self.initial_model_state_dict)
     self.distribution = Categorical(dim=env_spaces.action.n)
Esempio n. 12
0
    def initialize(self, env_spaces, share_memory=False, global_B=1, env_ranks=None):
        """Build the model, optionally load pretrained weights, and create
        the categorical action distribution.

        Loading modes (checked in order):
          * ``self.load_conv``: load only the convolutional weights from a
            UL snapshot (``params["algo_state_dict"]["encoder"]``).
          * ``self.load_all``: load the full agent state dict from an RL
            snapshot (``params["agent_state_dict"]``).
          * otherwise: start from scratch.
        """
        self.model = self.ModelCls(
            image_shape=env_spaces.observation.shape,
            output_size=env_spaces.action.n,
            **self.model_kwargs
        )  # Model will have stop_grad inside it.
        if self.load_conv:
            logger.log("Agent loading state dict: " + self.state_dict_filename)
            loaded_state_dict = torch.load(
                self.state_dict_filename, map_location=torch.device("cpu")
            )
            # From UL, saves snapshot: params["algo_state_dict"]["encoder"]
            loaded_state_dict = loaded_state_dict.get(
                "algo_state_dict", loaded_state_dict
            )
            loaded_state_dict = loaded_state_dict.get("encoder", loaded_state_dict)
            # A bit onerous, but ensures that state dicts match:
            conv_state_dict = OrderedDict(
                [
                    (k.replace("conv.", "", 1), v)
                    for k, v in loaded_state_dict.items()
                    if k.startswith("conv.")
                ]
            )
            self.model.conv.load_state_dict(conv_state_dict)
            logger.log("Agent loaded CONV state dict.")
        elif self.load_all:
            # From RL, saves snapshot: params["agent_state_dict"]
            loaded_state_dict = torch.load(
                self.state_dict_filename, map_location=torch.device("cpu")
            )
            self.load_state_dict(loaded_state_dict["agent_state_dict"])
            logger.log("Agent loaded FULL state dict.")  # BUG FIX: was "Agnet".
        else:
            logger.log("Agent NOT loading state dict.")

        if share_memory:
            self.model.share_memory()
            self.shared_model = self.model
        if self.initial_model_state_dict is not None:
            raise NotImplementedError
        self.distribution = Categorical(dim=env_spaces.action.n)
        self.env_spaces = env_spaces
        self.share_memory = share_memory
Esempio n. 13
0
    def initialize(self,
                   env_spaces,
                   share_memory=False,
                   global_B=1,
                   env_ranks=None):
        """Build the critic, its target copy, and the decoder; tie the
        actor's encoder weights to the critic's; then create the
        categorical action distribution.

        ``initial_model_state_dict`` is hidden from the base class during
        ``super().initialize`` so loading happens here, after every model
        exists.
        """
        saved_state_dict = self.initial_model_state_dict
        self.initial_model_state_dict = None  # Don't let base agent try to load.
        super().initialize(env_spaces,
                           share_memory,
                           global_B=global_B,
                           env_ranks=env_ranks)
        self.initial_model_state_dict = saved_state_dict

        critic_kwargs = dict(**self.env_model_kwargs,
                             EncoderCls=self.EncoderCls,
                             encoder_kwargs=self.encoder_kwargs,
                             **self.critic_kwargs)
        self.critic = self.CriticCls(**critic_kwargs)
        self.target_model = self.CriticCls(**critic_kwargs)
        self.decoder = self.DecoderCls(**self.encoder_kwargs)
        # Target starts as an exact copy of the critic.
        self.target_model.load_state_dict(self.critic.state_dict())
        # Tie the encoder of the actor to that of the critic.
        self.model.encoder.copy_weights_from(self.critic.encoder)

        if self.initial_model_state_dict is not None:
            self.load_state_dict(self.initial_model_state_dict)
        self.distribution = Categorical(dim=env_spaces.action.n)
Esempio n. 14
0
 def initialize(self, env_spaces, share_memory=False, global_B=1,
                env_ranks=None):
     """Extend the base method with a Gaussian action distribution and a
     categorical option distribution."""
     super().initialize(env_spaces, share_memory,
                        global_B=global_B, env_ranks=env_ranks)
     # Require a 1-D continuous action space with symmetric, uniform bounds.
     assert len(env_spaces.action.shape) == 1
     assert len(np.unique(env_spaces.action.high)) == 1
     assert np.all(env_spaces.action.low == -env_spaces.action.high)
     action_dim = env_spaces.action.shape[0]
     self.distribution = Gaussian(dim=action_dim)
     self.distribution_omega = Categorical(
         dim=self.model_kwargs["option_size"])
Esempio n. 15
0
 def initialize(self, env_spaces, share_memory=False):
     """Extend base initialization with a categorical distribution sized
     to the discrete action space."""
     super().initialize(env_spaces, share_memory)
     n_actions = env_spaces.action.n
     self.distribution = Categorical(dim=n_actions)