Example #1
    def __init__(
        self,
        observation_shape,
        output_size,
        hidden_sizes=None,  # None for default (see below).
        lstm_size=256,
        nonlinearity=torch.nn.ReLU,
        normalize_observation=False,
        norm_obs_clip=10,
        norm_obs_var_clip=1e-6,
    ):
        """Instantiate neural net module according to inputs."""
        super().__init__()
        self._obs_n_dim = len(observation_shape)
        hidden_sizes = hidden_sizes or [256, 256]
        mlp_input_size = int(np.prod(observation_shape))
        self.mlp = MlpModel(
            input_size=mlp_input_size,
            hidden_sizes=hidden_sizes,
            output_size=None,
            nonlinearity=nonlinearity,
        )

        mlp_output_size = hidden_sizes[-1] if hidden_sizes else mlp_input_size
        self.lstm = torch.nn.LSTM(mlp_output_size + output_size + 1, lstm_size)
        self.pi = torch.nn.Linear(lstm_size, output_size)
        self.value = torch.nn.Linear(lstm_size, 1)
        if normalize_observation:
            self.obs_rms = RunningMeanStdModel(observation_shape)
            self.norm_obs_clip = norm_obs_clip
            self.norm_obs_var_clip = norm_obs_var_clip
        self.normalize_observation = normalize_observation
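
This constructor (like most below) sets up the same observation-normalization attributes (obs_rms, norm_obs_clip, norm_obs_var_clip) without showing how they are consumed. Below is a minimal, self-contained sketch of the usual pattern in the forward pass, assuming the running-stats model exposes mean and var buffers; the standalone helper and its name are illustrative, not the library code.

    import torch

    def normalize_obs(obs, mean, var, var_clip=1e-6, clip=10):
        # Clamp the running variance away from zero before standardizing,
        # then clip the normalized observation to a fixed range.
        if var_clip is not None:
            var = torch.clamp(var, min=var_clip)
        obs = (obs - mean) / var.sqrt()
        if clip is not None:
            obs = torch.clamp(obs, -clip, clip)
        return obs

    # Example call with made-up running statistics:
    normalized = normalize_obs(torch.randn(32, 8), mean=torch.zeros(8), var=torch.ones(8))
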
Example #2
 def __init__(
     self,
     observation_shape,
     action_size,
     hidden_sizes=None,  # None for default (see below).
     lstm_size=256,
     nonlinearity=torch.nn.ReLU,
     normalize_observation=False,
     norm_obs_clip=10,
     norm_obs_var_clip=1e-6,
 ):
     super().__init__()
     self._obs_n_dim = len(observation_shape)
     self._action_size = action_size
     hidden_sizes = hidden_sizes or [256, 256]
     mlp_input_size = int(np.prod(observation_shape))
     self.mlp = MlpModel(
         input_size=mlp_input_size,
         hidden_sizes=hidden_sizes,
         output_size=None,
         nonlinearity=nonlinearity,
     )
     mlp_output_size = hidden_sizes[-1] if hidden_sizes else mlp_input_size
     self.lstm = torch.nn.LSTM(mlp_output_size + action_size + 1, lstm_size)
     self.head = torch.nn.Linear(lstm_size, action_size * 2 + 1)
     if normalize_observation:
         self.obs_rms = RunningMeanStdModel(observation_shape)
         self.norm_obs_clip = norm_obs_clip
         self.norm_obs_var_clip = norm_obs_var_clip
     self.normalize_observation = normalize_observation
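
The LSTM input size here is mlp_output_size + action_size + 1 because the previous action and previous reward (a trailing scalar) are typically concatenated with the MLP features at each time step. A small self-contained sketch of that concatenation, with made-up sizes:

    import torch

    T, B, feat, act = 5, 4, 256, 6           # time, batch, feature and action sizes (illustrative)
    mlp_out = torch.zeros(T, B, feat)
    prev_action = torch.zeros(T, B, act)
    prev_reward = torch.zeros(T, B)

    lstm = torch.nn.LSTM(feat + act + 1, 256)
    # Features, previous action, and previous reward joined along the last dimension.
    lstm_input = torch.cat([mlp_out, prev_action, prev_reward.unsqueeze(-1)], dim=-1)
    lstm_out, (hn, cn) = lstm(lstm_input)     # lstm_out: [T, B, 256]
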
Example #3
 def __init__(
         self,
         observation_shape,
         action_size,
         hidden_sizes=None,
         lstm_size=None,
         lstm_skip=True,
         constraint=True,
         hidden_nonlinearity="tanh",  # or "relu"
         mu_nonlinearity="tanh",
         init_log_std=0.,
         normalize_observation=True,
         var_clip=1e-6,
         ):
     super().__init__()
      if hidden_nonlinearity == "tanh":  # So these can be strings in the config file.
         hidden_nonlinearity = torch.nn.Tanh
     elif hidden_nonlinearity == "relu":
         hidden_nonlinearity = torch.nn.ReLU
     else:
         raise ValueError(f"Unrecognized hidden_nonlinearity string: {hidden_nonlinearity}")
      if mu_nonlinearity == "tanh":  # So these can be strings in the config file.
         mu_nonlinearity = torch.nn.Tanh
     elif mu_nonlinearity == "relu":
         mu_nonlinearity = torch.nn.ReLU
     else:
         raise ValueError(f"Unrecognized mu_nonlinearity string: {mu_nonlinearity}")
     self._obs_ndim = len(observation_shape)
     input_size = int(np.prod(observation_shape))
     self.body = MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes or [256, 256],
         nonlinearity=hidden_nonlinearity,
     )
     last_size = self.body.output_size
     if lstm_size:
         lstm_input_size = last_size + action_size + 1
         self.lstm = torch.nn.LSTM(lstm_input_size, lstm_size)
         last_size = lstm_size
     else:
         self.lstm = None
     mu_linear = torch.nn.Linear(last_size, action_size)
     if mu_nonlinearity is not None:
         self.mu = torch.nn.Sequential(mu_linear, mu_nonlinearity())
     else:
         self.mu = mu_linear
     self.value = torch.nn.Linear(last_size, 1)
     if constraint:
         self.constraint = torch.nn.Linear(last_size, 1)
     else:
         self.constraint = None
     self.log_std = torch.nn.Parameter(init_log_std *
         torch.ones(action_size))
     self._lstm_skip = lstm_skip
     if normalize_observation:
         self.obs_rms = RunningMeanStdModel(observation_shape)
         self.var_clip = var_clip
     self.normalize_observation = normalize_observation
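
The two if/elif chains above exist so the nonlinearities can be plain strings in a config file. A dictionary lookup is a compact equivalent (a sketch, not the original code):

    import torch

    _NONLINEARITIES = {"tanh": torch.nn.Tanh, "relu": torch.nn.ReLU}

    def resolve_nonlinearity(name):
        # Map a config-file string to the corresponding module class.
        try:
            return _NONLINEARITIES[name]
        except KeyError:
            raise ValueError(f"Unrecognized nonlinearity string: {name}")

    assert resolve_nonlinearity("tanh") is torch.nn.Tanh
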
Example #4
 def __init__(
         self,
         RndCls,  # type: BaseFeatureExtractor
         rnd_model_kwargs):
     """
     Constructs target and distillation model. Assumes identical architectures.
     Also constructs normalization models for observation and intrinsic rewards.
     """
     super().__init__()
     self.target_model = RndCls(**rnd_model_kwargs)
     self.distill_model = RndCls(**rnd_model_kwargs)
     rnd_param_init_(self.target_model)
     rnd_param_init_(self.distill_model)
     self.obs_rms = RunningMeanStdModel(
         wrap(rnd_model_kwargs["input_shape"])
      )  # Requires that RndCls accepts an input_shape kwarg
     self.int_rff = None  # Intrinsic reward forward filter (this stores a discounted sum of non-episodic rewards)
     self.int_rff_rms = RunningMeanStdModel(torch.Size(
         [1]))  # Intrinsic reward forward filter RMS model
     self.update_norm = True  # Default to updating obs and int_rew normalization models
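
The target/distill pair is the standard RND setup: the randomly initialized target network is kept fixed, and the distillation network's prediction error on the target's features serves as the intrinsic reward. A minimal sketch under that assumption, with tiny stand-in networks instead of RndCls:

    import torch

    # Illustrative stand-ins for RndCls; the real feature extractors are larger.
    target_model = torch.nn.Linear(8, 16)
    distill_model = torch.nn.Linear(8, 16)

    def rnd_intrinsic_reward(obs):
        # Target features act as fixed labels; only the distillation network is
        # trained, and its per-sample squared prediction error is the bonus.
        with torch.no_grad():
            target_feat = target_model(obs)
        distill_feat = distill_model(obs)
        return ((distill_feat - target_feat) ** 2).mean(dim=-1)

    bonus = rnd_intrinsic_reward(torch.randn(32, 8))  # shape [32]
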
Example #5
 def __init__(
         self,
         observation_shape,
         action_size,
         option_size,
         hidden_sizes=None,  # None for default (see below).
         hidden_nonlinearity=torch.nn.Tanh,  # Module form.
         mu_nonlinearity=torch.nn.Tanh,  # Module form.
         init_log_std=0.,
         normalize_observation=True,
         norm_obs_clip=10,
         norm_obs_var_clip=1e-6,
          baselines_init=True,  # Orthogonal init with gain sqrt(2) until the last layer, then 0.01 for policy, 1 for value
         use_interest=False,  # IOC sigmoid interest functions
         use_diversity=False,  # TDEOC q entropy output
         use_attention=False,
         ):
     """Instantiate neural net modules according to inputs."""
     super().__init__()
     from functools import partial
     self._obs_ndim = len(observation_shape)
     input_size = int(np.prod(observation_shape))
     hidden_sizes = hidden_sizes or [64, 64]
     inits_mu = inits_v = None
     if baselines_init:
         inits_mu = (np.sqrt(2), 0.01)
         inits_v = (np.sqrt(2), 1.)
     body_mlp_class = partial(MlpModel, hidden_sizes=hidden_sizes, output_size=None, nonlinearity=hidden_nonlinearity, inits=inits_v)
     self.model = OptionCriticHead_IndependentPreprocessor(
         input_size=input_size,
         input_module_class=body_mlp_class,
         output_size=action_size,
         option_size=option_size,
         intra_option_policy='continuous',
         intra_option_kwargs={'init_log_std': init_log_std, 'mu_nonlinearity': mu_nonlinearity},
         input_module_kwargs={},
         use_interest=use_interest,
         use_diversity=use_diversity,
         use_attention=use_attention,
         baselines_init=baselines_init,
         orthogonal_init_base=inits_v[1],
         orthogonal_init_pol=inits_mu[1]
     )
     if normalize_observation:
         self.obs_rms = RunningMeanStdModel(observation_shape)
         self.norm_obs_clip = norm_obs_clip
         self.norm_obs_var_clip = norm_obs_var_clip
     self.normalize_observation = normalize_observation
     self.use_interest = use_interest
     self.use_diversity = use_diversity
     self.use_attention = use_attention
Example #6
    def __init__(
        self,
        observation_shape,
        action_size,
        policy_hidden_sizes=None,
        policy_hidden_nonlinearity=torch.nn.Tanh,
        value_hidden_sizes=None,
        value_hidden_nonlinearity=torch.nn.Tanh,
        init_log_std=0.,
        min_std=0.,
        normalize_observation=False,
        norm_obs_clip=10,
        norm_obs_var_clip=1e-6,
        policy_inputs_indices=None,
    ):
        super().__init__()
        self.min_std = min_std
        self._obs_ndim = len(observation_shape)
        input_size = int(np.prod(observation_shape))
        self.policy_inputs_indices = policy_inputs_indices if policy_inputs_indices is not None else list(
            range(input_size))

        policy_hidden_sizes = [
            400, 300
        ] if policy_hidden_sizes is None else policy_hidden_sizes
        value_hidden_sizes = [
            400, 300
        ] if value_hidden_sizes is None else value_hidden_sizes
        self.mu = MlpModel(input_size=len(self.policy_inputs_indices),
                           hidden_sizes=policy_hidden_sizes,
                           output_size=action_size,
                           nonlinearity=policy_hidden_nonlinearity)
        self.v = MlpModel(
            input_size=input_size,
            hidden_sizes=value_hidden_sizes,
            output_size=1,
            nonlinearity=value_hidden_nonlinearity,
        )
        self._log_std = torch.nn.Parameter(
            (np.log(np.exp(init_log_std) - self.min_std)) *
            torch.ones(action_size))
        if normalize_observation:
            self.obs_rms = RunningMeanStdModel(observation_shape)
            self.norm_obs_clip = norm_obs_clip
            self.norm_obs_var_clip = norm_obs_var_clip
        self.normalize_observation = normalize_observation
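
Here the policy MLP only receives len(policy_inputs_indices) inputs while the value MLP sees the full flattened observation, so the forward pass presumably indexes the observation before feeding self.mu. A minimal sketch of that split with made-up shapes:

    import torch

    obs = torch.randn(32, 10)                    # flattened observation batch
    policy_inputs_indices = [0, 1, 2, 3]         # features the policy is allowed to see

    mu_net = torch.nn.Linear(len(policy_inputs_indices), 6)
    v_net = torch.nn.Linear(obs.shape[1], 1)

    mu = mu_net(obs[:, policy_inputs_indices])   # policy uses only the selected features
    v = v_net(obs)                               # value uses the full observation
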
Example #7
 def __init__(
     self,
     observation_shape,
     action_size,
     hidden_sizes=None,  # None for default (see below).
     hidden_nonlinearity=torch.nn.Tanh,  # Module form.
     mu_nonlinearity=torch.nn.Tanh,  # Module form.
     init_log_std=0.,
     normalize_observation=True,
     norm_obs_clip=10,
     norm_obs_var_clip=1e-6,
      baselines_init=True,  # Orthogonal init with gain sqrt(2) until the last layer, then 0.01 for policy, 1 for value
 ):
     """Instantiate neural net modules according to inputs."""
     super().__init__()
     self._obs_ndim = len(observation_shape)
     input_size = int(np.prod(observation_shape))
     hidden_sizes = hidden_sizes or [64, 64]
     inits_mu = inits_v = None
     if baselines_init:
         inits_mu = (np.sqrt(2), 0.01)
         inits_v = (np.sqrt(2), 1.)
     mu_mlp = torch.jit.script(
         MlpModel(input_size=input_size,
                  hidden_sizes=hidden_sizes,
                  output_size=action_size,
                  nonlinearity=hidden_nonlinearity,
                  inits=inits_mu))
     if mu_nonlinearity is not None:
         self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
     else:
         self.mu = mu_mlp
     self.v = torch.jit.script(
         MlpModel(input_size=input_size,
                  hidden_sizes=hidden_sizes,
                  output_size=1,
                  nonlinearity=hidden_nonlinearity,
                  inits=inits_v))
     self.log_std = torch.nn.Parameter(init_log_std *
                                       torch.ones(action_size))
     if normalize_observation:
         self.obs_rms = RunningMeanStdModel(observation_shape)
         self.norm_obs_clip = norm_obs_clip
         self.norm_obs_var_clip = norm_obs_var_clip
     self.normalize_observation = normalize_observation
Example #8
    def __init__(
        self,
        image_shape,
        action_size,
        hidden_sizes=512,
        stop_conv_grad=False,
        channels=None,  # Defaults below.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        kaiming_init=True,
        normalize_conv_out=False,
    ):
        super().__init__()
        c, h, w = image_shape
        self.conv = Conv2dModel(
            in_channels=c,
            channels=channels or [32, 64, 64],
            kernel_sizes=kernel_sizes or [8, 4, 3],
            strides=strides or [4, 2, 1],
            paddings=paddings,
        )
        self._conv_out_size = self.conv.conv_out_size(h=h, w=w)
        self.pi_v_mlp = MlpModel(
            input_size=self._conv_out_size,
            hidden_sizes=hidden_sizes,
            output_size=action_size + 1,
        )
        if kaiming_init:
            self.apply(weight_init)

        self.stop_conv_grad = stop_conv_grad
        logger.log("Model stopping gradient at CONV." if stop_conv_grad else
                   "Modeul using gradients on all parameters.")
        if normalize_conv_out:
            # Haven't seen this make a difference yet.
            logger.log("Model normalizing conv output across all pixels.")
            self.conv_rms = RunningMeanStdModel((1, ))
            self.var_clip = 1e-6
        self.normalize_conv_out = normalize_conv_out
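
The single head outputs action_size + 1 units, i.e. policy logits plus one value estimate from the same linear layer, and stop_conv_grad presumably detaches the convolutional features so gradients stop at the trunk. A minimal sketch under those assumptions:

    import torch

    action_size = 6
    conv_out = torch.randn(32, 1024)                    # flattened conv features (illustrative)
    head = torch.nn.Linear(conv_out.shape[1], action_size + 1)

    stop_conv_grad = True
    if stop_conv_grad:
        conv_out = conv_out.detach()                    # no gradient into the conv trunk

    out = head(conv_out)
    pi_logits, value = out[:, :-1], out[:, -1]          # split policy logits and value
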
Example #9
 def __init__(
     self,
     observation_shape,
     action_size,
     hidden_sizes=None,  # None for default (see below).
     hidden_nonlinearity=torch.nn.Tanh,  # Module form.
     mu_nonlinearity=torch.nn.Tanh,  # Module form.
     init_log_std=0.,
     normalize_observation=False,
     norm_obs_clip=10,
     norm_obs_var_clip=1e-6,
 ):
     """Instantiate neural net modules according to inputs."""
     super().__init__()
     self._obs_ndim = len(observation_shape)
     input_size = int(np.prod(observation_shape))
     hidden_sizes = hidden_sizes or [64, 64]
     mu_mlp = MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes,
         output_size=action_size,
         nonlinearity=hidden_nonlinearity,
     )
     if mu_nonlinearity is not None:
         self.mu = torch.nn.Sequential(mu_mlp, mu_nonlinearity())
     else:
         self.mu = mu_mlp
     self.v = MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes,
         output_size=1,
         nonlinearity=hidden_nonlinearity,
     )
     self.log_std = torch.nn.Parameter(init_log_std *
                                       torch.ones(action_size))
     if normalize_observation:
         self.obs_rms = RunningMeanStdModel(observation_shape)
         self.norm_obs_clip = norm_obs_clip
         self.norm_obs_var_clip = norm_obs_var_clip
     self.normalize_observation = normalize_observation
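
With a state-independent log_std parameter, this head defines a diagonal Gaussian policy: the network produces mu and the standard deviation is log_std.exp(), broadcast over the batch. A short sketch of sampling from such a head (sizes are illustrative):

    import torch

    action_size = 6
    mu = torch.zeros(32, action_size)                        # network output for a batch
    log_std = torch.nn.Parameter(torch.zeros(action_size))   # learned, state-independent

    dist = torch.distributions.Normal(mu, log_std.exp())     # std broadcast over the batch
    action = dist.sample()
    log_prob = dist.log_prob(action).sum(-1)                 # joint log-prob over action dims
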
Example #10
 def __init__(
         self,
         observation_shape,
         action_size,
         option_size,
         hidden_sizes=None,  # None for default (see below).
         hidden_nonlinearity=torch.nn.Tanh,  # Module form.
         mu_nonlinearity=torch.nn.Tanh,  # Module form.
         init_log_std=0.,
         normalize_observation=True,
         norm_obs_clip=10,
         norm_obs_var_clip=1e-6,
          baselines_init=True,  # Orthogonal init with gain sqrt(2) until the last layer, then 0.01 for policy, 1 for value
         use_interest=False,  # IOC sigmoid interest functions
         ):
     """Instantiate neural net modules according to inputs."""
     super().__init__()
     self._obs_ndim = len(observation_shape)
     input_size = int(np.prod(observation_shape))
     hidden_sizes = hidden_sizes or [64, 64]
     inits_mu = inits_v = None
     if baselines_init:
         inits_mu = (np.sqrt(2), 0.01)
         inits_v = (np.sqrt(2), 1.)
     # Body for intra-option policy mean
     mu_mlp = MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes,
         output_size=None,
         nonlinearity=hidden_nonlinearity,
         inits=inits_mu
     )
      # Intra-option policy. Outputs tanh mu if mu_nonlinearity is given, else an unactivated linear output. Also holds log_std.
     self.mu = torch.nn.Sequential(mu_mlp, ContinuousIntraOptionPolicy(input_size=mu_mlp.output_size,
                                                                       num_options=option_size,
                                                                       num_actions=action_size,
                                                                       ortho_init=baselines_init,
                                                                       ortho_init_value=inits_mu[-1],
                                                                       init_log_std=init_log_std,
                                                                       mu_nonlinearity=mu_nonlinearity))
     # Option value. Pure linear
     self.q = MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes,
         output_size=option_size,
         nonlinearity=hidden_nonlinearity,
         inits=inits_v
     )
     # Option termination. MLP with sigmoid at end
     self.beta = torch.nn.Sequential(MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes,
         output_size=option_size,
         nonlinearity=hidden_nonlinearity,
         inits=inits_v
     ), torch.nn.Sigmoid())
     # self.log_std = torch.nn.Parameter(init_log_std * torch.ones(action_size))
     # Softmax policy over options
     self.pi_omega = torch.nn.Sequential(MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes,
         output_size=option_size,
         nonlinearity=hidden_nonlinearity,
         inits=inits_v
     ), torch.nn.Softmax(-1))
     # Per-option sigmoid interest functions
     self.pi_omega_I = torch.nn.Sequential(MlpModel(
         input_size=input_size,
         hidden_sizes=hidden_sizes,
         output_size=option_size,
         nonlinearity=hidden_nonlinearity,
         inits=inits_v
     ), torch.nn.Sigmoid()) if use_interest else Dummy(option_size)
     if normalize_observation:
         self.obs_rms = RunningMeanStdModel(observation_shape)
         self.norm_obs_clip = norm_obs_clip
         self.norm_obs_var_clip = norm_obs_var_clip
     self.normalize_observation = normalize_observation
     self.use_interest = use_interest
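
In the interest-option-critic formulation that pi_omega_I corresponds to, the sigmoid interest output usually reweights the softmax policy over options and is then renormalized. A hedged sketch of that combination (how this particular model applies it in forward() is not shown here):

    import torch

    num_options = 4
    pi_omega = torch.softmax(torch.randn(32, num_options), dim=-1)   # policy over options
    interest = torch.sigmoid(torch.randn(32, num_options))           # per-option interest

    # Interest-weighted option policy, renormalized to sum to one.
    pi_omega_I = interest * pi_omega
    pi_omega_I = pi_omega_I / pi_omega_I.sum(dim=-1, keepdim=True)
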