def __init__(
        self,
        ob_space,
        ac_space,
        extractor,
        extractor_params,
        decoder=None,
        decoder_params=None,
):
    """
    Build an actor with two linear heads (mean and log-std) on top of a
    feature extractor, optionally preceded by an observation decoder.

    In: state
    Out: features feeding ``mean_layer`` and ``logstd_layer``, each of
    width ``ac_space``.

    :param ob_space         : Shape of the observation space.
    :param ac_space         : Size of the action space; used directly as the
                              output width of both heads.
    :param extractor        : Class of the extractor network.
    :param extractor_params : Keyword arguments for the extractor network.
    :param decoder          : (optional) Class of the decoder network.
    :param decoder_params   : (optional) Keyword arguments for the decoder
                              network. Must be a mapping whenever ``decoder``
                              is given, because it is unpacked with ``**``.
    """
    super(Actor, self).__init__()
    self.ac_size = ac_space

    if decoder is not None:
        self.decoder = decoder(ob_space, **decoder_params)
        # Flattened size of the decoder output, as reported by output_shape.
        decoded_shape = output_shape(ob_space, decoder_params)
        self.ob_size = np.prod(np.array(decoded_shape), dtype=int)
    else:
        self.decoder = None
        # No decoder: the extractor consumes the flattened raw observation.
        self.ob_size = np.prod(np.array(ob_space), dtype=int)

    self.extractor = extractor(self.ob_size, **extractor_params)
    # The extractor is built for ``ob_size`` inputs, so its output shape is
    # queried with that same input size (the action is not an input here).
    self.feature_size = np.prod(
        output_shape(self.ob_size, extractor_params), dtype=int)

    self.mean_layer = nn.Linear(self.feature_size, self.ac_size)
    self.logstd_layer = nn.Linear(self.feature_size, self.ac_size)
def __init__(
        self,
        ob_space,
        ac_space,
        extractor,
        extractor_params,
        decoder=None,
        decoder_params=None,
):
    """
    Deterministic actor for DDPG implementation.

    In: state
    Out: features feeding a single linear head ``mean_layer`` of width
    ``ac_space``.

    :param ob_space         : Shape of the observation space.
    :param ac_space         : Size of the action space; used directly as the
                              output width of ``mean_layer``.
    :param extractor        : Class of the extractor network.
    :param extractor_params : Keyword arguments for the extractor network.
    :param decoder          : (optional) Class of the decoder network.
    :param decoder_params   : (optional) Keyword arguments for the decoder
                              network. Must be a mapping whenever ``decoder``
                              is given, because it is unpacked with ``**``.
    """
    super(DetActor, self).__init__()
    self.ac_size = ac_space

    if decoder is not None:
        self.decoder = decoder(ob_space, **decoder_params)
        # Flattened size of the decoder output, as reported by output_shape.
        decoded_shape = output_shape(ob_space, decoder_params)
        self.ob_size = np.prod(np.array(decoded_shape), dtype=int)
    else:
        self.decoder = None
        # No decoder: the extractor consumes the flattened raw observation.
        self.ob_size = np.prod(np.array(ob_space), dtype=int)

    self.extractor = extractor(self.ob_size, **extractor_params)
    self.feature_size = np.prod(
        output_shape(self.ob_size, extractor_params), dtype=int)

    self.mean_layer = nn.Linear(self.feature_size, self.ac_size)
def __init__(
        self,
        ob_space,
        ac_space,
        extractor,
        extractor_params,
        decoder=None,
        decoder_params=None,
):
    """
    Conditional VAE, designed to match with BCQ (Fujimoto, 2019).

    Encoder: ``enc_net`` takes ``ob_size + ac_size`` inputs and feeds two
    linear heads (``mu_layer``, ``logstd_layer``), each of width
    ``2 * ac_size``.
    Decoder: ``dec_net`` takes ``ob_size + 2 * ac_size`` inputs and feeds
    ``recon_layer`` of width ``ac_size``.

    :param ob_space         : Shape of the observation space.
    :param ac_space         : Size of the action space.
    :param extractor        : Class of the network used for both the encoder
                              and the decoder trunk.
    :param extractor_params : Keyword arguments for the extractor network.
                              A deep copy is taken and its ``'activ'`` entry
                              is set to ``'relu'``; the caller's dict is left
                              untouched.
    :param decoder          : (optional) Class of the observation decoder.
    :param decoder_params   : (optional) Keyword arguments for the
                              observation decoder. Must be a mapping whenever
                              ``decoder`` is given (unpacked with ``**``).
    """
    super(VAE, self).__init__()
    self.ac_size = ac_space

    if decoder is not None:
        self.obs_decoder = decoder(ob_space, **decoder_params)
        # Flattened size of the decoder output, as reported by output_shape.
        decoded_shape = output_shape(ob_space, decoder_params)
        self.ob_size = np.prod(np.array(decoded_shape), dtype=int)
    else:
        self.obs_decoder = None
        # No decoder: the networks consume the flattened raw observation.
        self.ob_size = np.prod(np.array(ob_space), dtype=int)

    # Work on a private copy so forcing the activation does not leak back
    # into the caller's extractor_params.
    vae_params = copy.deepcopy(extractor_params)
    vae_params['activ'] = 'relu'

    # NOTE(review): feature_size is derived from ob_size alone although
    # enc_net and dec_net receive wider inputs. This is only correct if
    # output_shape does not depend on the input size - TODO confirm.
    self.feature_size = np.prod(
        output_shape(self.ob_size, vae_params), dtype=int)

    # encoder
    self.enc_net = extractor(self.ob_size + self.ac_size, **vae_params)
    self.mu_layer = nn.Linear(self.feature_size, 2 * self.ac_size)
    self.logstd_layer = nn.Linear(self.feature_size, 2 * self.ac_size)

    # decoder
    self.dec_net = extractor(self.ob_size + 2 * self.ac_size, **vae_params)
    self.recon_layer = nn.Linear(self.feature_size, self.ac_size)
def __init__(self, ob_space, ac_space, extractor, extractor_params,
             decoder=None, decoder_params=None, value_processing='none'):
    """
    In: state & action
    Out: Q-value for given state-action pair

    :param ob_space         : Shape of the observation space.
    :param ac_space         : Shape of the action space. (must be 1-dimensional)
    :param extractor        : Class of the extractor network.
    :param extractor_params : Keyword arguments for the extractor network.
    :param decoder          : (optional) Class of the decoder network.
    :param decoder_params   : (optional) Keyword arguments for the decoder
                              network. Must be a mapping whenever ``decoder``
                              is given, because it is unpacked with ``**``.
    :param value_processing : Key handed to ``functional_finder``; whatever
                              it returns is stored as
                              ``self.value_processing``. Defaults to 'none'.
    """
    super(GeneralCritic, self).__init__()
    self.ac_size = ac_space

    if decoder is not None:
        self.decoder = decoder(ob_space, **decoder_params)
        # Flattened size of the decoder output, as reported by output_shape.
        decoded_shape = output_shape(ob_space, decoder_params)
        self.ob_size = np.prod(np.array(decoded_shape), dtype=int)
    else:
        self.decoder = None
        # No decoder: the extractor consumes the flattened raw observation.
        self.ob_size = np.prod(np.array(ob_space), dtype=int)

    # The critic's extractor sees observation and action together.
    joint_size = self.ob_size + self.ac_size
    self.extractor = extractor(joint_size, **extractor_params)
    self.feature_size = np.prod(
        output_shape(joint_size, extractor_params), dtype=int)

    # Single scalar output per state-action pair.
    self.value_layer = nn.Linear(self.feature_size, 1)
    self.value_processing = functional_finder(value_processing)
def __init__(
        self,
        ob_space,
        ac_space,
        extractor,
        extractor_params,
        decoder=None,
        decoder_params=None,
):
    """
    In: state & action
    Out: Action perturbation, pre-scaled.

    :param ob_space         : Shape of the observation space.
    :param ac_space         : Shape of the action space. (must be 1-dimensional)
    :param extractor        : Class of the extractor network.
    :param extractor_params : Keyword arguments for the extractor network.
    :param decoder          : (optional) Class of the decoder network.
    :param decoder_params   : (optional) Keyword arguments for the decoder
                              network. Must be a mapping whenever ``decoder``
                              is given, because it is unpacked with ``**``.
    """
    super(Perturb, self).__init__()
    self.ac_size = ac_space

    if decoder is not None:
        self.decoder = decoder(ob_space, **decoder_params)
        # Flattened size of the decoder output, as reported by output_shape.
        decoded_shape = output_shape(ob_space, decoder_params)
        self.ob_size = np.prod(np.array(decoded_shape), dtype=int)
    else:
        self.decoder = None
        # No decoder: the extractor consumes the flattened raw observation.
        self.ob_size = np.prod(np.array(ob_space), dtype=int)

    # The extractor sees observation and action together.
    joint_size = self.ob_size + self.ac_size
    self.extractor = extractor(joint_size, **extractor_params)
    self.feature_size = np.prod(
        output_shape(joint_size, extractor_params), dtype=int)

    # One perturbation component per action dimension.
    self.perturb_layer = nn.Linear(self.feature_size, self.ac_size)
def __init__(
        self,
        ob_space,
        ac_space,
        extractor,
        extractor_params,
        decoder=None,
        decoder_params=None,
):
    """
    In: state,
    Out: state-wise log-lambda (Lagrangian multiplier), scalar.

    :param ob_space         : Shape of the observation space.
    :param ac_space         : Shape of the action space. (must be 1-dimensional)
                              Stored as ``self.ac_size``; not used to size
                              any layer built here.
    :param extractor        : Class of the extractor network.
    :param extractor_params : Keyword arguments for the extractor network.
    :param decoder          : (optional) Class of the decoder network.
    :param decoder_params   : (optional) Keyword arguments for the decoder
                              network. Must be a mapping whenever ``decoder``
                              is given, because it is unpacked with ``**``.
    """
    super(Lambda, self).__init__()
    self.ac_size = ac_space

    if decoder is not None:
        self.decoder = decoder(ob_space, **decoder_params)
        # Flattened size of the decoder output, as reported by output_shape.
        decoded_shape = output_shape(ob_space, decoder_params)
        self.ob_size = np.prod(np.array(decoded_shape), dtype=int)
    else:
        self.decoder = None
        # No decoder: the extractor consumes the flattened raw observation.
        self.ob_size = np.prod(np.array(ob_space), dtype=int)

    self.extractor = extractor(self.ob_size, **extractor_params)
    self.feature_size = np.prod(
        output_shape(self.ob_size, extractor_params), dtype=int)

    # Single scalar output per state.
    self.lambda_layer = nn.Linear(self.feature_size, 1)