Example #1
0
 def __init__(self, env_spec, regressor_args=None):
     """Baseline backed by a Gaussian MLP regressor over flat observations.

     :param env_spec: environment spec providing ``observation_space.flat_dim``.
     :param regressor_args: optional dict of kwargs forwarded verbatim to
         ``GaussianMLPRegressor``.
     """
     # Snapshot constructor arguments for serialization before any extra
     # locals are created (quick_init reads locals()).
     Serializable.quick_init(self, locals())
     super(GaussianMLPBaseline, self).__init__(env_spec)
     regressor_args = {} if regressor_args is None else regressor_args
     self._regressor = GaussianMLPRegressor(
         input_shape=(env_spec.observation_space.flat_dim, ),
         output_dim=1,
         name="vf",
         **regressor_args)
Example #2
0
def buildGMLP(nonLin):
    """Build a ``GaussianMLPRegressor`` with fixed scalar input/output dims.

    :param nonLin: hidden-layer nonlinearity handed to the regressor.
    :return: a configured ``GaussianMLPRegressor`` named "vf1".
    """
    reg_args = {
        'normalize_inputs': False,
        'normalize_outputs': False,
        'hidden_nonlinearity': nonLin,
        'hidden_sizes': (64, 64, 8),
        # std settings only take effect when adaptive_std == True
        'std_hidden_sizes': (32, 16, 16),
        'adaptive_std': False,
        'learn_std': False,
    }
    return GaussianMLPRegressor(input_shape=(1, ),
                                output_dim=1,
                                name="vf1",
                                **reg_args)
Example #3
0
    def __init__(self,
                 env_spec,
                 subsample_factor=1.,
                 num_seq_inputs=1,
                 regressor_args=None):
        """Baseline fitting a Gaussian MLP over stacked flat observations.

        :param env_spec: environment spec providing
            ``observation_space.flat_dim``.
        :param subsample_factor: stored fraction; presumably used when fitting
            elsewhere -- TODO confirm against the fit() method.
        :param num_seq_inputs: number of stacked observations forming one
            regressor input.
        :param regressor_args: optional dict of kwargs forwarded verbatim to
            ``GaussianMLPRegressor``.
        """
        self._subsample_factor = subsample_factor
        regressor_args = {} if regressor_args is None else regressor_args

        input_dim = env_spec.observation_space.flat_dim * num_seq_inputs
        self._regressor = GaussianMLPRegressor(
            input_shape=(input_dim, ),
            output_dim=1,
            name="vf",
            **regressor_args)
    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
            target_key='returns',
    ):
        """Gaussian MLP baseline regressing a chosen batch key.

        :param env_spec: environment spec providing
            ``observation_space.flat_dim``.
        :param subsample_factor: stored by callers elsewhere; kept for
            interface compatibility (not read in this constructor).
        :param num_seq_inputs: number of stacked observations per input.
        :param regressor_args: optional dict of kwargs forwarded verbatim to
            ``GaussianMLPRegressor``.
        :param target_key: key of the quantity to regress; also suffixes the
            regressor's variable-scope name.
        """
        # Capture constructor arguments before creating any extra locals,
        # since quick_init records locals() for serialization.
        Serializable.quick_init(self, locals())
        super(GaussianMLPBaseline, self).__init__(env_spec)
        if regressor_args is None:
            regressor_args = {}

        flat_input_dim = env_spec.observation_space.flat_dim * num_seq_inputs
        self._regressor = GaussianMLPRegressor(
            input_shape=(flat_input_dim,),
            output_dim=1,
            name='vf_' + target_key,
            **regressor_args)
        self._target_key = target_key
    def __init__(
        self,
        env_spec,
        policy,
        recurrent=False,
        predict_all=True,
        obs_regressed='all',
        act_regressed='all',
        use_only_sign=False,
        noisify_traj_coef=0,
        optimizer=None,  # this defaults to LBFGS
        regressor_args=None,  # here goes all args straight to the regressor: hidden_sizes, TR, step_size....
    ):
        """
        Build a regressor that predicts the policy's latent variable from a
        selected subset of observation and action dimensions. The regressor
        class is chosen from ``policy.latent_name`` ('bernoulli',
        'categorical', or 'normal'); anything else raises NotImplementedError.

        :param env_spec: env spec providing observation/action flat dims.
        :param policy: policy exposing ``latent_dim`` and ``latent_name``.
        :param recurrent: pick the recurrent regressor variant where one exists.
        :param predict_all: this is only for the recurrent case, to use all hidden states as predictions
        :param obs_regressed: list of index of the obs variables used to fit the regressor. default string 'all'
        :param act_regressed: list of index of the act variables used to fit the regressor. default string 'all'
        :param use_only_sign: stored flag; presumably consumed by fit/predict
            elsewhere -- TODO confirm.
        :param noisify_traj_coef: stored coefficient; presumably controls noise
            added to trajectories elsewhere -- TODO confirm.
        :param optimizer: None (keep the regressor's own default, LBFGS per the
            inline comment) or the string 'first_order' for a
            FirstOrderOptimizer; any other value raises NotImplementedError.
        :param regressor_args: dict of kwargs forwarded verbatim to the regressor.
        """
        self.env_spec = env_spec
        self.policy = policy
        self.latent_dim = policy.latent_dim
        self.recurrent = recurrent
        self.predict_all = predict_all
        self.use_only_sign = use_only_sign
        self.noisify_traj_coef = noisify_traj_coef
        # NOTE(review): stored before the None-check further down, so this
        # attribute can remain None even though the local is replaced by an
        # empty dict -- confirm that is intended.
        self.regressor_args = regressor_args
        # decide what obs variables will be regressed upon
        if obs_regressed == 'all':
            self.obs_regressed = list(
                range(env_spec.observation_space.flat_dim))
        else:
            self.obs_regressed = obs_regressed
        # decide what action variables will be regressed upon
        if act_regressed == 'all':
            self.act_regressed = list(range(env_spec.action_space.flat_dim))
        else:
            self.act_regressed = act_regressed
        # shape the input dimension of the NN for the above decisions.
        self.obs_act_dim = len(self.obs_regressed) + len(self.act_regressed)

        # Snapshots locals() (here: the constructor parameters) for
        # serialization; original author's "??" kept as a review marker.
        Serializable.quick_init(self, locals())  # ??

        if regressor_args is None:
            regressor_args = dict()

        if optimizer == 'first_order':
            self.optimizer = FirstOrderOptimizer(
                max_epochs=10,  # both of these are to match Rocky's 10
                batch_size=128,
            )
        elif optimizer is None:
            # None means: let the regressor use its own default optimizer.
            self.optimizer = None
        else:
            raise NotImplementedError

        # Pick the regressor family matching the latent distribution.
        if policy.latent_name == 'bernoulli':
            if self.recurrent:
                self._regressor = BernoulliRecurrentRegressor(
                    input_shape=(self.obs_act_dim, ),
                    output_dim=policy.latent_dim,
                    optimizer=self.optimizer,
                    predict_all=self.predict_all,
                    **regressor_args)
            else:
                self._regressor = BernoulliMLPRegressor(
                    input_shape=(self.obs_act_dim, ),
                    output_dim=policy.latent_dim,
                    optimizer=self.optimizer,
                    **regressor_args)
        elif policy.latent_name == 'categorical':
            if self.recurrent:
                self._regressor = CategoricalRecurrentRegressor(  # not implemented
                    input_shape=(self.obs_act_dim, ),
                    output_dim=policy.latent_dim,
                    optimizer=self.optimizer,
                    # predict_all=self.predict_all,
                    **regressor_args)
            else:
                self._regressor = CategoricalMLPRegressor(
                    input_shape=(self.obs_act_dim, ),
                    output_dim=policy.latent_dim,
                    optimizer=self.optimizer,
                    **regressor_args)
        elif policy.latent_name == 'normal':
            # No recurrent variant is handled for the 'normal' case here.
            self._regressor = GaussianMLPRegressor(
                input_shape=(self.obs_act_dim, ),
                output_dim=policy.latent_dim,
                optimizer=self.optimizer,
                **regressor_args)
        else:
            raise NotImplementedError