Example #1
    def __init__(
        self,
        env_spec,
        regressor_args=None,
    ):
        Serializable.quick_init(self, locals())
        super(GaussianMLPBaseline, self).__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()
        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim, ),
            output_dim=1,
            name="vf",
            **regressor_args)
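
The regressor_args dict is forwarded untouched to GaussianMLPRegressor, so any of the regressor's constructor options can be set from the baseline. A hedged construction sketch (env is a hypothetical rllab-style environment exposing .spec; the option names match those used in Example #7 below):

baseline = GaussianMLPBaseline(
    env_spec=env.spec,
    regressor_args=dict(hidden_sizes=(32, 32), normalize_inputs=False),
)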
Example #2
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
    ):
        self._subsample_factor = subsample_factor
        if regressor_args is None:
            regressor_args = dict()

        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim *
                         num_seq_inputs, ),
            output_dim=1,
            name="vf",
            **regressor_args)
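
This variant adds subsample_factor and num_seq_inputs, but the constructor only stores the factor; the fit() override in Example #7 below shows how it is used to subsample transitions before fitting.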
Example #3
    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
            target_key='returns',
    ):
        Serializable.quick_init(self, locals())
        super(GaussianMLPBaseline, self).__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()

        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=1,
            name='vf_' + target_key,
            **regressor_args
        )
        self._target_key = target_key
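
The snippet above omits the fit() that actually consumes target_key. A hedged reconstruction, mirroring the fit() of Examples #4 and #5 but reading self._target_key instead of the hard-coded "returns" key:

    def fit(self, paths):
        observations = np.concatenate([p["observations"] for p in paths])
        targets = np.concatenate([p[self._target_key] for p in paths])
        self._regressor.fit(observations, targets.reshape((-1, 1)))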
Example #4
class GaussianMLPBaseline(Baseline, Parameterized, Serializable):
    def __init__(
        self,
        env_spec,
        regressor_args=None,
    ):
        Serializable.quick_init(self, locals())
        super(GaussianMLPBaseline, self).__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()
        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim, ),
            output_dim=1,
            name="vf",
            **regressor_args)

    @overrides
    def fit(self, paths):
        observations = np.concatenate([p["observations"] for p in paths])
        returns = np.concatenate([p["returns"] for p in paths])
        self._regressor.fit(observations, returns.reshape((-1, 1)))

    @overrides
    def predict(self, path):
        return self._regressor.predict(path["observations"]).flatten()

    @overrides
    def get_param_values(self, **tags):
        return self._regressor.get_param_values(**tags)

    @overrides
    def set_param_values(self, flattened_params, **tags):
        self._regressor.set_param_values(flattened_params, **tags)
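
A minimal end-to-end sketch of this class (the import path assumes the standard rllab layout, and the _Spec/_Space stubs are hypothetical stand-ins for a real environment spec):

import numpy as np
from rllab.baselines.gaussian_mlp_baseline import GaussianMLPBaseline

class _Space:                       # hypothetical stub: only flat_dim is read here
    def __init__(self, flat_dim):
        self.flat_dim = flat_dim

class _Spec:                        # hypothetical stub env_spec
    observation_space = _Space(4)

baseline = GaussianMLPBaseline(env_spec=_Spec())
paths = [dict(observations=np.random.randn(100, 4),
              returns=np.random.randn(100)) for _ in range(5)]
baseline.fit(paths)
values = baseline.predict(paths[0])  # flat array of length 100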
Example #5
class GaussianMLPBaseline(Baseline, Parameterized, Serializable):

    def __init__(
            self,
            env_spec,
            regressor_args=None,
    ):
        Serializable.quick_init(self, locals())
        super(GaussianMLPBaseline, self).__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()
        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim,),
            output_dim=1,
            name="vf",
            **regressor_args
        )

    @overrides
    def fit(self, paths):
        observations = np.concatenate([p["observations"] for p in paths])
        returns = np.concatenate([p["returns"] for p in paths])
        self._regressor.fit(observations, returns.reshape((-1, 1)))

    @overrides
    def predict(self, path):
        return self._regressor.predict(path["observations"]).flatten()

    @overrides
    def get_param_values(self, **tags):
        return self._regressor.get_param_values(**tags)

    @overrides
    def set_param_values(self, flattened_params, **tags):
        self._regressor.set_param_values(flattened_params, **tags)
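
Example #5 is functionally identical to Example #4; the two differ only in argument indentation and where the closing parenthesis of the regressor call is placed.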
Example #6
    def __init__(
            self,
            env_spec,
            regressor_args=None,
    ):
        Serializable.quick_init(self, locals())
        super(GaussianMLPBaseline, self).__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()
        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim,),
            output_dim=1,
            name="vf",
            **regressor_args
        )
Example #7
def buildGMLP(nonLin):
    regArgs = {
        'normalize_inputs': False,
        'normalize_outputs': False,
        'hidden_nonlinearity': nonLin,
        'hidden_sizes': (64, 64, 8),
        # std_hidden_sizes is only used if adaptive_std == True
        'std_hidden_sizes': (32, 16, 16),
        'adaptive_std': False,
        'learn_std': False,
    }

    gMLP_reg = GaussianMLPRegressor(input_shape=(1, ),
                                    output_dim=1,
                                    name="vf1",
                                    **regArgs)
    return gMLP_reg
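
A quick sanity check of the regressor built above might look like this (a sketch; the Lasagne nonlinearity import assumes the Theano-based rllab branch):

import numpy as np
import lasagne.nonlinearities as NL  # assumption: Theano/Lasagne rllab branch

reg = buildGMLP(NL.tanh)
xs = np.linspace(-1, 1, 200).reshape(-1, 1)  # matches input_shape=(1, )
ys = np.sin(3 * xs)                          # toy 1-D regression target
reg.fit(xs, ys)
preds = reg.predict(xs)                      # shape (200, 1)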
class GaussianMLPBaseline(Baseline, Parameterized, Serializable):
    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
    ):
        Serializable.quick_init(self, locals())
        super(GaussianMLPBaseline, self).__init__(env_spec)
        self._subsample_factor = subsample_factor
        if regressor_args is None:
            regressor_args = dict()

        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=1,
            name="vf",
            **regressor_args
        )

    @overrides
    def fit(self, paths):
        # Subsample before fitting.
        if self._subsample_factor < 1:
            lst_rnd_idx = []
            for path in paths:
                # Subsample index
                path_len = len(path['returns'])
                rnd_idx = np.random.choice(path_len, int(np.ceil(path_len * self._subsample_factor)),
                                           replace=False)
                lst_rnd_idx.append(rnd_idx)
            observations = np.concatenate([p["observations"][idx] for p, idx in zip(paths, lst_rnd_idx)])
            returns = np.concatenate([p["returns"][idx] for p, idx in zip(paths, lst_rnd_idx)])
        else:
            observations = np.concatenate([p["observations"] for p in paths])
            returns = np.concatenate([p["returns"] for p in paths])
        self._regressor.fit(observations, returns.reshape((-1, 1)))

    @overrides
    def predict(self, path):
        return self._regressor.predict(path["observations"]).flatten()

    @overrides
    def get_param_values(self, **tags):
        return self._regressor.get_param_values(**tags)

    @overrides
    def set_param_values(self, flattened_params, **tags):
        self._regressor.set_param_values(flattened_params, **tags)
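
The distinctive part of this variant is the per-path subsampling in fit(). Isolated on a toy path, the index selection behaves as follows (a self-contained numpy sketch):

import numpy as np

path = dict(observations=np.random.randn(10, 3), returns=np.random.randn(10))
subsample_factor = 0.5
path_len = len(path['returns'])
rnd_idx = np.random.choice(path_len,
                           int(np.ceil(path_len * subsample_factor)),
                           replace=False)
print(path['observations'][rnd_idx].shape)  # (5, 3): half the transitions kept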
class Latent_regressor(Parameterized, Serializable):
    def __init__(
        self,
        env_spec,
        policy,
        recurrent=False,
        predict_all=True,
        obs_regressed='all',
        act_regressed='all',
        use_only_sign=False,
        noisify_traj_coef=0,
        optimizer=None,  # this defaults to LBFGS
        regressor_args=None,  # all remaining kwargs are passed straight to the regressor: hidden_sizes, TR, step_size, ...
    ):
        """
        :param predict_all: this is only for the recurrent case, to use all hidden states as predictions
        :param obs_regressed: list of index of the obs variables used to fit the regressor. default string 'all'
        :param act_regressed: list of index of the act variables used to fit the regressor. default string 'all'
        :param regressor_args:
        """
        self.env_spec = env_spec
        self.policy = policy
        self.latent_dim = policy.latent_dim
        self.recurrent = recurrent
        self.predict_all = predict_all
        self.use_only_sign = use_only_sign
        self.noisify_traj_coef = noisify_traj_coef
        self.regressor_args = regressor_args
        # decide what obs variables will be regressed upon
        if obs_regressed == 'all':
            self.obs_regressed = list(
                range(env_spec.observation_space.flat_dim))
        else:
            self.obs_regressed = obs_regressed
        # decide what action variables will be regressed upon
        if act_regressed == 'all':
            self.act_regressed = list(range(env_spec.action_space.flat_dim))
        else:
            self.act_regressed = act_regressed
        # shape the input dimension of the NN for the above decisions.
        self.obs_act_dim = len(self.obs_regressed) + len(self.act_regressed)

        Serializable.quick_init(self, locals())  # captures constructor args for serialization

        if regressor_args is None:
            regressor_args = dict()

        if optimizer == 'first_order':
            self.optimizer = FirstOrderOptimizer(
                max_epochs=10,  # both of these are to match Rocky's 10
                batch_size=128,
            )
        elif optimizer is None:
            self.optimizer = None
        else:
            raise NotImplementedError

        if policy.latent_name == 'bernoulli':
            if self.recurrent:
                self._regressor = BernoulliRecurrentRegressor(
                    input_shape=(self.obs_act_dim, ),
                    output_dim=policy.latent_dim,
                    optimizer=self.optimizer,
                    predict_all=self.predict_all,
                    **regressor_args)
            else:
                self._regressor = BernoulliMLPRegressor(
                    input_shape=(self.obs_act_dim, ),
                    output_dim=policy.latent_dim,
                    optimizer=self.optimizer,
                    **regressor_args)
        elif policy.latent_name == 'categorical':
            if self.recurrent:
                self._regressor = CategoricalRecurrentRegressor(  # not implemented
                    input_shape=(self.obs_act_dim, ),
                    output_dim=policy.latent_dim,
                    optimizer=self.optimizer,
                    # predict_all=self.predict_all,
                    **regressor_args)
            else:
                self._regressor = CategoricalMLPRegressor(
                    input_shape=(self.obs_act_dim, ),
                    output_dim=policy.latent_dim,
                    optimizer=self.optimizer,
                    **regressor_args)
        elif policy.latent_name == 'normal':
            self._regressor = GaussianMLPRegressor(
                input_shape=(self.obs_act_dim, ),
                output_dim=policy.latent_dim,
                optimizer=self.optimizer,
                **regressor_args)
        else:
            raise NotImplementedError

    def fit(self, paths):
        logger.log('fitting the regressor...')
        if self.recurrent:
            observations = np.array(
                [p["observations"][:, self.obs_regressed] for p in paths])
            actions = np.array(
                [p["actions"][:, self.act_regressed] for p in paths])
            obs_actions = np.concatenate([observations, actions], axis=2)
            if self.noisify_traj_coef:
                obs_actions += np.random.normal(
                    loc=0.0,
                    scale=float(np.mean(np.abs(obs_actions))) *
                    self.noisify_traj_coef,
                    size=np.shape(obs_actions))
            latents = np.array([p['agent_infos']['latents'] for p in paths])
            self._regressor.fit(
                obs_actions, latents)  # the input shapes are (traj, time, dim)
        else:
            observations = np.concatenate(
                [p["observations"][:, self.obs_regressed] for p in paths])
            actions = np.concatenate(
                [p["actions"][:, self.act_regressed] for p in paths])
            obs_actions = np.concatenate([observations, actions], axis=1)
            latents = np.concatenate(
                [p['agent_infos']["latents"] for p in paths])
            if self.noisify_traj_coef:
                obs_actions += np.random.normal(
                    loc=0.0,
                    scale=float(np.mean(np.abs(obs_actions))) *
                    self.noisify_traj_coef,
                    size=np.shape(obs_actions))
            self._regressor.fit(obs_actions,
                                latents.reshape(
                                    (-1, self.latent_dim)))  # flatten to rows of shape (latent_dim,)
        logger.log('done fitting the regressor')

    def predict(self, path):
        if self.recurrent:
            obs_actions = [
                np.concatenate([
                    path["observations"][:, self.obs_regressed],
                    path["actions"][:, self.act_regressed]
                ], axis=1)
            ]  # wrapped in a list: the recurrent regressor expects a batch of trajectories
        else:
            obs_actions = np.concatenate([
                path["observations"][:, self.obs_regressed],
                path["actions"][:, self.act_regressed]
            ], axis=1)
        if self.noisify_traj_coef:
            obs_actions += np.random.normal(
                loc=0.0,
                scale=float(np.mean(np.abs(obs_actions))) *
                self.noisify_traj_coef,
                size=np.shape(obs_actions))
        if self.use_only_sign:
            obs_actions = np.sign(obs_actions)
        return self._regressor.predict(obs_actions).flatten()

    def get_output_p(self, path):
        # gives the p_dist for every step: the latent posterior given (obs, act)
        if self.recurrent:
            obs_actions = [
                np.concatenate([
                    path["observations"][:, self.obs_regressed],
                    path["actions"][:, self.act_regressed]
                ], axis=1)
            ]  # wrapped in a list: the recurrent regressor expects a batch of trajectories
        else:
            obs_actions = np.concatenate([
                path["observations"][:, self.obs_regressed],
                path["actions"][:, self.act_regressed]
            ], axis=1)
        if self.noisify_traj_coef:
            obs_actions += np.random.normal(
                loc=0.0,
                scale=float(np.mean(np.abs(obs_actions))) *
                self.noisify_traj_coef,
                size=np.shape(obs_actions))
        if self.use_only_sign:
            obs_actions = np.sign(obs_actions)
        if self.policy.latent_name == 'bernoulli':
            return self._regressor._f_p(obs_actions).flatten()
        elif self.policy.latent_name == 'normal':
            return self._regressor._f_pdists(obs_actions).flatten()

    def get_param_values(self, **tags):
        return self._regressor.get_param_values(**tags)

    def set_param_values(self, flattened_params, **tags):
        self._regressor.set_param_values(flattened_params, **tags)

    def predict_log_likelihood(self, paths, latents):
        if self.recurrent:
            observations = np.array(
                [p["observations"][:, self.obs_regressed] for p in paths])
            actions = np.array(
                [p["actions"][:, self.act_regressed] for p in paths])
            obs_actions = np.concatenate(
                [observations, actions],
                axis=2)  # latents must match the first two dims: (batch, time)
        else:
            observations = np.concatenate(
                [p["observations"][:, self.obs_regressed] for p in paths])
            actions = np.concatenate(
                [p["actions"][:, self.act_regressed] for p in paths])
            obs_actions = np.concatenate([observations, actions], axis=1)
            latents = np.concatenate(latents, axis=0)
        if self.noisify_traj_coef:
            noise = np.random.multivariate_normal(
                mean=np.zeros_like(np.mean(obs_actions, axis=0)),
                cov=np.diag(
                    np.mean(np.abs(obs_actions), axis=0) *
                    self.noisify_traj_coef),
                size=np.shape(obs_actions)[0])
            obs_actions += noise
        if self.use_only_sign:
            obs_actions = np.sign(obs_actions)
        return self._regressor.predict_log_likelihood(
            obs_actions, latents)  # unlike fit(), the latents here are supplied by the caller

    def lowb_mutual(self, paths, times=(0, None)):
        if self.recurrent:
            observations = np.array([
                p["observations"][times[0]:times[1], self.obs_regressed]
                for p in paths
            ])
            actions = np.array([
                p["actions"][times[0]:times[1], self.act_regressed]
                for p in paths
            ])
            obs_actions = np.concatenate([observations, actions], axis=2)
            latents = np.array([
                p['agent_infos']['latents'][times[0]:times[1]] for p in paths
            ])
        else:
            observations = np.concatenate([
                p["observations"][times[0]:times[1], self.obs_regressed]
                for p in paths
            ])
            actions = np.concatenate([
                p["actions"][times[0]:times[1], self.act_regressed]
                for p in paths
            ])
            obs_actions = np.concatenate([observations, actions], axis=1)
            latents = np.concatenate([
                p['agent_infos']["latents"][times[0]:times[1]] for p in paths
            ])
        if self.noisify_traj_coef:
            obs_actions += np.random.multivariate_normal(
                mean=np.zeros_like(np.mean(obs_actions, axis=0)),
                cov=np.diag(
                    np.mean(np.abs(obs_actions), axis=0) *
                    self.noisify_traj_coef),
                size=np.shape(obs_actions)[0])
        if self.use_only_sign:
            obs_actions = np.sign(obs_actions)
        H_latent = self.policy.latent_dist.entropy(
            self.policy.latent_dist_info)  # entropy of the latent distribution

        return H_latent + np.mean(
            self._regressor.predict_log_likelihood(obs_actions, latents))

    def log_diagnostics(self, paths):
        logger.record_tabular(self._regressor._name + 'LowerB_MI',
                              self.lowb_mutual(paths))
        logger.record_tabular(self._regressor._name + 'LowerB_MI_5first',
                              self.lowb_mutual(paths, times=(0, 5)))
        logger.record_tabular(self._regressor._name + 'LowerB_MI_5last',
                              self.lowb_mutual(paths, times=(-5, None)))
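
lowb_mutual() computes a variational lower bound on the mutual information between the latents and the (observation, action) pairs: the latent entropy plus the mean log-likelihood the fitted regressor assigns to the true latents. A minimal numeric sketch of that formula, with hypothetical values standing in for the policy and regressor outputs:

import numpy as np

H_latent = np.log(4)                                   # entropy of a uniform categorical over 4 latents
log_likelihoods = np.array([-0.2, -0.5, -0.1, -0.3])   # stand-in for predict_log_likelihood(...)
mi_lower_bound = H_latent + np.mean(log_likelihoods)
print(mi_lower_bound)                                  # about 1.11 nats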