Example #1
    def __init__(self,
                 optimizer=None,
                 optimizer_args=None,
                 observation_permutation=None,
                 action_permutation=None,
                 sym_loss_weight=0.0001,
                 clip_param=0.2,
                 adam_batchsize=128,
                 adam_epochs=10,
                 **kwargs):
        if optimizer is None:
            if optimizer_args is None:
                optimizer_args = dict()
            optimizer = LbfgsOptimizer(**optimizer_args)
        super(PPO_Clip_Sym, self).__init__(optimizer=optimizer, **kwargs)

        self.observation_permutation = observation_permutation
        self.action_permutation = action_permutation
        self.sym_loss_weight = sym_loss_weight

        self.clip_param = clip_param

        self.adam_batchsize = adam_batchsize
        self.adam_epochs = adam_epochs

        # Signed permutation matrices mapping a state/action to its mirror:
        # entry (i, |perm_i|) = sign(perm_i).
        self.obs_perm_mat = np.zeros(
            (len(observation_permutation), len(observation_permutation)))
        self.act_per_mat = np.zeros(
            (len(action_permutation), len(action_permutation)))
        for i, perm in enumerate(self.observation_permutation):
            self.obs_perm_mat[i][int(np.abs(perm))] = np.sign(perm)
        for i, perm in enumerate(self.action_permutation):
            self.act_per_mat[i][int(np.abs(perm))] = np.sign(perm)
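A minimal numpy sketch (not part of the class above) of how these signed permutation matrices act on a batch of row vectors. Note that np.sign(0) == 0, so the convention assumed here is that a dimension mapping to index 0 with positive sign is encoded as a small positive value such as 0.0001 in the permutation list.

import numpy as np

obs_perm = [0.0001, -2, -1]   # dim 0 fixed; dims 1 and 2 swapped and negated
M = np.zeros((3, 3))
for i, perm in enumerate(obs_perm):
    M[i][int(np.abs(perm))] = np.sign(perm)

obs_batch = np.array([[0.5, 1.0, -2.0]])   # (N, obs_dim) row vectors
mirrored = obs_batch.dot(M.T)              # row i -> sign(perm_i) * obs[|perm_i|]
print(mirrored)                            # [[ 0.5  2.  -1. ]]

The symmetry loss weighted by sym_loss_weight presumably compares the policy's output on mirrored observations against mirrored actions; that part of the class is not shown in this example.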
Example #2
 def __init__(self,
              optimizer=None,
              optimizer_args=None,
              positive_adv=None,
              **kwargs):
     Serializable.quick_init(self, locals())
     if optimizer is None:
         if optimizer_args is None:
             optimizer_args = dict()
         optimizer = LbfgsOptimizer(**optimizer_args)
     super(ERWR, self).__init__(
         optimizer=optimizer,
         positive_adv=True if positive_adv is None else positive_adv,
         **kwargs)
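Both constructors repeat the same default-optimizer idiom. Distilled into a standalone sketch (the helper name is illustrative, not part of the library):

def make_default_optimizer(optimizer=None, optimizer_args=None):
    # Fall back to an L-BFGS optimizer when the caller does not supply one.
    if optimizer is None:
        optimizer_args = optimizer_args if optimizer_args is not None else dict()
        optimizer = LbfgsOptimizer(**optimizer_args)
    return optimizer

Keeping optimizer_args separate from the optimizer instance lets callers tweak the default without constructing LbfgsOptimizer themselves.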
Example #3
    def __init__(
        self,
        input_shape,
        output_dim,
        mean_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        learn_std=True,
        init_std=1.0,
        adaptive_std=False,
        std_share_network=False,
        std_hidden_sizes=(32, 32),
        std_nonlinearity=None,
        normalize_inputs=True,
        normalize_outputs=True,
        name=None,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the mean network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param step_size: KL divergence constraint for each iteration
        :param learn_std: Whether to learn the standard deviations. Only effective if adaptive_std is False. If
        adaptive_std is True, this parameter is ignored, and the weights for the std network are always learned.
        :param adaptive_std: Whether to make the std a function of the states.
        :param std_share_network: Whether to use the same network as the mean.
        :param std_hidden_sizes: Number of hidden units of each layer of the std network. Only used if
        `std_share_network` is False. It defaults to the same architecture as the mean.
        :param std_nonlinearity: Non-linearity used for each layer of the std network. Only used if `std_share_network`
        is False. It defaults to the same non-linearity as the mean.
        """
        Serializable.quick_init(self, locals())

        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer()
            else:
                optimizer = LbfgsOptimizer()

        self._optimizer = optimizer

        if mean_network is None:
            mean_network = MLP(
                input_shape=input_shape,
                output_dim=output_dim,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=None,
            )

        l_mean = mean_network.output_layer

        if adaptive_std:
            l_log_std = MLP(
                input_shape=input_shape,
                input_var=mean_network.input_layer.input_var,
                output_dim=output_dim,
                hidden_sizes=std_hidden_sizes,
                hidden_nonlinearity=std_nonlinearity,
                output_nonlinearity=None,
            ).output_layer
        else:
            l_log_std = ParamLayer(
                mean_network.input_layer,
                num_units=output_dim,
                param=lasagne.init.Constant(np.log(init_std)),
                name="output_log_std",
                trainable=learn_std,
            )

        LasagnePowered.__init__(self, [l_mean, l_log_std])

        xs_var = mean_network.input_layer.input_var
        ys_var = TT.matrix("ys")
        old_means_var = TT.matrix("old_means")
        old_log_stds_var = TT.matrix("old_log_stds")

        x_mean_var = theano.shared(np.zeros((1, ) + input_shape),
                                   name="x_mean",
                                   broadcastable=(True, ) +
                                   (False, ) * len(input_shape))
        x_std_var = theano.shared(np.ones((1, ) + input_shape),
                                  name="x_std",
                                  broadcastable=(True, ) +
                                  (False, ) * len(input_shape))
        y_mean_var = theano.shared(np.zeros((1, output_dim)),
                                   name="y_mean",
                                   broadcastable=(True, False))
        y_std_var = theano.shared(np.ones((1, output_dim)),
                                  name="y_std",
                                  broadcastable=(True, False))

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var
        normalized_ys_var = (ys_var - y_mean_var) / y_std_var

        normalized_means_var = L.get_output(
            l_mean, {mean_network.input_layer: normalized_xs_var})
        normalized_log_stds_var = L.get_output(
            l_log_std, {mean_network.input_layer: normalized_xs_var})

        means_var = normalized_means_var * y_std_var + y_mean_var
        log_stds_var = normalized_log_stds_var + TT.log(y_std_var)

        normalized_old_means_var = (old_means_var - y_mean_var) / y_std_var
        normalized_old_log_stds_var = old_log_stds_var - TT.log(y_std_var)

        dist = self._dist = DiagonalGaussian()

        normalized_dist_info_vars = dict(mean=normalized_means_var,
                                         log_std=normalized_log_stds_var)

        mean_kl = TT.mean(
            dist.kl_sym(
                dict(mean=normalized_old_means_var,
                     log_std=normalized_old_log_stds_var),
                normalized_dist_info_vars,
            ))

        loss = -TT.mean(
            dist.log_likelihood_sym(normalized_ys_var,
                                    normalized_dist_info_vars))

        self._f_predict = compile_function([xs_var], means_var)
        self._f_pdists = compile_function([xs_var], [means_var, log_stds_var])
        self._l_mean = l_mean
        self._l_log_std = l_log_std

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[normalized_means_var, normalized_log_stds_var],
        )

        if use_trust_region:
            optimizer_args["leq_constraint"] = (mean_kl, step_size)
            optimizer_args["inputs"] = [
                xs_var, ys_var, old_means_var, old_log_stds_var
            ]
        else:
            optimizer_args["inputs"] = [xs_var, ys_var]

        self._optimizer.update_opt(**optimizer_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._normalize_outputs = normalize_outputs
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
        self._y_mean_var = y_mean_var
        self._y_std_var = y_std_var
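The regressor trains in normalized target space and denormalizes via means_var = normalized_means_var * y_std_var + y_mean_var and log_stds_var = normalized_log_stds_var + TT.log(y_std_var). A small numpy check, independent of the class, of why adding log(y_std) to the log-std is the right denormalization:

import numpy as np

y_mean, y_std = 3.0, 2.0
norm_mean, norm_log_std = 0.5, np.log(0.1)
z = np.random.randn()

# A draw in normalized space, mapped back to the original scale ...
y_from_normalized = (norm_mean + np.exp(norm_log_std) * z) * y_std + y_mean

# ... equals a draw from the denormalized Gaussian with the same noise z.
mean = norm_mean * y_std + y_mean
log_std = norm_log_std + np.log(y_std)
assert np.isclose(y_from_normalized, mean + np.exp(log_std) * z)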
Example #4
    def __init__(
        self,
        input_shape,
        output_dim,
        predict_all=False,  # CF
        prob_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        normalize_inputs=True,
        name=None,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param predict_all: use the prediction made at every step about the latent variables (not only the last step)
        :param hidden_sizes: Number of hidden units of each layer of the mean network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param step_size: KL divergence constraint for each iteration
        """
        Serializable.quick_init(self, locals())

        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer()
            else:
                optimizer = LbfgsOptimizer()

        self.output_dim = output_dim
        self._optimizer = optimizer

        if prob_network is None:
            prob_network = GRUNetwork(
                input_shape=input_shape,
                output_dim=output_dim,
                hidden_dim=hidden_sizes[0],  # this gives 32 by default
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        l_prob = prob_network.output_layer

        LasagnePowered.__init__(self, [l_prob])

        xs_var = prob_network.input_layer.input_var
        ys_var = TT.itensor3("ys")
        old_prob_var = TT.tensor3("old_prob")

        # Normalization stats of shape (1, 1) + input_shape; the two leading
        # broadcastable dims correspond to (trajectory, time).
        x_mean_var = theano.shared(
            np.zeros((1, 1) + input_shape),
            name="x_mean",
            broadcastable=(True, True) + (False,) * len(input_shape))
        x_std_var = theano.shared(
            np.ones((1, 1) + input_shape),
            name="x_std",
            broadcastable=(True, True) + (False,) * len(input_shape))

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var

        prob_var_all = L.get_output(
            l_prob, {prob_network.input_layer: normalized_xs_var})

        if predict_all:
            prob_var = prob_var_all
        else:
            # take only last dim but keep the shape
            prob_var_last = TT.reshape(
                prob_var_all[:, -1, :],
                (TT.shape(prob_var_all)[0], 1, TT.shape(prob_var_all)[2]))
            # pad along the time dimension to obtain the same shape as before
            padded_prob_var = TT.tile(prob_var_last,
                                      (1, TT.shape(prob_var_all)[1], 1))
            # give it the standard name
            prob_var = padded_prob_var

        old_info_vars = dict(prob=old_prob_var)
        info_vars = dict(prob=prob_var)

        dist = self._dist = Categorical(output_dim)

        mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars))

        loss = -TT.mean(dist.log_likelihood_sym(ys_var, info_vars))

        predicted_flat = special.to_onehot_sym(
            TT.flatten(TT.argmax(prob_var, axis=-1)), output_dim)
        predicted = TT.reshape(predicted_flat, TT.shape(prob_var))

        self._f_predict = ext.compile_function([xs_var], predicted)
        self._f_prob = ext.compile_function([xs_var], prob_var)
        self._prob_network = prob_network
        self._l_prob = l_prob

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[prob_var],
        )

        if use_trust_region:
            optimizer_args["leq_constraint"] = (mean_kl, step_size)
            optimizer_args["inputs"] = [xs_var, ys_var, old_prob_var]
        else:
            optimizer_args["inputs"] = [xs_var, ys_var]

        self._optimizer.update_opt(**optimizer_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
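When predict_all is False, the reshape/tile pair keeps only the last-step prediction and repeats it along the time axis so downstream shapes are unchanged. A numpy sketch of the same operation (TT.reshape and TT.tile behave analogously on symbolic tensors):

import numpy as np

prob_all = np.random.rand(4, 7, 3)                # (traj, time, output_dim)
prob_last = prob_all[:, -1, :].reshape(4, 1, 3)   # keep only the final step
padded = np.tile(prob_last, (1, 7, 1))            # repeat it along time

assert padded.shape == prob_all.shape
assert np.allclose(padded[:, 0, :], prob_all[:, -1, :])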
Example #5
    def __init__(
            self,
            input_shape,
            output_dim,
            prob_network=None,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.rectify,
            optimizer=None,
            use_trust_region=True,
            step_size=0.01,
            normalize_inputs=True,
            name=None,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the mean network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param step_size: KL divergence constraint for each iteration
        """
        Serializable.quick_init(self, locals())

        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer()
            else:
                optimizer = LbfgsOptimizer()

        self.output_dim = output_dim
        self._optimizer = optimizer

        if prob_network is None:
            prob_network = MLP(
                input_shape=input_shape,
                output_dim=output_dim,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        l_prob = prob_network.output_layer

        LasagnePowered.__init__(self, [l_prob])

        xs_var = prob_network.input_layer.input_var
        ys_var = TT.imatrix("ys")
        old_prob_var = TT.matrix("old_prob")

        x_mean_var = theano.shared(
            np.zeros((1,) + input_shape),
            name="x_mean",
            broadcastable=(True,) + (False,) * len(input_shape)
        )
        x_std_var = theano.shared(
            np.ones((1,) + input_shape),
            name="x_std",
            broadcastable=(True,) + (False,) * len(input_shape)
        )

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var

        prob_var = L.get_output(l_prob, {prob_network.input_layer: normalized_xs_var})

        old_info_vars = dict(prob=old_prob_var)
        info_vars = dict(prob=prob_var)

        dist = self._dist = Categorical(output_dim)

        mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars))

        loss = - TT.mean(dist.log_likelihood_sym(ys_var, info_vars))

        predicted = special.to_onehot_sym(TT.argmax(prob_var, axis=1), output_dim)

        self._f_predict = ext.compile_function([xs_var], predicted)
        self._f_prob = ext.compile_function([xs_var], prob_var)
        self._prob_network = prob_network
        self._l_prob = l_prob

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[prob_var],
        )

        if use_trust_region:
            optimizer_args["leq_constraint"] = (mean_kl, step_size)
            optimizer_args["inputs"] = [xs_var, ys_var, old_prob_var]
        else:
            optimizer_args["inputs"] = [xs_var, ys_var]

        self._optimizer.update_opt(**optimizer_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
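The predicted output above is the argmax class re-encoded as a one-hot vector; special.to_onehot_sym is the symbolic counterpart of this numpy post-processing:

import numpy as np

prob = np.array([[0.1, 0.7, 0.2],
                 [0.6, 0.3, 0.1]])
idx = prob.argmax(axis=1)                  # [1, 0]
predicted = np.eye(prob.shape[1])[idx]     # one-hot rows: [[0, 1, 0], [1, 0, 0]]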
Example #6
        raise ValueError('No values provided for param %s' % param_name)
    else:
        try:
            param_values = [[float(y) for y in x.split(',')]
                            for x in args.param_val.split(',,')]
        except ValueError:  # non-float values: keep them as strings
            param_values = [[y for y in x.split(',')]
                            for x in args.param_val.split(',,')]

############################################################################
## POSTPROCESSING OF PARAMETERS
if args.logdir[-1] != '/':
    args.logdir += '/'

if params['hide_baseline_net_params']['optimizer'] == 'LbfgsOptimizer':
    params['hide_baseline_net_params']['optimizer'] = LbfgsOptimizer(
        max_opt_itr=params['hide_baseline_net_params']['max_opt_itr'])
    params['hide_baseline_net_params'].pop('max_opt_itr', None)
else:
    raise ValueError('Unknown optimizer: %s' %
                     params['hide_baseline_net_params']['optimizer'])

if params['seek_baseline_net_params']['optimizer'] == 'LbfgsOptimizer':
    params['seek_baseline_net_params']['optimizer'] = LbfgsOptimizer(
        max_opt_itr=params['seek_baseline_net_params']['max_opt_itr'])
    params['seek_baseline_net_params'].pop('max_opt_itr', None)
else:
    raise ValueError('Unknown optimizer: %s' %
                     params['seek_baseline_net_params']['optimizer'])

## All possible combinations of hyperparameters
param_values.append(seeds)
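param_values is a list of value lists, one per swept hyperparameter, with the seeds appended last. The enumeration of all combinations presumably proceeds via a Cartesian product along these lines (a sketch; only param_values and seeds come from the script above):

import itertools

for combo in itertools.product(*param_values):
    hyperparams, seed = combo[:-1], combo[-1]
    # configure and launch one run per (hyperparameter setting, seed) pair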
Example #7
    def __init__(
        self,
        input_shape,
        output_dim,
        predict_all=False,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        normalize_inputs=True,
        name=None,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param predict_all: use the prediction made at every step about the latent variables (not only the last step)
        :param hidden_sizes: Number of hidden units of each layer of the mean network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param step_size: KL divergence constraint for each iteration
        """
        Serializable.quick_init(self, locals())

        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer()
            else:
                optimizer = LbfgsOptimizer()

        self.output_dim = output_dim
        self._optimizer = optimizer

        p_network = GRUNetwork(
            input_shape=input_shape,
            output_dim=output_dim,
            hidden_dim=hidden_sizes[0],
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.sigmoid,
        )

        l_p = p_network.output_layer  # emits a prediction at every time step; only the last is used unless predict_all

        LasagnePowered.__init__(self, [l_p])

        xs_var = p_network.input_layer.input_var

        ys_var = TT.itensor3("ys")  # this is 3D: (traj, time, lat_dim)
        old_p_var = TT.tensor3("old_p")
        x_mean_var = theano.shared(
            np.zeros((1, 1) + input_shape),
            name="x_mean",
            broadcastable=(True, True) + (False,) * len(input_shape))

        x_std_var = theano.shared(
            np.ones((1, 1) + input_shape),
            name="x_std",
            broadcastable=(True, True) + (False,) * len(input_shape))

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var
        # network output at every time step; unless predict_all is set, only
        # the last step is kept and tiled along the time dimension below
        p_var_all = L.get_output(l_p,
                                 {p_network.input_layer: normalized_xs_var})
        # take only last dim but keep the shape
        p_var_last = TT.reshape(
            p_var_all[:, -1, :],
            (TT.shape(p_var_all)[0], 1, TT.shape(p_var_all)[2]))
        # pad along the time dimension to obtain the same shape as before
        padded_p = TT.tile(p_var_last, (1, TT.shape(p_var_all)[1], 1))
        # give it the standard name
        if predict_all:
            p_var = p_var_all
        else:
            p_var = padded_p

        old_info_vars = dict(p=old_p_var)
        # posterior of the latent at every step, wrt obs-act; same along the batch if recurrent
        info_vars = dict(p=p_var)

        dist = self._dist = Bernoulli(output_dim)

        mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars))

        loss = -TT.mean(dist.log_likelihood_sym(
            ys_var,
            info_vars))  # regressor just wants to min -loglik of data ys

        predicted = p_var >= 0.5

        self._f_predict = ext.compile_function([xs_var], predicted)
        self._f_p = ext.compile_function(
            [xs_var], p_var
        )  # for consistency with gauss_mlp_reg this should be ._f_pdists

        self._l_p = l_p

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[p_var],
        )

        if use_trust_region:
            optimizer_args["leq_constraint"] = (mean_kl, step_size)
            optimizer_args["inputs"] = [xs_var, ys_var, old_p_var]
        else:
            optimizer_args["inputs"] = [xs_var, ys_var]

        self._optimizer.update_opt(**optimizer_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
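The loss above is the negative Bernoulli log-likelihood of the binary targets under the predicted per-dimension probabilities. Written out in numpy, up to the exact summation convention used by dist.log_likelihood_sym:

import numpy as np

def bernoulli_log_likelihood(ys, p, eps=1e-8):
    # ys: 0/1 targets, p: predicted probabilities, same shape
    p = np.clip(p, eps, 1.0 - eps)
    return np.sum(ys * np.log(p) + (1.0 - ys) * np.log(1.0 - p), axis=-1)

ys = np.array([[1, 0, 1]])
p = np.array([[0.9, 0.2, 0.6]])
print(-np.mean(bernoulli_log_likelihood(ys, p)))   # the quantity the optimizer drives down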
Example #8
    def __init__(
        self,
        input_shape,
        output_dim,
        predict_all=True,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        normalize_inputs=True,
        name=None,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the mean network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param step_size: KL divergence constraint for each iteration
        """
        Serializable.quick_init(self, locals())

        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer()
            else:
                optimizer = LbfgsOptimizer()

        self.output_dim = output_dim
        self._optimizer = optimizer

        p_network = MLP(
            input_shape=input_shape,
            output_dim=output_dim,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.sigmoid,
        )

        l_p = p_network.output_layer

        LasagnePowered.__init__(self, [l_p])

        xs_var = p_network.input_layer.input_var

        ys_var = TT.imatrix("ys")
        old_p_var = TT.matrix("old_p")
        x_mean_var = theano.shared(np.zeros((1, ) + input_shape),
                                   name="x_mean",
                                   broadcastable=(True, ) +
                                   (False, ) * len(input_shape))
        x_std_var = theano.shared(np.ones((1, ) + input_shape),
                                  name="x_std",
                                  broadcastable=(True, ) +
                                  (False, ) * len(input_shape))

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var

        p_var = L.get_output(l_p, {p_network.input_layer: normalized_xs_var})

        old_info_vars = dict(p=old_p_var)
        # posterior of the latent at every step, wrt obs-act; same along the batch if recurrent
        info_vars = dict(p=p_var)

        dist = self._dist = Bernoulli(output_dim)

        mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars))
        self._mean_kl = ext.compile_function(
            [xs_var, old_p_var], mean_kl)  # if not using TR, still log KL

        loss = -TT.mean(dist.log_likelihood_sym(
            ys_var,
            info_vars))  # regressor just wants to min -loglik of data ys

        predicted = p_var >= 0.5  # threshold the Bernoulli probabilities at 0.5 to get 0/1 predictions

        self._f_predict = ext.compile_function([xs_var], predicted)
        self._f_p = ext.compile_function(
            [xs_var], p_var
        )  # for consistency with gauss_mlp_reg this should be ._f_pdists
        self._l_p = l_p

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[p_var],
        )

        if use_trust_region:
            optimizer_args["leq_constraint"] = (mean_kl, step_size)
            optimizer_args["inputs"] = [xs_var, ys_var, old_p_var]
        else:
            optimizer_args["inputs"] = [xs_var, ys_var]

        self._optimizer.update_opt(**optimizer_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
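Example #8 also compiles mean_kl so the KL can be logged even when the trust-region constraint is disabled. For two Bernoulli probability vectors the averaged quantity is the standard per-dimension KL, sketched in numpy (the reduction over dimensions in dist.kl_sym may differ):

import numpy as np

def bernoulli_kl(p_old, p_new, eps=1e-8):
    p_old = np.clip(p_old, eps, 1.0 - eps)
    p_new = np.clip(p_new, eps, 1.0 - eps)
    return np.sum(p_old * np.log(p_old / p_new)
                  + (1.0 - p_old) * np.log((1.0 - p_old) / (1.0 - p_new)), axis=-1)

p_old = np.array([[0.9, 0.2]])
p_new = np.array([[0.7, 0.4]])
print(np.mean(bernoulli_kl(p_old, p_new)))   # what the trust region bounds by step_size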