Example #1
class RecurrentCategorical(Distribution):
    def __init__(self, dim):
        self._cat = Categorical(dim)
        self._dim = dim

    @property
    def dim(self):
        return self._dim

    def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
        """
        Compute the symbolic KL divergence of two categorical distributions
        """
        old_prob_var = old_dist_info_vars["prob"]
        new_prob_var = new_dist_info_vars["prob"]
        # Assume layout is N * T * A
        return TT.sum(old_prob_var * (TT.log(old_prob_var + TINY) - TT.log(new_prob_var + TINY)), axis=2)

    def kl(self, old_dist_info, new_dist_info):
        """
        Compute the KL divergence of two categorical distributions
        """
        old_prob = old_dist_info["prob"]
        new_prob = new_dist_info["prob"]
        return np.sum(old_prob * (np.log(old_prob + TINY) - np.log(new_prob + TINY)), axis=2)

    def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars):
        old_prob_var = old_dist_info_vars["prob"]
        new_prob_var = new_dist_info_vars["prob"]
        # Assume layout is N * T * A
        a_dim = x_var.shape[-1]
        flat_ratios = self._cat.likelihood_ratio_sym(
            x_var.reshape((-1, a_dim)),
            dict(prob=old_prob_var.reshape((-1, a_dim))),
            dict(prob=new_prob_var.reshape((-1, a_dim))),
        )
        return flat_ratios.reshape(old_prob_var.shape[:2])

    def entropy(self, dist_info):
        probs = dist_info["prob"]
        return -np.sum(probs * np.log(probs + TINY), axis=2)

    def log_likelihood_sym(self, xs, dist_info_vars):
        probs = dist_info_vars["prob"]
        # Assume layout is N * T * A
        a_dim = probs.shape[-1]
        flat_logli = self._cat.log_likelihood_sym(xs.reshape((-1, a_dim)), dict(prob=probs.reshape((-1, a_dim))))
        return flat_logli.reshape(probs.shape[:2])

    def log_likelihood(self, xs, dist_info):
        probs = dist_info["prob"]
        # Assume layout is N * T * A
        a_dim = probs.shape[-1]
        # use the numeric (non-symbolic) categorical log-likelihood here
        flat_logli = self._cat.log_likelihood(xs.reshape((-1, a_dim)), dict(prob=probs.reshape((-1, a_dim))))
        return flat_logli.reshape(probs.shape[:2])

    @property
    def dist_info_keys(self):
        return ["prob"]
Example #2
    def __init__(
            self,
            env_spec,
            hidden_sizes=(),
            hidden_nonlinearity=NL.tanh,
            num_seq_inputs=1,
            neat_output_dim=20,
            neat_network=None,
            prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param neat_output_dim: output dimension of the randomly created NEAT feature network
        :param neat_network: manually specified NEAT feature network; if None, a random MLP is created
        :param prob_network: manually specified network for this policy; other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)
        # create random NEAT MLP
        if neat_network is None:
            neat_network = MLP(
                input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
                output_dim=neat_output_dim,
                hidden_sizes=(12, 12),
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.identity,
            )

        if prob_network is None:
            prob_network = MLP(
                input_shape=(L.get_output_shape(neat_network.output_layer)[1],),
                output_dim=env_spec.action_space.n,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        self._phi = neat_network.output_layer
        self._obs = neat_network.input_layer
        self._neat_output = ext.compile_function(
            [neat_network.input_layer.input_var],
            L.get_output(neat_network.output_layer)
        )

        self.prob_network = prob_network
        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function(
            [prob_network.input_layer.input_var],
            L.get_output(prob_network.output_layer)
        )

        self._dist = Categorical(env_spec.action_space.n)

        super(PowerGradientPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])
Example #3
    def __init__(
            self,
            env_spec,
            conv_filters, conv_filter_sizes, conv_strides, conv_pads,
            hidden_sizes=(),
            hidden_nonlinearity=NL.rectify,
            output_nonlinearity=NL.softmax,
            prob_network=None,
            name=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy; other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        self._env_spec = env_spec

        if prob_network is None:
            if not name:
                name = "categorical_conv_prob_network"
            prob_network = ConvNetwork(
                input_shape=env_spec.observation_space.shape,
                output_dim=env_spec.action_space.n,
                conv_filters=conv_filters,
                conv_filter_sizes=conv_filter_sizes,
                conv_strides=conv_strides,
                conv_pads=conv_pads,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=output_nonlinearity,
                name=name,
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function(
            [prob_network.input_layer.input_var],
            L.get_output(prob_network.output_layer)
        )

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalConvPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])
Example #4
    def __init__(
            self,
            name,
            env_spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.tanh,
            num_seq_inputs=1,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param name: name assigned to the underlying Q network
        :param num_seq_inputs: number of sequential observations concatenated into the input
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        self._env_spec = env_spec
        
        q_network = MLP(
            input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
            output_dim=env_spec.action_space.n,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=NL.linear,
            name=name
        )
        
        self._l_q = q_network.output_layer
        self._l_obs = q_network.input_layer
        self._f_q = ext.compile_function(
            [q_network.input_layer.input_var],
            L.get_output(q_network.output_layer)
        )

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalMlpQPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [q_network.output_layer])
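
The Q network above ends in NL.linear because its outputs are action values
rather than probabilities. A hypothetical epsilon-greedy readout over such
values (an illustrative sketch, not rllab API):

import numpy as np

def eps_greedy(q_values, eps=0.1, rng=np.random):
    # with probability eps pick a random action, otherwise the greedy one
    if rng.rand() < eps:
        return rng.randint(len(q_values))
    return int(np.argmax(q_values))

print(eps_greedy(np.array([0.2, 1.5, -0.3])))
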
Example #5
    def __init__(self, discrete_dim, chain_trigger, chain_distr):
        """
        Args:
            discrete_dim: Cardinality of the categorical distribution.
            chain_trigger: Value of the categorical distribution which should
                trigger the chained distribution.
            chain_distr: A child `Distribution` instance which is
                triggered when the parent categorical distribution selects the
                particular value `chain_trigger`. This should be a discrete
                distribution; bad things will happen if it is not one.
        """
        self._prior_distr = Categorical(discrete_dim)
        self._chain_trigger = chain_trigger
        self._chain_distr = chain_distr

        # This is easier to code if the chain trigger is the final choice in
        # the categorical space.
        assert self._chain_trigger == discrete_dim - 1
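
A hypothetical numpy sketch of the chained sampling this constructor sets up:
drawing the trigger value (the final index) from the parent categorical also
draws from the child distribution. Names are illustrative, not the class's API.

import numpy as np

def sample_chained(parent_probs, child_probs, rng=np.random):
    k = rng.choice(len(parent_probs), p=parent_probs)
    if k == len(parent_probs) - 1:  # chain_trigger == discrete_dim - 1
        return k, rng.choice(len(child_probs), p=child_probs)
    return k, None

print(sample_chained([0.5, 0.3, 0.2], [0.7, 0.3]))
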
Example #6
    def __init__(
        self,
        env_spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.tanh,
        prob_network=None,
    ):
        """
        :param env_spec: A spec for the mdp.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy; other network params
        are ignored
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        if prob_network is None:
            prob_network = MLP(
                input_shape=(env_spec.observation_space.flat_dim, ),
                output_dim=env_spec.action_space.n,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer
        self._f_prob = ext.compile_function(
            [prob_network.input_layer.input_var],
            L.get_output(prob_network.output_layer))

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])
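
The softmax output nonlinearity is what turns the MLP's final layer into a
valid categorical distribution over actions. A minimal numpy sketch of that
step (stand-in names, not rllab code):

import numpy as np

def softmax(z):
    z = z - z.max(axis=-1, keepdims=True)  # shift for numerical stability
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

logits = np.random.randn(5, 3)              # stand-in for the final linear layer
probs = softmax(logits)                     # what NL.softmax produces
assert np.allclose(probs.sum(axis=1), 1.0)  # each row is a distribution
actions = [np.random.choice(3, p=p) for p in probs]
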
Example #7
    def __init__(
            self,
            name,
            env_spec,
            conv_filters, conv_filter_sizes, conv_strides, conv_pads,
            hidden_sizes=(),
            hidden_nonlinearity=NL.rectify,
            output_nonlinearity=NL.softmax,
            prob_network=None,
            feature_layer_index=-2,
            eps=0,
    ):
        """
        The policy consists of several convolutional layers followed by fully
        connected layers and a softmax output.
        :param env_spec: A spec for the mdp.
        :param conv_filters, conv_filter_sizes, conv_strides, conv_pads: specify the convolutional layers. See rllab.core.network.ConvNetwork for details.
        :param hidden_sizes: list of sizes for the fully connected hidden layers
        :param hidden_nonlinearity: nonlinearity used for each hidden layer
        :param prob_network: manually specified network for this policy; other network params
        are ignored
        :param feature_layer_index: index of the feature layer. Default -2 means the last layer before the fc-softmax output
        :param eps: mixture weight on the uniform distribution; useful to force exploration
        :return:
        """
        Serializable.quick_init(self, locals())

        assert isinstance(env_spec.action_space, Discrete)

        self._env_spec = env_spec

        if prob_network is None:
            prob_network = ConvNetwork(
                input_shape=env_spec.observation_space.shape,
                output_dim=env_spec.action_space.n,
                conv_filters=conv_filters,
                conv_filter_sizes=conv_filter_sizes,
                conv_strides=conv_strides,
                conv_pads=conv_pads,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
                name="prob_network",
            )

        self._l_prob = prob_network.output_layer
        self._l_obs = prob_network.input_layer

        # mix in uniform distribution
        n_actions = env_spec.action_space.n
        uniform_prob = np.ones(n_actions, dtype=theano.config.floatX) / n_actions
        eps_var = theano.shared(
            eps,
            name="eps",
        )
        nn_prob = L.get_output(prob_network.output_layer)
        final_prob = (1 - eps_var) * nn_prob + eps_var * uniform_prob
        self._f_prob = ext.compile_function(
            [prob_network.input_layer.input_var],
            final_prob,
        )
        self._eps_var = eps_var

        self._feature_layer_index = feature_layer_index
        feature_layer = L.get_all_layers(prob_network.output_layer)[feature_layer_index] # layer before fc-softmax
        self._f_feature = ext.compile_function(
            [prob_network.input_layer.input_var],
            L.get_output(feature_layer)
        )
        self._feature_shape = L.get_output_shape(feature_layer)[1:]

        self._dist = Categorical(env_spec.action_space.n)

        super(CategoricalConvPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [prob_network.output_layer])
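
A numpy sketch (with assumed shapes) of the uniform mixing compiled above:
with weight eps, every action keeps probability at least eps / n_actions,
which forces some exploration.

import numpy as np

eps, n_actions = 0.1, 4
nn_prob = np.array([[0.97, 0.01, 0.01, 0.01]])  # stand-in for the network output
final_prob = (1 - eps) * nn_prob + eps * np.ones(n_actions) / n_actions
assert np.all(final_prob >= eps / n_actions)
assert np.allclose(final_prob.sum(axis=1), 1.0)
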
Example #8
class RecurrentCategorical(Distribution):
    def __init__(self):
        self._cat = Categorical()

    def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
        """
        Compute the symbolic KL divergence of two categorical distributions
        """
        old_prob_var = old_dist_info_vars["prob"]
        new_prob_var = new_dist_info_vars["prob"]
        # Assume layout is N * T * A
        return TT.sum(
            old_prob_var *
            (TT.log(old_prob_var + TINY) - TT.log(new_prob_var + TINY)),
            axis=2)

    def kl(self, old_dist_info, new_dist_info):
        """
        Compute the KL divergence of two categorical distributions
        """
        old_prob = old_dist_info["prob"]
        new_prob = new_dist_info["prob"]
        return np.sum(old_prob *
                      (np.log(old_prob + TINY) - np.log(new_prob + TINY)),
                      axis=2)

    def likelihood_ratio_sym(self, x_var, old_dist_info_vars,
                             new_dist_info_vars):
        old_prob_var = old_dist_info_vars["prob"]
        new_prob_var = new_dist_info_vars["prob"]
        # Assume layout is N * T * A
        a_dim = x_var.shape[-1]
        flat_ratios = self._cat.likelihood_ratio_sym(
            x_var.reshape((-1, a_dim)),
            dict(prob=old_prob_var.reshape((-1, a_dim))),
            dict(prob=new_prob_var.reshape((-1, a_dim))))
        return flat_ratios.reshape(old_prob_var.shape[:2])

    def entropy(self, dist_info):
        probs = dist_info["prob"]
        return -np.sum(probs * np.log(probs + TINY), axis=2)

    def log_likelihood_sym(self, xs, dist_info_vars):
        probs = dist_info_vars["prob"]
        # Assume layout is N * T * A
        a_dim = probs.shape[-1]
        flat_logli = self._cat.log_likelihood_sym(
            xs.reshape((-1, a_dim)), dict(prob=probs.reshape((-1, a_dim))))
        return flat_logli.reshape(probs.shape[:2])

    def log_likelihood(self, xs, dist_info):
        probs = dist_info["prob"]
        # Assume layout is N * T * A
        a_dim = probs.shape[-1]
        # use the numeric (non-symbolic) categorical log-likelihood here
        flat_logli = self._cat.log_likelihood(
            xs.reshape((-1, a_dim)), dict(prob=probs.reshape((-1, a_dim))))
        return flat_logli.reshape(probs.shape[:2])

    @property
    def dist_info_keys(self):
        return ["prob"]
Example #9
    def __init__(self):
        self._cat = Categorical()
Example #10
    def __init__(
        self,
        input_shape,
        output_dim,
        predict_all=False,  # if True, return probabilities for every timestep, not just the last
        prob_network=None,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=NL.rectify,
        optimizer=None,
        use_trust_region=True,
        step_size=0.01,
        normalize_inputs=True,
        name=None,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the mean network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param step_size: KL divergence constraint for each iteration
        """
        Serializable.quick_init(self, locals())

        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer()
            else:
                optimizer = LbfgsOptimizer()

        self.output_dim = output_dim
        self._optimizer = optimizer

        if prob_network is None:
            prob_network = GRUNetwork(
                input_shape=input_shape,
                output_dim=output_dim,
                hidden_dim=hidden_sizes[0],  # this gives 32 by default
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        l_prob = prob_network.output_layer

        LasagnePowered.__init__(self, [l_prob])

        xs_var = prob_network.input_layer.input_var
        ys_var = TT.itensor3("ys")
        old_prob_var = TT.tensor3("old_prob")

        x_mean_var = theano.shared(
            np.zeros((1, 1) + input_shape),  # shape (1, 1, *input_shape); leading axes are (traj, time)
            name="x_mean",
            broadcastable=(True, True) + (False,) * len(input_shape),
        )
        x_std_var = theano.shared(
            np.ones((1, 1) + input_shape),
            name="x_std",
            broadcastable=(True, True) + (False,) * len(input_shape),
        )

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var

        prob_var_all = L.get_output(
            l_prob, {prob_network.input_layer: normalized_xs_var})

        if predict_all:
            prob_var = prob_var_all
        else:
            # take only the last timestep but keep the 3D shape
            prob_var_last = TT.reshape(
                prob_var_all[:, -1, :],
                (TT.shape(prob_var_all)[0], 1, TT.shape(prob_var_all)[2]))
            # pad along the time dimension to recover the original N * T * A shape
            prob_var = TT.tile(prob_var_last,
                               (1, TT.shape(prob_var_all)[1], 1))

        old_info_vars = dict(prob=old_prob_var)
        info_vars = dict(prob=prob_var)

        dist = self._dist = Categorical(output_dim)

        mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars))

        loss = -TT.mean(dist.log_likelihood_sym(ys_var, info_vars))

        predicted_flat = special.to_onehot_sym(
            TT.flatten(TT.argmax(prob_var, axis=-1)), output_dim)
        predicted = TT.reshape(predicted_flat, TT.shape(prob_var))

        self._f_predict = ext.compile_function([xs_var], predicted)
        self._f_prob = ext.compile_function([xs_var], prob_var)
        self._prob_network = prob_network
        self._l_prob = l_prob

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[prob_var],
        )

        if use_trust_region:
            optimizer_args["leq_constraint"] = (mean_kl, step_size)
            optimizer_args["inputs"] = [xs_var, ys_var, old_prob_var]
        else:
            optimizer_args["inputs"] = [xs_var, ys_var]

        self._optimizer.update_opt(**optimizer_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
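
A numpy sketch of the predict_all=False branch above: only the last timestep's
probabilities are kept, then tiled along the time axis so the output shape
matches the predict_all=True case.

import numpy as np

prob_all = np.random.rand(2, 5, 3)                # (N, T, A)
last = prob_all[:, -1, :].reshape(2, 1, 3)        # keep only t = T-1
padded = np.tile(last, (1, prob_all.shape[1], 1))
assert padded.shape == prob_all.shape             # same shape, constant over time
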
Example #11
    def __init__(
            self,
            input_shape,
            output_dim,
            prob_network=None,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=NL.rectify,
            optimizer=None,
            use_trust_region=True,
            step_size=0.01,
            normalize_inputs=True,
            name=None,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the mean network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param step_size: KL divergence constraint for each iteration
        """
        Serializable.quick_init(self, locals())

        if optimizer is None:
            if use_trust_region:
                optimizer = PenaltyLbfgsOptimizer()
            else:
                optimizer = LbfgsOptimizer()

        self.output_dim = output_dim
        self._optimizer = optimizer

        if prob_network is None:
            prob_network = MLP(
                input_shape=input_shape,
                output_dim=output_dim,
                hidden_sizes=hidden_sizes,
                hidden_nonlinearity=hidden_nonlinearity,
                output_nonlinearity=NL.softmax,
            )

        l_prob = prob_network.output_layer

        LasagnePowered.__init__(self, [l_prob])

        xs_var = prob_network.input_layer.input_var
        ys_var = TT.imatrix("ys")
        old_prob_var = TT.matrix("old_prob")

        x_mean_var = theano.shared(
            np.zeros((1,) + input_shape),
            name="x_mean",
            broadcastable=(True,) + (False,) * len(input_shape)
        )
        x_std_var = theano.shared(
            np.ones((1,) + input_shape),
            name="x_std",
            broadcastable=(True,) + (False,) * len(input_shape)
        )

        normalized_xs_var = (xs_var - x_mean_var) / x_std_var

        prob_var = L.get_output(l_prob, {prob_network.input_layer: normalized_xs_var})

        old_info_vars = dict(prob=old_prob_var)
        info_vars = dict(prob=prob_var)

        dist = self._dist = Categorical(output_dim)

        mean_kl = TT.mean(dist.kl_sym(old_info_vars, info_vars))

        loss = - TT.mean(dist.log_likelihood_sym(ys_var, info_vars))

        predicted = special.to_onehot_sym(TT.argmax(prob_var, axis=1), output_dim)

        self._f_predict = ext.compile_function([xs_var], predicted)
        self._f_prob = ext.compile_function([xs_var], prob_var)
        self._prob_network = prob_network
        self._l_prob = l_prob

        optimizer_args = dict(
            loss=loss,
            target=self,
            network_outputs=[prob_var],
        )

        if use_trust_region:
            optimizer_args["leq_constraint"] = (mean_kl, step_size)
            optimizer_args["inputs"] = [xs_var, ys_var, old_prob_var]
        else:
            optimizer_args["inputs"] = [xs_var, ys_var]

        self._optimizer.update_opt(**optimizer_args)

        self._use_trust_region = use_trust_region
        self._name = name

        self._normalize_inputs = normalize_inputs
        self._x_mean_var = x_mean_var
        self._x_std_var = x_std_var
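
A numpy sketch of the prediction step compiled above: argmax the class
probabilities per row, then one-hot encode (the symbolic version uses
special.to_onehot_sym):

import numpy as np

probs = np.array([[0.2, 0.7, 0.1],
                  [0.5, 0.2, 0.3]])
predicted = np.eye(probs.shape[1])[probs.argmax(axis=1)]  # one-hot rows
# predicted == [[0, 1, 0], [1, 0, 0]]
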
Example #12
    def __init__(self, dim):
        self._cat = Categorical(dim)
        self._dim = dim
Example #13
    def __init__(self):
        self._cat = Categorical()
Example #14
    def __init__(self, dim):
        self._cat = Categorical(dim)
        self._dim = dim