Code example #1
File: test_spaces.py Project: zhmz90/rllab
def test_product_space_unflatten_n():
    space = Product([Discrete(3), Discrete(3)])
    np.testing.assert_array_equal(space.flatten((2, 2)), space.flatten_n([(2, 2)])[0])
    np.testing.assert_array_equal(
        space.unflatten(space.flatten((2, 2))),
        space.unflatten_n(space.flatten_n([(2, 2)]))[0]
    )
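A minimal usage sketch to clarify what the test above exercises: in rllab, Discrete.flatten one-hot encodes an index and Product.flatten concatenates the per-component encodings, so flatten and unflatten round-trip. The import path and the concrete printed output below are assumptions based on a standard rllab checkout, not part of the original test file.

from rllab.spaces import Discrete, Product

space = Product([Discrete(3), Discrete(3)])
flat = space.flatten((2, 2))       # concatenated one-hots, length 3 + 3
print(flat)                        # e.g. [0. 0. 1. 0. 0. 1.]
assert space.unflatten(flat) == (2, 2)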
Code example #2
def action_space(self):
    return Product([
        Discrete(11),
        Discrete(11),
        Discrete(11),
        Discrete(2),
        Discrete(2)
    ])
Code example #3
def action_space(self):
    """
    Returns a Space object
    :rtype: rllab.spaces.base.Space
    Action space is [Agent's desired X,Y loc represented in cartesian product form]
    """
    return Discrete(5)
Code example #4
def observation_space(self):
    """
    Returns a Space object
    :rtype: rllab.spaces.base.Space
    State is [Agent X loc, Agent Y loc, Fire 1, Fire 2,... Fire N Alive?]
    Represented in cartesian product form as a Discrete space
    """
    return Discrete(self.n_row * self.n_col * 2**self.n_fires)
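The docstring above describes the state as (agent row, agent column, one alive bit per fire) packed into a single Discrete index. A hypothetical encoder for that packing is sketched below; the row-major layout and bit ordering are assumptions for illustration, since the original environment's encoding is not included in the snippet.

def encode_state(row, col, fires_alive, n_row, n_col):
    # Hypothetical packing: not taken from the original environment.
    assert 0 <= row < n_row and 0 <= col < n_col
    # Pack the fire-alive flags into an integer bit field.
    fire_bits = 0
    for i, alive in enumerate(fires_alive):
        fire_bits |= int(bool(alive)) << i
    # Row-major cell index, shifted by the number of fire configurations.
    return (row * n_col + col) * (2 ** len(fires_alive)) + fire_bits

# Example: encode_state(1, 2, [True, False], n_row=3, n_col=4)
#          -> (1 * 4 + 2) * 4 + 1 = 25, which lies in [0, 3 * 4 * 2**2).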
Code example #5
    def __init__(self, transition_matrix, reward, init_state, terminate_on_reward=False):
        super(DiscreteEnv, self).__init__()
        dX, dA, dXX = transition_matrix.shape
        self.nstates = dX
        self.nactions = dA
        self.transitions = transition_matrix
        self.init_state = init_state
        self.reward = reward
        self.terminate_on_reward = terminate_on_reward

        self.__observation_space = Box(0, 1, shape=(self.nstates,))
        #max_A = 0
        #for trans in self.transitions:
        #    max_A = max(max_A, len(self.transitions[trans]))
        self.__action_space = Discrete(dA)
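Note that the observation space here is Box(0, 1, shape=(nstates,)) rather than Discrete(nstates), which suggests the environment emits one-hot state vectors. The snippet does not include the reset/step methods, so the helper below is only a sketch of that assumption, not the class's actual code.

import numpy as np

def state_to_observation(state_index, nstates):
    # One-hot vector over states, matching Box(0, 1, shape=(nstates,)).
    obs = np.zeros(nstates)
    obs[state_index] = 1.0
    return obs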
Code example #6
def observation_space(self):
    """
    Returns a Space object
    """
    return Product(Discrete(52),   # Player card 1
                   Discrete(52),   # Player card 2
                   Discrete(52),   # Player card 3
                   Discrete(52),   # Dealer card 1
                   Discrete(52),   # Dealer card 2
                   Discrete(52))   # Dealer card 3
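For reference, a sample from this space is a 6-tuple of card indices in [0, 52), and flattening it yields a 6 * 52 = 312-dimensional concatenation of one-hot vectors. The short check below assumes the rllab import path used in example #1.

from rllab.spaces import Discrete, Product

obs_space = Product(Discrete(52), Discrete(52), Discrete(52),
                    Discrete(52), Discrete(52), Discrete(52))
sample = obs_space.sample()                  # e.g. (12, 40, 3, 51, 0, 27)
assert obs_space.contains(sample)
assert len(obs_space.flatten(sample)) == 6 * 52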
Code example #7
def action_space(self):
    return Discrete(11 * 11 * 11 * 2 * 2)
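This is the same (11, 11, 11, 2, 2) action structure as code example #2, collapsed into a single Discrete of size 11 * 11 * 11 * 2 * 2 = 5324. A hypothetical mixed-radix decoder is sketched below to show how a flat index could map back to the factored action; the component ordering is an assumption for illustration, not taken from the original project.

def decode_action(index, dims=(11, 11, 11, 2, 2)):
    # Peel off the least significant factor first, then restore the order.
    parts = []
    for d in reversed(dims):
        parts.append(index % d)
        index //= d
    return tuple(reversed(parts))

assert decode_action(0) == (0, 0, 0, 0, 0)
assert decode_action(11 * 11 * 11 * 2 * 2 - 1) == (10, 10, 10, 1, 1)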
Code example #8
def __init__(self, ns):
    self.agent_num = len(ns)
    self.agent_spaces = np.array([Discrete(n) for n in ns])
Code example #9
def action_space(self):
    return Discrete(len(self.policies))
Code example #10
def observation_space(self):
    return Discrete(4)
Code example #11
File: grid_env_r.py Project: maxiaoba/QMDPNET
def action_space(self):
    return Discrete(self.num_action)
Code example #12
def action_space(self):
    return Discrete(self.domain.actions_num)
Code example #13
File: atari.py Project: maxiaoba/QMDPNET
def action_space(self):
    return Discrete(len(self._action_set))
Code example #14
def action_space(self):
    return Discrete(len(self._controllers))
Code example #15
def action_space(self):
    return Discrete(n=self._k)
Code example #16
class CategoricalMLPRegressor(LayersPowered, Serializable):
    """
    A class for performing regression (or classification, really) by fitting a categorical distribution to the outputs.
    Assumes that the outputs will be always a one hot vector.
    """

    def __init__(
            self,
            name,
            input_shape,
            output_dim,
            prob_network=None,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=tf.nn.tanh,
            optimizer=None,
            tr_optimizer=None,
            use_trust_region=True,
            step_size=0.01,
            normalize_inputs=True,
            no_initial_trust_region=True,
    ):
        """
        :param input_shape: Shape of the input data.
        :param output_dim: Dimension of output.
        :param hidden_sizes: Number of hidden units of each layer of the mean network.
        :param hidden_nonlinearity: Non-linearity used for each layer of the mean network.
        :param optimizer: Optimizer for minimizing the negative log-likelihood.
        :param use_trust_region: Whether to use trust region constraint.
        :param step_size: KL divergence constraint for each iteration
        """
        Serializable.quick_init(self, locals())

        with tf.variable_scope(name):
            if optimizer is None:
                optimizer = LbfgsOptimizer(name="optimizer")
            if tr_optimizer is None:
                tr_optimizer = ConjugateGradientOptimizer()

            self.input_dim = input_shape[0]
            self.observation_space = Discrete(self.input_dim)
            self.action_space = Discrete(output_dim)


            self.output_dim = output_dim
            self.optimizer = optimizer
            self.tr_optimizer = tr_optimizer

            if prob_network is None:
                prob_network = MLP(
                    input_shape=input_shape,
                    output_dim=output_dim,
                    hidden_sizes=hidden_sizes,
                    hidden_nonlinearity=hidden_nonlinearity,
                    output_nonlinearity=tf.nn.softmax,
                    name="prob_network"
                )

            l_prob = prob_network.output_layer

            LayersPowered.__init__(self, [l_prob])

            xs_var = prob_network.input_layer.input_var
            ys_var = tf.placeholder(dtype=tf.float32, shape=[None, output_dim], name="ys")
            old_prob_var = tf.placeholder(dtype=tf.float32, shape=[None, output_dim], name="old_prob")

            x_mean_var = tf.get_variable(
                name="x_mean",
                shape=(1,) + input_shape,
                initializer=tf.constant_initializer(0., dtype=tf.float32)
            )
            x_std_var = tf.get_variable(
                name="x_std",
                shape=(1,) + input_shape,
                initializer=tf.constant_initializer(1., dtype=tf.float32)
            )

            self.x_mean_var = x_mean_var
            self.x_std_var = x_std_var

            normalized_xs_var = (xs_var - x_mean_var) / x_std_var

            prob_var = L.get_output(l_prob, {prob_network.input_layer: normalized_xs_var})

            old_info_vars = dict(prob=old_prob_var)
            info_vars = dict(prob=prob_var)

            dist = self._dist = Categorical(output_dim)

            mean_kl = tf.reduce_mean(dist.kl_sym(old_info_vars, info_vars))

            loss = - tf.reduce_mean(dist.log_likelihood_sym(ys_var, info_vars))

            predicted = tensor_utils.to_onehot_sym(tf.argmax(prob_var, axis=1), output_dim)

            self.prob_network = prob_network
            self.f_predict = tensor_utils.compile_function([xs_var], predicted)
            self.f_prob = tensor_utils.compile_function([xs_var], prob_var)
            self.l_prob = l_prob

            self.optimizer.update_opt(loss=loss, target=self, network_outputs=[prob_var], inputs=[xs_var, ys_var])
            self.tr_optimizer.update_opt(loss=loss, target=self, network_outputs=[prob_var],
                                         inputs=[xs_var, ys_var, old_prob_var],
                                         leq_constraint=(mean_kl, step_size)
                                         )

            self.use_trust_region = use_trust_region
            self.name = name

            self.normalize_inputs = normalize_inputs
            self.x_mean_var = x_mean_var
            self.x_std_var = x_std_var
            self.first_optimized = not no_initial_trust_region

    def fit(self, xs, ys):
        if self.normalize_inputs:
            # recompute normalizing constants for inputs
            new_mean = np.mean(xs, axis=0, keepdims=True)
            new_std = np.std(xs, axis=0, keepdims=True) + 1e-8
            tf.get_default_session().run(tf.group(
                tf.assign(self.x_mean_var, new_mean),
                tf.assign(self.x_std_var, new_std),
            ))
        if self.use_trust_region and self.first_optimized:
            old_prob = self.f_prob(xs)
            inputs = [xs, ys, old_prob]
            optimizer = self.tr_optimizer
        else:
            inputs = [xs, ys]
            optimizer = self.optimizer
        loss_before = optimizer.loss(inputs)
        if self.name:
            prefix = self.name + "_"
        else:
            prefix = ""
        logger.record_tabular(prefix + 'LossBefore', loss_before)
        optimizer.optimize(inputs)
        loss_after = optimizer.loss(inputs)
        logger.record_tabular(prefix + 'LossAfter', loss_after)
        logger.record_tabular(prefix + 'dLoss', loss_before - loss_after)
        self.first_optimized = True

    def predict(self, xs):
        return self.f_predict(np.asarray(xs))

    def predict_log_likelihood(self, xs, ys):
        prob = self.f_prob(np.asarray(xs))
        return self._dist.log_likelihood(np.asarray(ys), dict(prob=prob))

    def dist_info_sym(self, x_var):
        normalized_xs_var = (x_var - self.x_mean_var) / self.x_std_var
        prob = L.get_output(self.l_prob, {self.prob_network.input_layer: normalized_xs_var})
        return dict(prob=prob)

    def log_likelihood_sym(self, x_var, y_var):
        normalized_xs_var = (x_var - self.x_mean_var) / self.x_std_var
        prob = L.get_output(self.l_prob, {self.prob_network.input_layer: normalized_xs_var})
        return self._dist.log_likelihood_sym(y_var, dict(prob=prob))

    def get_param_values(self, **tags):
        return LayersPowered.get_param_values(self, **tags)

    def set_param_values(self, flattened_params, **tags):
        return LayersPowered.set_param_values(self, flattened_params, **tags)

    def get_action(self, observation):
        # observation = np.reshape(observation,(1,self.input_dim))
        flat_obs = self.observation_space.flatten(observation)
        prob = self.f_prob([flat_obs])[0]
        # action = self.f_predict(observation)
        action = self.action_space.weighted_sample(prob)
        # print(self.name,' :',action,' ',prob)
        return action, dict(prob=prob)

    # def get_actions(self, observations):
    #     probs = self.f_prob(observations)
    #     actions = self.f_predict(observations)
    #     return actions, dict(prob=probs)

    def reset(self): #do nothing
        return

    def terminate(self): #do nothing
        return

    def reload_initialize(self,sess):
        sess.run(self.x_mean_var.initializer)
        sess.run(self.x_std_var.initializer)
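A minimal usage sketch of the regressor class above, assuming a TF1 graph-mode session and that the rllab TF-sandbox dependencies it references (MLP, LayersPowered, tensor_utils, and so on) are importable; the data is random and only illustrates the call sequence. Targets must be one-hot, as the class docstring states.

import numpy as np
import tensorflow as tf

# Graph construction happens outside the session in TF1.
regressor = CategoricalMLPRegressor(name="clf", input_shape=(8,), output_dim=3)

xs = np.random.randn(100, 8)
ys = np.eye(3)[np.random.randint(0, 3, size=100)]   # one-hot targets

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    regressor.fit(xs, ys)                   # logs LossBefore / LossAfter
    onehot_predictions = regressor.predict(xs)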
Code example #17
def action_space(self):
    return Discrete(self.problem.num_actions)
Code example #18
File: test_spaces.py Project: zhmz90/rllab
def test_product_space():
    _ = Product([Discrete(3), Discrete(2)])
    product_space = Product(Discrete(3), Discrete(2))
    sample = product_space.sample()
    assert product_space.contains(sample)
Code example #19
def action_space(self):
    # Actions are Fire to go to or STAY
    return Discrete(NUM_FIRES + 1)
Code example #20
def action_space(self):
    return Discrete(2)
Code example #21
def observation_space(self):
    return Discrete(self.n_row * self.n_col)
Code example #22
def action_space(self):
    # Actions are Fire to go to or STAY
    return Discrete(5 +  # Fires
                    1)   # stay
Code example #23
def action_space(self):
    """
    Returns a Space object
    """
    return Discrete(2)