Example #1
0
    def __init__(self,
                 dim_input,
                 dim_output,
                 dim_hidden=32,
                 num_layers=4,
                 num_particles=2,
                 max_test_step=5):
        """Record hyper-parameters, then create the graph inputs and the BNN.

        Args:
            dim_input: stored on the instance and forwarded to ``BNN``.
            dim_output: stored on the instance and forwarded to ``BNN``.
            dim_hidden: stored on the instance and forwarded to ``BNN``.
            num_layers: stored on the instance and forwarded to ``BNN``.
            num_particles: stored on the instance; not otherwise used here.
            max_test_step: stored on the instance for use at test time.
        """
        # Model size.
        self.dim_input = dim_input
        self.dim_output = dim_output
        self.dim_hidden = dim_hidden
        self.num_layers = num_layers
        self.num_particles = num_particles

        # Scalar learning-rate placeholders.  Each one falls back to the
        # FLAGS entry of the same name when nothing is fed for it.
        for lr_name in ('follow_lr', 'leader_lr', 'meta_lr'):
            lr_default = getattr(FLAGS, lr_name)
            lr_tensor = tf.placeholder_with_default(input=lr_default,
                                                    name=lr_name,
                                                    shape=[])
            setattr(self, lr_name, lr_tensor)

        # Test-time setting.
        self.max_test_step = max_test_step

        # Build the network and expose its weight-construction and
        # forwarding callables directly on this object.
        network = BNN(dim_input=dim_input,
                      dim_output=dim_output,
                      dim_hidden=dim_hidden,
                      num_layers=num_layers,
                      is_bnn=True)
        self.bnn = network
        self.construct_network_weights = network.construct_network_weights
        self.forward_network = network.forward_network

        # Float32 data placeholders (no static shape is declared), created
        # in the same order as the attribute names listed here.
        data_names = ('follow_x', 'follow_y',
                      'leader_x', 'leader_y',
                      'valid_x', 'valid_y')
        for data_name in data_names:
            setattr(self,
                    data_name,
                    tf.placeholder(dtype=tf.float32, name=data_name))

        # Particle weights are not created here; start out unset.
        self.W_network_particles = None
Example #2
0
def run_cartpole_expl():
    """Set up and train the "cartpole_expl" experiment on CartPole-v0.

    Creates the Gym environment, a ``Policy`` and a ``BNN`` sized from the
    environment's observation/action spaces, wraps them in an
    ``Experiment`` and calls its ``train`` method.
    """
    cartpole = gym.make('CartPole-v0')
    action_space = cartpole.action_space

    # Flattened sizes of the observation and action spaces.
    # NOTE(review): np.prod of an empty shape is 1.0, so act_dim would be 1
    # if the action space reports shape () -- confirm for this gym version.
    obs_dim = np.prod(cartpole.observation_space.shape)
    act_dim = np.prod(action_space.shape)

    # Policy: observation size, hidden width, number of discrete actions.
    policy_width = 256
    agent = Policy(obs_dim, policy_width, action_space.n)

    # The model is given obs_dim + act_dim as its first size and obs_dim as
    # its last; the positional order (input, hidden, output) is kept as-is.
    model_in = int(obs_dim + act_dim)
    model_out = int(obs_dim)
    model_width = 64
    dynamics = BNN(model_in, model_width, model_out)

    experiment = Experiment(
        agent,
        dynamics,
        cartpole,
        exp_name="cartpole_expl",
        train_model=True,
        calc_inf_gain=True,
    )
    experiment.train()
Example #3
0
 def init_estimator(self, W=None, B=None):
     """Return a new ``BNN`` built from ``self.layers`` and the given W and B.

     ``W`` and ``B`` are passed through unchanged and default to ``None``.
     """
     estimator = BNN(layers=self.layers, W=W, B=B)
     return estimator