    def __init__(self, dim_state, dim_action, hidden_sizes):
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes

        with self.scope:
            self.op_states = tf.placeholder(tf.float32, shape=[None, dim_state])
            self.op_actions = tf.placeholder(tf.float32, shape=[None, dim_action])
            def make_q_net():
                # Shared builder for the two critic heads.
                layers = []
                all_sizes = [dim_state + dim_action, *self.hidden_sizes]
                for in_features, out_features in zip(all_sizes[:-1], all_sizes[1:]):
                    layers.append(FCLayer(in_features, out_features))
                    layers.append(nn.ReLU())
                layers.append(FCLayer(all_sizes[-1], 1))
                return nn.Sequential(*layers)

            # Twin Q-networks over (state, action), as in clipped double-Q learning.
            self.net1 = make_q_net()
            self.net2 = make_q_net()

        self.op_q1, self.op_q2 = self.forward(self.op_states, self.op_actions)
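
In practice the two heads are combined into a clipped double-Q target, as in TD3 and SAC. A minimal sketch of that use, assuming TF1 tensors target_q1, target_q2, rewards, dones, and a discount gamma from a replay batch (all of these names are hypothetical, not from the snippet):

    # Pessimistic target: take the smaller of the two critic estimates
    # to curb overestimation bias.
    q_min = tf.minimum(target_q1, target_q2)
    td_target = rewards + gamma * (1.0 - dones) * q_min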
Example #2
    def __init__(self, dim_state: int, dim_action: int,
                 normalizers: Normalizers, *, arch: FLAGS.arch):
        super().__init__()
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=1e-5)

        self.dim_state = dim_state
        self.dim_action = dim_action
        self.op_states = tf.placeholder(tf.float32,
                                        shape=[None, self.dim_state],
                                        name='states')
        self.op_actions = tf.placeholder(tf.float32,
                                         shape=[None, self.dim_action],
                                         name='actions')
        self.mlp = nn.Sequential(
            nn.Linear(dim_state + dim_action,
                      arch.n_units,
                      weight_initializer=initializer),
            nn.ReLU(),
            make_blocks(FixupResBlock, arch.n_units, arch.n_blocks,
                        arch.n_blocks),
            nn.Linear(arch.n_units, dim_state, weight_initializer=initializer),
        )

        self.normalizers = normalizers
        self.build()
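
The near-zero initializer (stddev=1e-5) is worth a note: it starts the MLP close to the zero function, which is convenient when the output is interpreted as a normalized correction to the state. A quick NumPy sanity check of the scale (sizes hypothetical):

    import numpy as np
    # Scale of one layer's pre-activations at init (sizes hypothetical):
    rng = np.random.default_rng(0)
    w = rng.normal(0.0, 1e-5, size=(500, 500))
    x = rng.normal(0.0, 1.0, size=(1, 500))
    print(np.abs(x @ w).mean())   # ~2e-4: the network starts out near the zero function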
Example #3
    def __init__(self,
                 dim_state: int,
                 dim_action: int,
                 hidden_sizes: List[int],
                 normalizer: Normalizers,
                 save_normalizer=False):
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes
        # avoid saving the normalizer in self.state_dict
        self.state_process_fn = lambda states_: normalizer.state(states_)
        self.action_process_fn = lambda actions_: actions_
        if save_normalizer:
            self.normalizer = normalizer

        with self.scope:
            self.op_states = tf.placeholder(tf.float32, [None, dim_state],
                                            "state")
            self.op_actions = tf.placeholder(tf.float32, [None, dim_action],
                                             "action")

            layers = []
            all_sizes = [dim_state + dim_action, *self.hidden_sizes]
            for i, (in_features, out_features) in enumerate(
                    zip(all_sizes[:-1], all_sizes[1:])):
                layers.append(FCLayer(in_features, out_features))
                layers.append(nn.ReLU())
            layers.append(FCLayer(all_sizes[-1], 1))
            self.net = nn.Sequential(*layers)

            self.op_logits = self(self.op_states, self.op_actions)
            self.op_rewards = -tf.log(1 - tf.nn.sigmoid(self.op_logits) + 1e-6)
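
The last line is the GAIL-style reward transform: for a discriminator logit z, the reward is -log(1 - sigmoid(z) + 1e-6), which grows with the logit and is capped near -log(1e-6) ≈ 13.8 by the epsilon term. A small NumPy illustration:

    import numpy as np
    def gail_reward(logits, eps=1e-6):
        s = 1.0 / (1.0 + np.exp(-logits))   # sigmoid
        return -np.log(1.0 - s + eps)
    print(gail_reward(np.array([-2.0, 0.0, 2.0])))
    # ≈ [0.127, 0.693, 2.127]: a higher logit yields a higher reward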
Example #4
    def forward(self, x):
        x = self.net(x)
        x = tf.layers.flatten(x)
        # Lazily build the fully connected head once the flattened width is known.
        if not self.initialized:
            layer = [
                FCLayer(nin=x.shape[-1].value, nh=512, init_scale=np.sqrt(2)),
                nn.ReLU()
            ]
            self.conv_to_fc = nn.Sequential(*layer)
            self.initialized = True
        x = self.conv_to_fc(x)
        return x
Example #5
    def __init__(self, x, n_total_blocks):
        super().__init__()
        # Fixup initialization: He-style std shrunk by the total number of
        # residual blocks; the second layer starts at zero.
        std = np.sqrt(2. / x / n_total_blocks)
        self.bias1a = nn.Parameter(tf.zeros(1), name='bias1a')
        self.fc1 = nn.Linear(x,
                             x,
                             bias=False,
                             weight_initializer=tf.initializers.random_normal(
                                 0, stddev=std))
        self.bias1b = nn.Parameter(tf.zeros(1), name='bias1b')
        self.relu = nn.ReLU()
        self.bias2a = nn.Parameter(tf.zeros(1), name='bias2a')
        self.fc2 = nn.Linear(x,
                             x,
                             bias=False,
                             weight_initializer=tf.initializers.zeros())
        self.scale = nn.Parameter(tf.ones(1), name='scale')
        self.bias2b = nn.Parameter(tf.zeros(1), name='bias2b')
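
The parameter names follow Fixup initialization (Zhang et al., 2019): fc1 gets a He-style standard deviation shrunk by the block count, sqrt(2 / x / n_total_blocks) (≈ 0.044 for x=256 and 4 blocks, hypothetical sizes), fc2 starts at zero so every residual branch begins as the identity, and the scalar biases/scale stand in for normalization layers. The forward pass is not shown in the snippet; a plausible reconstruction following the paper's block layout (an assumption, not the repo's actual code):

    def forward(self, x):
        # Residual branch with scalar biases in place of normalization.
        out = self.fc1(x + self.bias1a)
        out = self.relu(out + self.bias1b)
        out = self.fc2(out + self.bias2a)
        return x + out * self.scale + self.bias2b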
Example #6
    def __init__(self,
                 dim_state: int,
                 dim_action: int,
                 hidden_sizes: List[int],
                 state_process_fn,
                 action_process_fn,
                 activ_fn='none'):
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes
        # avoid saving the normalizer in self.state_dict
        self.state_process_fn = state_process_fn
        self.action_process_fn = action_process_fn

        with self.scope:
            self.op_states = tf.placeholder(tf.float32, [None, dim_state],
                                            "state")
            self.op_actions = tf.placeholder(tf.float32, [None, dim_action],
                                             "action")
            self.op_next_states = tf.placeholder(tf.float32, [None, dim_state],
                                                 "next_state")

            layers = []
            all_sizes = [dim_state * 2 + dim_action, *self.hidden_sizes]
            for i, (in_features, out_features) in enumerate(
                    zip(all_sizes[:-1], all_sizes[1:])):
                layers.append(FCLayer(in_features, out_features))
                layers.append(nn.ReLU())
            layers.append(FCLayer(all_sizes[-1], 1))
            if activ_fn == 'none':
                pass
            elif activ_fn == 'sigmoid':
                layers.append(nn.Sigmoid())
            elif activ_fn == 'tanh':
                layers.append(nn.Tanh())
            else:
                raise ValueError('%s is not supported' % activ_fn)
            self.net = nn.Sequential(*layers)

            self.op_logits = self(self.op_states, self.op_actions,
                                  self.op_next_states)
            self.op_rewards = -tf.log(1 - tf.nn.sigmoid(self.op_logits) + 1e-6)
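
This discriminator variant additionally conditions on the next state (hence dim_state * 2 + dim_action input features) and can optionally squash the logit with a sigmoid or tanh; the reward transform is the same -log(1 - sigmoid(logit) + 1e-6) illustrated after Example #3.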
Example #7
    def __init__(self,
                 nin,
                 hidden_sizes=(32, 64, 64),
                 kernel_sizes=(8, 4, 3),
                 strides=(4, 2, 1),
                 init_scale=np.sqrt(2)):
        super().__init__()

        assert len(hidden_sizes) == len(kernel_sizes) == len(strides)
        layer = []
        for i in range(len(hidden_sizes)):
            nf, rf, stride = hidden_sizes[i], kernel_sizes[i], strides[i]
            layer.append(ConvLayer(nin, nf, rf, stride, init_scale=init_scale))
            layer.append(nn.ReLU())
            nin = nf
        self.layer = nn.Sequential(*layer)
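
These defaults are the familiar Atari convolution stack (32/64/64 filters, kernels 8/4/3, strides 4/2/1). Assuming VALID padding and an 84x84 input (both assumptions; the snippet doesn't show ConvLayer's padding), the spatial size shrinks 84 -> 20 -> 9 -> 7, so the flattened width is 7 * 7 * 64 = 3136:

    def conv_out(size, kernel, stride):
        # Output size of a VALID-padded convolution.
        return (size - kernel) // stride + 1

    s = 84
    for k, st in zip((8, 4, 3), (4, 2, 1)):
        s = conv_out(s, k, st)
    print(s, s * s * 64)   # 7 3136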
Example #8
    def __init__(self, dim_state, dim_action, hidden_sizes: List[int]):
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes

        with self.scope:
            self.op_states = tf.placeholder(tf.float32,
                                            shape=[None, dim_state],
                                            name='states')

            layers = []
            all_sizes = [dim_state, *self.hidden_sizes]
            for i, (in_features, out_features) in enumerate(
                    zip(all_sizes[:-1], all_sizes[1:])):
                layers.append(FCLayer(in_features, out_features))
                layers.append(nn.ReLU())
            layers.append(FCLayer(all_sizes[-1], dim_action, init_scale=0.01))
            layers.append(nn.Tanh())
            self.net = nn.Sequential(*layers)
            self.op_actions = self(self.op_states)
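
The output layer's init_scale=0.01 keeps pre-tanh activations near zero at initialization, where tanh is approximately linear, so the policy starts out emitting near-zero actions inside the (-1, 1) action box:

    import numpy as np
    print(np.tanh(0.01 * np.array([-3.0, 0.5, 3.0])))
    # ≈ [-0.03, 0.005, 0.03]: early actions stay small and well inside (-1, 1)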
Example #9
    def __init__(self, dim_state: int, dim_action: int, hidden_sizes: List[int]):
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes
        with self.scope:
            self.op_states = tf.placeholder(tf.float32, shape=[None, dim_state], name='states')
            self.op_actions_ = tf.placeholder(tf.float32, shape=[None, dim_action], name='actions')

            layers = []
            all_sizes = [dim_state, *self.hidden_sizes]
            for i, (in_features, out_features) in enumerate(zip(all_sizes[:-1], all_sizes[1:])):
                layers.append(FCLayer(in_features, out_features))
                layers.append(nn.ReLU())
            layers.append(FCLayer(all_sizes[-1], dim_action*2))
            self.net = nn.Sequential(*layers)

        self.op_actions, self.op_log_density, pd, self.op_dist_mean, self.op_dist_log_std = self(self.op_states)
        self.op_actions_mean = tf.tanh(self.op_dist_mean)
        pi_ = tf.atanh(clip_but_pass_gradient(self.op_actions_, -1+EPS, 1-EPS))
        log_prob_pi_ = pd.log_prob(pi_).reduce_sum(axis=1)
        log_prob_pi_ -= tf.reduce_sum(tf.log(1 - self.op_actions_ ** 2 + EPS), axis=1)
        self.op_log_density_ = log_prob_pi_
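
The last few lines implement the change of variables for a tanh-squashed Gaussian, as in SAC: for a = tanh(u) with u ~ N(mu, std), log pi(a) = log N(u; mu, std) - sum_i log(1 - tanh(u_i)^2 + EPS), where atanh (guarded by clip_but_pass_gradient to avoid the singularities at ±1) recovers u from an externally supplied action. A NumPy sketch of the per-dimension correction (values hypothetical):

    import numpy as np
    mu, std, u = 0.0, 1.0, 0.3                                   # pre-squash sample
    log_rho = (-0.5 * ((u - mu) / std) ** 2
               - np.log(std) - 0.5 * np.log(2.0 * np.pi))        # log N(u; mu, std)
    log_pi = log_rho - np.log(1.0 - np.tanh(u) ** 2)             # squash correction
    print(log_pi)   # log-density of a = tanh(u) under the squashed Gaussian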