def _build_net(self, s, name, trainable):
        f1 = 1 / np.sqrt(self.f1)  # fan-in scaling for the hidden layer (self.f1 is its width)
        f2 = 0.003  # small uniform range for the output layer, as in the DDPG paper
        with tf.variable_scope(name):
            l1 = tf.layers.dense(s,
                                 self.f1,
                                 activation=tf.nn.relu,
                                 kernel_initializer=random_uniform(-f1, f1),
                                 bias_initializer=random_uniform(-f1, f1),
                                 trainable=trainable)

            with tf.variable_scope('q'):
                q = tf.layers.dense(l1,
                                    self.action_dim,
                                    kernel_initializer=random_uniform(-f2, f2),
                                    bias_initializer=random_uniform(-f2, f2),
                                    trainable=trainable)
        return q
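Throughout these snippets, `random_uniform`, `np`, and `tf` are referenced without imports. A minimal preamble that makes the TF1-style examples resolve, assuming `random_uniform` is TensorFlow's uniform initializer (the actual import lines are not shown in the source):

import numpy as np
import tensorflow as tf  # TensorFlow 1.x: the snippets use tf.placeholder / tf.layers

# Assumed alias: random_uniform(minval, maxval) as used below matches
# tf.random_uniform_initializer's signature.
random_uniform = tf.random_uniform_initializer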
Example #2
    def create_critic_model(self):
        state = keras.Input(shape=(self.s_dim,))
        action = keras.Input(shape=(self.a_dim,))
        state_transform = keras.layers.Dense(self.s_dim, activation='relu')(state)
        input_state = keras.layers.concatenate([state_transform, action])
        h1 = keras.layers.Dense(self.hidden_layer_1, activation='relu')(input_state)
        h2 = keras.layers.Dense(self.hidden_layer_2, activation='relu')(h1)
        output = keras.layers.Dense(1, activation='linear',
                                    kernel_initializer=random_uniform())(h2)
        model = keras.Model([state, action], output)
        model.summary()
        return model
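A self-contained sketch of the same critic wiring with assumed sizes (s_dim=3, a_dim=1, and hidden layers 400/300 are hypothetical values, not from the source), showing how the two-input model is called:

import numpy as np
from tensorflow import keras

# Standalone rebuild of create_critic_model with assumed dimensions.
state = keras.Input(shape=(3,))
action = keras.Input(shape=(1,))
state_transform = keras.layers.Dense(3, activation='relu')(state)
x = keras.layers.concatenate([state_transform, action])
x = keras.layers.Dense(400, activation='relu')(x)
x = keras.layers.Dense(300, activation='relu')(x)
q = keras.layers.Dense(1, activation='linear',
                       kernel_initializer=keras.initializers.RandomUniform())(x)
model = keras.Model([state, action], q)
q_values = model.predict([np.zeros((8, 3)), np.zeros((8, 1))])  # -> shape (8, 1)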
Example #3
    def build_network(self):
        with tf.variable_scope(self.name):
            self.input = tf.placeholder(tf.float32,
                                        shape=[None, *self.input_dims],
                                        name='inputs')
            self.action_gradient = tf.placeholder(tf.float32,
                                                  shape=[None, self.n_actions])

            f1 = 1 / np.sqrt(self.fc1_dims)
            dense1 = tf.layers.dense(self.input,
                                     units=self.fc1_dims,
                                     kernel_initializer=random_uniform(
                                         -f1, f1),
                                     bias_initializer=random_uniform(-f1, f1))

            batch1 = tf.layers.batch_normalization(dense1)
            layer1_activation = tf.nn.relu(batch1)

            f2 = 1 / np.sqrt(self.fc2_dims)
            dense2 = tf.layers.dense(layer1_activation,
                                     units=self.fc2_dims,
                                     kernel_initializer=random_uniform(
                                         -f2, f2),
                                     bias_initializer=random_uniform(-f2, f2))

            batch2 = tf.layers.batch_normalization(dense2)
            layer2_activation = tf.nn.relu(batch2)

            f3 = 0.003
            mu = tf.layers.dense(layer2_activation,
                                 units=self.n_actions,
                                 activation=tf.nn.tanh,  # callable form; tf.layers does not accept the string 'tanh' in older TF1
                                 kernel_initializer=random_uniform(-f3, f3),
                                 bias_initializer=random_uniform(-f3, f3))
            self.mu = tf.multiply(mu, self.action_bound)
    def _build_net(self, s, name, trainable):
        f1 = 1 / np.sqrt(270)
        f3 = 0.003
        with tf.variable_scope(name):
            l1 = tf.layers.dense(s,
                                 270,
                                 activation=tf.nn.relu,
                                 kernel_initializer=random_uniform(-f1, f1),
                                 bias_initializer=random_uniform(-f1, f1),
                                 trainable=trainable)

            with tf.variable_scope('v'):
                v = tf.layers.dense(l1,
                                    1,
                                    kernel_initializer=random_uniform(-f3, f3),
                                    bias_initializer=random_uniform(-f3, f3),
                                    trainable=trainable)

            with tf.variable_scope('advantage'):
                q_a = tf.layers.dense(l1,
                                      self.action_dim,
                                      kernel_initializer=random_uniform(
                                          -f3, f3),
                                      bias_initializer=random_uniform(-f3, f3),
                                      trainable=trainable)
                q_a = q_a - tf.reduce_mean(q_a, axis=1, keepdims=True)  # 'keep_dims' was renamed 'keepdims' in TF 1.5

            with tf.variable_scope('output_Q'):
                q = v + q_a
        return q
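The mean-subtraction in the advantage head above is the dueling-network identifiability trick (Wang et al., 2016): Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)). A tiny NumPy check of that aggregation step:

import numpy as np

v = np.array([[2.0]])               # V(s), shape (batch, 1)
adv = np.array([[1.0, 3.0, 2.0]])   # A(s, a), shape (batch, n_actions)
q = v + (adv - adv.mean(axis=1, keepdims=True))
print(q)  # [[1. 3. 2.]] -- the centered advantages spread actions around V(s)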
    def build_network(self):
        with tf.variable_scope(self.name):
            self.input = tf.placeholder(tf.float32,
                                        shape=[None, *self.input_dims],
                                        name='inputs')

            self.actions = tf.placeholder(tf.float32,
                                          shape=[None, self.n_actions],
                                          name='actions')

            self.q_target = tf.placeholder(tf.float32,
                                           shape=[None, 1],
                                           name='targets')

            f1 = 1. / np.sqrt(self.fc1_dims)
            dense1 = tf.layers.dense(self.input,
                                     units=self.fc1_dims,
                                     kernel_initializer=random_uniform(
                                         -f1, f1),
                                     bias_initializer=random_uniform(-f1, f1))
            batch1 = tf.layers.batch_normalization(dense1)
            layer1_activation = tf.nn.relu(batch1)

            f2 = 1. / np.sqrt(self.fc2_dims)
            dense2 = tf.layers.dense(layer1_activation,
                                     units=self.fc2_dims,
                                     kernel_initializer=random_uniform(
                                         -f2, f2),
                                     bias_initializer=random_uniform(-f2, f2))
            batch2 = tf.layers.batch_normalization(dense2)
            #layer2_activation = tf.nn.relu(batch2)
            #layer2_activation = tf.nn.relu(dense2)

            action_in = tf.layers.dense(self.actions,
                                        units=self.fc2_dims,
                                        activation=tf.nn.relu)
            # batch2 = tf.nn.relu(batch2)
            # No activation on action_in with ReLU on state_actions performs poorly.
            # ReLU on action_in with ReLU on state_actions does reasonably well.
            # ReLU on batch2 with ReLU on action_in performs poorly.

            #state_actions = tf.concat([layer2_activation, action_in], axis=1)
            state_actions = tf.add(batch2, action_in)
            state_actions = tf.nn.relu(state_actions)
            f3 = 0.003
            self.q = tf.layers.dense(
                state_actions,
                units=1,
                kernel_initializer=random_uniform(-f3, f3),
                bias_initializer=random_uniform(-f3, f3),
                kernel_regularizer=tf.keras.regularizers.l2(0.01))

            self.loss = tf.losses.mean_squared_error(self.q_target, self.q)
Example #6
    def build_network(self):
        with tf.variable_scope(self.name):
            self.input = tf.placeholder(tf.float32,
                                        shape=[None, *self.input_dims],
                                        name='inputs')
            self.actions = tf.placeholder(tf.float32,
                                          shape=[None, self.n_actions],
                                          name='actions')
            '''
            DDPG target (Lillicrap et al., 2015):
                y[i] = rewards[i] + gamma * Q'(s[i+1], mu'(s[i+1] | theta_mu') | theta_Q')
            Critic update minimizes the loss:
                L = 1/N * sum((y[i] - Q(s[i], a[i] | theta_Q)) ** 2)
            Actor update uses the sampled policy gradient:
                grad_theta_mu(J) = 1/N * sum(grad_a Q(s, a | theta_Q)|s=s[i], a=mu(s[i])
                                             * grad_theta_mu mu(s | theta_mu)|s=s[i])
            '''
            self.q_target = tf.placeholder(tf.float32,
                                           shape=[None, 1],
                                           name='targets')

            f1 = 1 / np.sqrt(self.fc1_dims)
            dense1 = tf.layers.dense(self.input,
                                     units=self.fc1_dims,
                                     kernel_initializer=random_uniform(
                                         -f1, f1),
                                     bias_initializer=random_uniform(-f1, f1))

            batch1 = tf.layers.batch_normalization(dense1)
            layer1_activation = tf.nn.relu(batch1)

            f2 = 1 / np.sqrt(self.fc2_dims)
            dense2 = tf.layers.dense(layer1_activation,  # was self.input, which left layer 1 unused
                                     units=self.fc2_dims,
                                     kernel_initializer=random_uniform(
                                         -f2, f2),
                                     bias_initializer=random_uniform(-f2, f2))

            batch2 = tf.layers.batch_normalization(dense2)

            action_in = tf.layers.dense(self.actions,
                                        units=self.fc2_dims,
                                        activation=tf.nn.relu)
            state_actions = tf.add(batch2, action_in)
            state_actions = tf.nn.relu(state_actions)

            f3 = 0.003
            self.q = tf.layers.dense(
                state_actions,
                units=1,
                kernel_initializer=random_uniform(-f3, f3),
                bias_initializer=random_uniform(-f3, f3),
                kernel_regularizer=tf.keras.regularizers.l2(0.01))
            self.loss = tf.losses.mean_squared_error(self.q_target, self.q)
    def _build_net(self, s, a, name, trainable):
        f1 = 1 / np.sqrt(400)
        f2 = 1 / np.sqrt(300)
        f3 = 0.003

        with tf.variable_scope(name):
            with tf.variable_scope('l1'):
                l1 = tf.layers.batch_normalization(s)
                l1 = tf.layers.dense(l1,
                                     400,
                                     kernel_initializer=random_uniform(
                                         -f1, f1),
                                     bias_initializer=random_uniform(-f1, f1),
                                     trainable=trainable)
                l2 = tf.layers.batch_normalization(l1)
                l2 = tf.layers.dense(l2,
                                     300,
                                     kernel_initializer=random_uniform(
                                         -f2, f2),
                                     bias_initializer=random_uniform(-f2, f2),
                                     trainable=trainable)
                l2 = tf.layers.batch_normalization(l2)
                a_in = tf.layers.dense(
                    a,
                    300,
                    kernel_initializer=random_uniform(-f2, f2),
                    bias_initializer=random_uniform(-f2, f2),
                    trainable=trainable)
                b2 = tf.layers.batch_normalization(l2 + a_in)
                net = tf.nn.relu(b2)

            with tf.variable_scope('q'):
                q = tf.layers.dense(net,
                                    1,
                                    kernel_initializer=random_uniform(-f3, f3),
                                    bias_initializer=random_uniform(-f3, f3),
                                    trainable=trainable)  # Q(s,a)
        return q
Example #8
    def build_network(self):
        fc1_dims = 400
        fc2_dims = 300

        with tf.variable_scope(self.name):
            self.input = tf.placeholder(tf.float32,
                                        shape=[None, *self.input_dims],
                                        name='inputs')
            self.actions = tf.placeholder(tf.float32,
                                          shape=[None, self.n_actions],
                                          name='actions')
            self.q_target = tf.placeholder(tf.float32,
                                           shape=[None, 1],
                                           name='targets')

            f1 = 1 / np.sqrt(fc1_dims)
            dense1 = tf.layers.dense(self.input,
                                     units=fc1_dims,
                                     kernel_initializer=random_uniform(
                                         -f1, f1),
                                     bias_initializer=random_uniform(-f1, f1))
            batch1 = tf.layers.batch_normalization(dense1)
            layer1_activation = tf.nn.relu(batch1)

            f2 = 1 / np.sqrt(fc2_dims)
            dense2 = tf.layers.dense(layer1_activation,
                                     units=fc2_dims,
                                     kernel_initializer=random_uniform(
                                         -f2, f2),
                                     bias_initializer=random_uniform(-f2, f2))
            batch2 = tf.layers.batch_normalization(dense2)

            action_in = tf.layers.dense(self.actions,
                                        units=fc2_dims,
                                        activation=tf.nn.relu)

            state_actions = tf.add(batch2, action_in)
            state_actions = tf.nn.relu(state_actions)

            f3 = 0.003
            self.q = tf.layers.dense(
                state_actions,
                units=1,
                kernel_initializer=random_uniform(-f3, f3),
                bias_initializer=random_uniform(-f3, f3),
                kernel_regularizer=tf.keras.regularizers.l2(0.01))

            self.loss = tf.losses.mean_squared_error(self.q_target, self.q)
    def _build_net(self, s, name, trainable):
        f1 = 1 / np.sqrt(400)
        f2 = 1 / np.sqrt(300)
        f3 = 0.003

        with tf.variable_scope(name):
            l1 = tf.layers.batch_normalization(s)

            l1 = tf.layers.dense(l1,
                                 400,
                                 activation=tf.nn.relu,
                                 kernel_initializer=random_uniform(-f1, f1),
                                 bias_initializer=random_uniform(-f1, f1),
                                 name='l1',
                                 trainable=trainable)
            l2 = tf.layers.batch_normalization(l1)
            l2 = tf.layers.dense(l2,
                                 300,
                                 activation=tf.nn.relu,
                                 kernel_initializer=random_uniform(-f2, f2),
                                 bias_initializer=random_uniform(-f2, f2),
                                 name='l2',
                                 trainable=trainable)
            l3 = tf.layers.batch_normalization(l2)

            with tf.variable_scope('a'):
                actions = tf.layers.dense(
                    l3,
                    self.action_dim,
                    activation=tf.nn.tanh,
                    kernel_initializer=random_uniform(-f3, f3),
                    bias_initializer=random_uniform(-f3, f3),
                    name='a',
                    trainable=trainable)
                scaled_a = tf.multiply(
                    actions, self.action_bound, name='scaled_a'
                )  # scale output to [-action_bound, action_bound]
        return scaled_a
    def init_model(self,
                   input_size,
                   n_hidden=50,
                   n_tensors=20,
                   n_layers=2,
                   dropout_rate=0.5,
                   rnn_type='gru',
                   optimizer='nadam',
                   loss_function='categorical_crossentropy'):
        """
        Initialize model.

        Parameters
        ----------
        input_size: int
            Input size for this model.

        n_hidden: int, optional, default: 50
            Number of hidden units.

        n_tensors: int, optional, default: 20
            Number of tensors that capture the compositions between input tokens
            and the prototypes (aspect and opinion).

        n_layers: int, optional, default: 2
            Number of layers for the attention network.

        dropout_rate: float, optional, default: 0.5
            Dropout rate to use during training.

        rnn_type: {'gru', 'lstm', 'bilstm', 'bigru'}, optional, default: 'gru'
            Type of RNN used.

        optimizer: tf.keras.optimizers, optional, default: 'nadam'
            Optimizer to use during training.
            See https://www.tensorflow.org/api_docs/python/tf/keras/optimizers.

        loss_function: tf.keras.losses, optional, default: 'categorical_crossentropy'
            Loss function to use during training.
            See https://www.tensorflow.org/api_docs/python/tf/keras/losses.
        """
        input = layers.Input(shape=(None, input_size))

        if rnn_type == 'gru':
            rnn = layers.GRU(
                units=n_hidden,
                recurrent_activation='sigmoid',
                return_sequences=True,
                kernel_initializer=initializers.random_uniform(-0.2, 0.2),
                recurrent_initializer=initializers.random_uniform(-0.2, 0.2),
                dropout=dropout_rate)(input)

        elif rnn_type == 'lstm':
            rnn = layers.LSTM(
                units=n_hidden,
                recurrent_activation='sigmoid',
                return_sequences=True,
                kernel_initializer=initializers.random_uniform(-0.2, 0.2),
                recurrent_initializer=initializers.random_uniform(-0.2, 0.2),
                dropout=dropout_rate)(input)

        elif rnn_type == 'bilstm':
            rnn = layers.Bidirectional(
                layers.LSTM(units=n_hidden,
                            recurrent_activation='sigmoid',
                            return_sequences=True,
                            kernel_initializer=initializers.random_uniform(
                                -0.2, 0.2),
                            recurrent_initializer=initializers.random_uniform(
                                -0.2, 0.2),
                            dropout=dropout_rate))(input)

        elif rnn_type == 'bigru':
            rnn = layers.Bidirectional(
                layers.GRU(units=n_hidden,
                           recurrent_activation='sigmoid',
                           return_sequences=True,
                           kernel_initializer=initializers.random_uniform(
                               -0.2, 0.2),
                           recurrent_initializer=initializers.random_uniform(
                               -0.2, 0.2),
                           dropout=dropout_rate))(input)

        else:
            raise ValueError(
                "Unknown rnn type. Valid types are gru, lstm, bilstm, or bigru."
            )

        dropout = layers.Dropout(dropout_rate)(rnn)

        if rnn_type == 'gru' or rnn_type == 'lstm':
            cmla = CMLA(n_tensors, n_hidden, n_layers, dropout_rate,
                        rnn_type)(dropout)

        elif rnn_type == 'bilstm' or rnn_type == 'bigru':
            cmla = CMLA(n_tensors, 2 * n_hidden, n_layers, dropout_rate,
                        rnn_type)(dropout)

        aspect_prob = layers.Dense(
            3,
            activation='softmax',
            kernel_initializer=initializers.random_uniform(-0.2, 0.2))(cmla[0])

        opinion_prob = layers.Dense(
            3,
            activation='softmax',
            kernel_initializer=initializers.random_uniform(-0.2, 0.2))(cmla[1])

        self.model = tf.keras.Model(inputs=input,
                                    outputs=[aspect_prob, opinion_prob])
        self.model.compile(optimizer=optimizer,
                           loss=loss_function,
                           metrics=['accuracy'])
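init_model ends by compiling one model with two softmax heads, so Keras expects a list of two label arrays in fit. A reduced, self-contained sketch of that two-output pattern (all sizes here are assumptions for illustration, and a plain GRU stands in for the RNN + CMLA stack):

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

inp = layers.Input(shape=(None, 8))              # (timesteps, input_size); 8 is an assumed size
h = layers.GRU(16, return_sequences=True)(inp)   # placeholder for the encoder
aspect = layers.Dense(3, activation='softmax', name='aspect')(h)
opinion = layers.Dense(3, activation='softmax', name='opinion')(h)
model = tf.keras.Model(inp, [aspect, opinion])
model.compile(optimizer='nadam', loss='categorical_crossentropy',
              metrics=['accuracy'])

x = np.random.rand(4, 10, 8).astype('float32')                     # 4 sequences of 10 tokens
y = np.eye(3, dtype='float32')[np.random.randint(0, 3, (4, 10))]   # one-hot tags per token
model.fit(x, [y, y], epochs=1, verbose=0)                          # one label array per head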
Example #11
    def __init__(self, name, session, observation_space, action_space):
        self.name = name

        assert isinstance(action_space, spaces.Box)
        assert len(action_space.shape) == 1
        assert (np.abs(action_space.low) == action_space.high
                ).all()  # we assume symmetric actions.

        self.session = session

        with tf.variable_scope(self.name):
            self.observations_input = tf.placeholder(
                observation_space.dtype,
                shape=tuple([None] + list(observation_space.shape)),
                name="observations_input")

            self.actions_input = tf.placeholder(
                action_space.dtype,
                shape=tuple([None] + list(action_space.shape)),
                name="actions_input")

            self.actions_size = action_space.shape[0]

            self.combined_input = tf.concat(
                [self.observations_input, self.actions_input],
                axis=1,
                name="combined_input")

            with tf.variable_scope("actor"):
                self.actor01 = layers.dense(
                    inputs=self.observations_input,
                    units=64,
                    activation=tf.tanh,
                    name="layer_one",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(
                        gain=np.sqrt(2)))

                self.actor02 = layers.dense(
                    inputs=self.actor01,
                    units=64,
                    activation=tf.tanh,
                    name="layer_two",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(
                        gain=np.sqrt(2)))

                self.actor_head = layers.dense(
                    inputs=self.actor02,
                    units=self.actions_input.shape[1].value,
                    activation=tf.tanh,
                    name="head",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(gain=0.01))

                self.actor_head_rescaled = self.actor_head * action_space.high

            self.model_computed_combined = tf.concat(
                [self.observations_input, self.actor_head_rescaled],
                axis=1,
                name="model_computed_combined")

            with tf.variable_scope("critic"):
                self.critic01 = layers.dense(
                    inputs=self.combined_input,
                    units=64,
                    activation=tf.tanh,
                    name="layer_one",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(
                        gain=np.sqrt(2)))

                self.critic02 = layers.dense(
                    inputs=self.critic01,
                    units=64,
                    activation=tf.tanh,
                    name="layer_two",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(
                        gain=np.sqrt(2)))

                self.action_value_head = layers.dense(
                    inputs=self.critic02,
                    units=1,
                    activation=None,
                    name="head",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.random_uniform(
                        -3.0e-3, 3.0e-3))

                self.model_critic01 = layers.dense(
                    inputs=self.model_computed_combined,
                    units=64,
                    activation=tf.tanh,
                    name="layer_one",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(
                        gain=np.sqrt(2)),
                    reuse=True)  # use the same weights for 'critic' and for 'model critic'

                self.model_critic02 = layers.dense(
                    inputs=self.model_critic01,
                    units=64,
                    activation=tf.tanh,
                    name="layer_two",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(
                        gain=np.sqrt(2)),
                    reuse=True)  # use the same weights for 'critic' and for 'model critic'

                self.state_value_head = layers.dense(
                    inputs=self.model_critic02,
                    units=1,
                    activation=None,
                    name="head",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.random_uniform(
                        -3.0e-3, 3.0e-3),
                    reuse=True)  # use the same weights for 'critic' and for 'model critic'
Example #12
    def __init__(self,
                 n_tensors=20,
                 n_hidden=50,
                 n_layers=2,
                 dropout_rate=0.5,
                 rnn_type='gru',
                 **kwargs):
        super(CMLA, self).__init__(**kwargs)
        self.n_tensors = n_tensors
        self.n_hidden = n_hidden
        self.n_layers = n_layers
        self.dropout_rate = dropout_rate
        self.rnn_type = rnn_type

        self.dot_layer = layers.Dot(axes=0)

        if self.rnn_type == 'gru':
            self.rnn_aspect_layer = layers.GRU(
                units=2 * self.n_tensors,
                recurrent_activation='sigmoid',
                return_sequences=True,
                use_bias=False,
                kernel_initializer=initializers.random_uniform(-0.2, 0.2),
                recurrent_initializer=initializers.random_uniform(-0.2, 0.2),
                dropout=self.dropout_rate)

            self.rnn_opinion_layer = layers.GRU(
                units=2 * self.n_tensors,
                recurrent_activation='sigmoid',
                return_sequences=True,
                use_bias=False,
                kernel_initializer=initializers.random_uniform(-0.2, 0.2),
                recurrent_initializer=initializers.random_uniform(-0.2, 0.2),
                dropout=self.dropout_rate)

        elif self.rnn_type == 'lstm':
            self.rnn_aspect_layer = layers.LSTM(
                units=2 * self.n_tensors,
                recurrent_activation='sigmoid',
                return_sequences=True,
                use_bias=False,
                kernel_initializer=initializers.random_uniform(-0.2, 0.2),
                recurrent_initializer=initializers.random_uniform(-0.2, 0.2),
                dropout=self.dropout_rate)

            self.rnn_opinion_layer = layers.LSTM(
                units=2 * self.n_tensors,
                recurrent_activation='sigmoid',
                return_sequences=True,
                use_bias=False,
                kernel_initializer=initializers.random_uniform(-0.2, 0.2),
                recurrent_initializer=initializers.random_uniform(-0.2, 0.2),
                dropout=self.dropout_rate)

        elif self.rnn_type == 'bilstm':
            self.rnn_aspect_layer = layers.Bidirectional(
                layers.LSTM(units=2 * self.n_tensors,
                            recurrent_activation='sigmoid',
                            return_sequences=True,
                            use_bias=False,
                            kernel_initializer=initializers.random_uniform(
                                -0.2, 0.2),
                            recurrent_initializer=initializers.random_uniform(
                                -0.2, 0.2),
                            dropout=self.dropout_rate))

            self.rnn_opinion_layer = layers.Bidirectional(
                layers.LSTM(units=2 * self.n_tensors,
                            recurrent_activation='sigmoid',
                            return_sequences=True,
                            use_bias=False,
                            kernel_initializer=initializers.random_uniform(
                                -0.2, 0.2),
                            recurrent_initializer=initializers.random_uniform(
                                -0.2, 0.2),
                            dropout=self.dropout_rate))

        elif self.rnn_type == 'bigru':
            self.rnn_aspect_layer = layers.Bidirectional(
                layers.GRU(units=2 * self.n_tensors,
                           recurrent_activation='sigmoid',
                           return_sequences=True,
                           use_bias=False,
                           kernel_initializer=initializers.random_uniform(
                               -0.2, 0.2),
                           recurrent_initializer=initializers.random_uniform(
                               -0.2, 0.2),
                           dropout=self.dropout_rate))

            self.rnn_opinion_layer = layers.Bidirectional(
                layers.GRU(units=2 * self.n_tensors,
                           recurrent_activation='sigmoid',
                           return_sequences=True,
                           use_bias=False,
                           kernel_initializer=initializers.random_uniform(
                               -0.2, 0.2),
                           recurrent_initializer=initializers.random_uniform(
                               -0.2, 0.2),
                           dropout=self.dropout_rate))
        else:
            raise ValueError(
                "Unknown rnn type. Valid types are gru, lstm, bilstm, or bigru."
            )
Example #13
    def build(self, input_shape):
        """
        Create and initialize trainable weights.
        """
        self.m0_a = self.add_weight(name='m0_a',
                                    shape=(1, self.n_hidden),
                                    initializer=initializers.random_uniform(
                                        -0.2, 0.2),
                                    trainable=True)

        self.m0_o = self.add_weight(name='m0_o',
                                    shape=(1, self.n_hidden),
                                    initializer=initializers.random_uniform(
                                        -0.2, 0.2),
                                    trainable=True)

        self.Ua = self.add_weight(
            name='Ua',
            shape=(self.n_tensors, self.n_hidden, self.n_hidden),
            initializer=initializers.random_uniform(-0.2, 0.2),
            trainable=True)

        self.Uo = self.add_weight(
            name='Uo',
            shape=(self.n_tensors, self.n_hidden, self.n_hidden),
            initializer=initializers.random_uniform(-0.2, 0.2),
            trainable=True)

        self.Va = self.add_weight(
            name='Va',
            shape=(self.n_tensors, self.n_hidden, self.n_hidden),
            initializer=initializers.random_uniform(-0.2, 0.2),
            trainable=True)

        self.Vo = self.add_weight(
            name='Vo',
            shape=(self.n_tensors, self.n_hidden, self.n_hidden),
            initializer=initializers.random_uniform(-0.2, 0.2),
            trainable=True)

        if self.n_layers > 1:
            if self.rnn_type == 'gru' or self.rnn_type == 'lstm':
                self.va = self.add_weight(
                    name='va',
                    shape=(2 * self.n_tensors, 1),
                    initializer=initializers.random_uniform(-0.2, 0.2),
                    trainable=True)

                self.vo = self.add_weight(
                    name='vo',
                    shape=(2 * self.n_tensors, 1),
                    initializer=initializers.random_uniform(-0.2, 0.2),
                    trainable=True)

            elif self.rnn_type == 'bilstm' or self.rnn_type == 'bigru':
                self.va = self.add_weight(
                    name='va',
                    shape=(4 * self.n_tensors, 1),
                    initializer=initializers.random_uniform(-0.2, 0.2),
                    trainable=True)

                self.vo = self.add_weight(
                    name='vo',
                    shape=(4 * self.n_tensors, 1),
                    initializer=initializers.random_uniform(-0.2, 0.2),
                    trainable=True)

            self.Ma = self.add_weight(name='Ma',
                                      shape=(self.n_hidden, self.n_hidden),
                                      initializer=initializers.random_uniform(
                                          -0.2, 0.2),
                                      trainable=True)

            self.Mo = self.add_weight(name='Mo',
                                      shape=(self.n_hidden, self.n_hidden),
                                      initializer=initializers.random_uniform(
                                          -0.2, 0.2),
                                      trainable=True)

        super(CMLA, self).build(input_shape)  # Be sure to call this somewhere!
Example #14
    def build_network(self):
        with tf.variable_scope(self.name):
            self.input = tf.placeholder(tf.float32,
                                        shape=[None, *self.input_dims],
                                        name="inputs")
            self.actions = tf.placeholder(tf.float32,
                                          shape=[None, self.n_actions],
                                          name="actions")
            self.q_target = tf.placeholder(tf.float32,
                                           shape=[None, 1],
                                           name='targets')

            if self.use_aggregator:
                # Graph-convolution inputs: adjacency and degree matrices for a
                # fixed graph of 117 nodes.
                self.adjacency = tf.placeholder(tf.float32,
                                                shape=[None, 117, 117],
                                                name="Adjacency_matrix")
                self.degree = tf.placeholder(tf.float32,
                                             shape=[None, 117, 117],
                                             name="Degree_matrix")

                conv1 = tf.layers.Conv1D(200, 1,
                                         activation=None)(self.input)  # identity; tf.layers expects a callable, not 'linear'
                a_1 = tf.matmul(self.adjacency, conv1)
                d_1 = tf.matmul(self.degree, a_1)
                act_1 = tf.nn.relu(d_1)

                conv_2 = tf.layers.Conv1D(100, 1, activation=None)(act_1)
                a_2 = tf.matmul(self.adjacency, conv_2)
                d_2 = tf.matmul(self.degree, a_2)
                act_2 = tf.nn.relu(d_2)

                gmp = tf.keras.layers.GlobalMaxPooling1D()(act_2)

                f1 = 1 / np.sqrt(self.fc1_dims)
                dense1 = tf.layers.dense(
                    gmp,
                    units=self.fc1_dims,
                    kernel_initializer=random_uniform(-f1, f1),
                    bias_initializer=random_uniform(-f1, f1))
                batch1 = tf.layers.batch_normalization(dense1)
                layer1_activation = tf.nn.relu(batch1)

                f2 = 1 / np.sqrt(self.fc2_dims)
                dense2 = tf.layers.dense(
                    layer1_activation,
                    units=self.fc2_dims,
                    kernel_initializer=random_uniform(-f2, f2),
                    bias_initializer=random_uniform(-f2, f2))
                batch2 = tf.layers.batch_normalization(dense2)

                action_in = tf.layers.dense(self.actions,
                                            units=self.fc2_dims,
                                            activation=tf.nn.relu)

                state_actions = tf.add(batch2, action_in)
                state_actions = tf.nn.relu(state_actions)

                f3 = 0.003
                self.q = tf.layers.dense(
                    state_actions,
                    units=1,
                    kernel_initializer=random_uniform(-f3, f3),
                    bias_initializer=random_uniform(-f3, f3),
                    kernel_regularizer=tf.keras.regularizers.l2(0.01))

            else:
                f1 = 1 / np.sqrt(self.fc1_dims)
                dense1 = tf.layers.dense(
                    self.input,
                    units=self.fc1_dims,
                    kernel_initializer=random_uniform(-f1, f1),
                    bias_initializer=random_uniform(-f1, f1))
                batch1 = tf.layers.batch_normalization(dense1)
                layer1_activation = tf.nn.relu(batch1)

                f2 = 1 / np.sqrt(self.fc2_dims)
                dense2 = tf.layers.dense(
                    layer1_activation,
                    units=self.fc2_dims,
                    kernel_initializer=random_uniform(-f2, f2),
                    bias_initializer=random_uniform(-f2, f2))
                batch2 = tf.layers.batch_normalization(dense2)

                action_in = tf.layers.dense(self.actions,
                                            units=self.fc2_dims,
                                            activation=tf.nn.relu)

                state_actions = tf.add(batch2, action_in)
                state_actions = tf.nn.relu(state_actions)

                f3 = 0.003
                self.q = tf.layers.dense(
                    state_actions,
                    units=1,
                    kernel_initializer=random_uniform(-f3, f3),
                    bias_initializer=random_uniform(-f3, f3),
                    kernel_regularizer=tf.keras.regularizers.l2(0.01))

            self.loss = tf.losses.mean_squared_error(self.q_target, self.q)
Example #15
    def build_network(self, use_aggregator=True):  # note: the body reads self.use_aggregator, not this argument
        with tf.variable_scope(self.name):
            self.input = tf.placeholder(tf.float32,
                                        shape=[None, *self.input_dims],
                                        name="Inputs_features")
            self.action_gradient = tf.placeholder(tf.float32,
                                                  shape=[None, self.n_actions])

            if self.use_aggregator:
                self.adjacency = tf.placeholder(tf.float32,
                                                shape=[None, 117, 117],
                                                name="Adjacency_matrix")
                self.degree = tf.placeholder(tf.float32,
                                             shape=[None, 117, 117],
                                             name="Degree_matrix")

                conv1 = tf.layers.Conv1D(150, 1,
                                         activation=None)(self.input)  # identity activation
                a_1 = tf.matmul(self.adjacency, conv1)
                d_1 = tf.matmul(self.degree, a_1)
                act_1 = tf.nn.relu(d_1)

                conv_2 = tf.layers.Conv1D(75, 1, activation=None)(act_1)
                a_2 = tf.matmul(self.adjacency, conv_2)
                d_2 = tf.matmul(self.degree, a_2)
                act_2 = tf.nn.relu(d_2)

                gmp = tf.keras.layers.GlobalMaxPooling1D()(act_2)

                f1 = 1 / np.sqrt(self.fc1_dims)
                dense1 = tf.layers.dense(
                    gmp,
                    units=self.fc1_dims,
                    kernel_initializer=random_uniform(-f1, f1),
                    bias_initializer=random_uniform(-f1, f1))
                batch1 = tf.layers.batch_normalization(dense1)
                layer1_activation = tf.nn.relu(batch1)

                f2 = 1 / np.sqrt(self.fc2_dims)

                dense2 = tf.layers.dense(
                    layer1_activation,
                    units=self.fc2_dims,
                    kernel_initializer=random_uniform(-f2, f2),
                    bias_initializer=random_uniform(-f2, f2))
                batch2 = tf.layers.batch_normalization(dense2)
                layer2_activation = tf.nn.relu(batch2)

                f3 = 0.003
                self.mu = tf.layers.dense(
                    layer2_activation,
                    units=self.n_actions,
                    activation=None,  # linear output; tf.layers expects a callable or None
                    kernel_initializer=random_uniform(-f3, f3),
                    bias_initializer=random_uniform(-f3, f3))
                #self.mu = tf.multiply(mu, self.action_bound)

            else:

                f1 = 1 / np.sqrt(self.fc1_dims)
                dense1 = tf.layers.dense(
                    self.input,
                    units=self.fc1_dims,
                    kernel_initializer=random_uniform(-f1, f1),
                    bias_initializer=random_uniform(-f1, f1))
                batch1 = tf.layers.batch_normalization(dense1)
                layer1_activation = tf.nn.relu(batch1)

                f2 = 1 / np.sqrt(self.fc2_dims)

                dense2 = tf.layers.dense(
                    layer1_activation,
                    units=self.fc2_dims,
                    kernel_initializer=random_uniform(-f2, f2),
                    bias_initializer=random_uniform(-f2, f2))
                batch2 = tf.layers.batch_normalization(dense2)
                layer2_activation = tf.nn.relu(batch2)

                f3 = 0.003
                self.mu = tf.layers.dense(
                    layer2_activation,
                    units=self.n_actions,
                    activation=None,  # linear output
                    kernel_initializer=random_uniform(-f3, f3),
                    bias_initializer=random_uniform(-f3, f3))