def _build_net(self, s, name, trainable):
    # Fan-in scaled init for the hidden layer; small uniform init for the output.
    f1 = 1 / np.sqrt(self.f1)
    f2 = 0.003
    with tf.variable_scope(name):
        l1 = tf.layers.dense(s, self.f1, activation=tf.nn.relu,
                             kernel_initializer=random_uniform(-f1, f1),
                             bias_initializer=random_uniform(-f1, f1),
                             trainable=trainable)
        with tf.variable_scope('q'):
            q = tf.layers.dense(l1, self.action_dim,
                                kernel_initializer=random_uniform(-f2, f2),
                                bias_initializer=random_uniform(-f2, f2),
                                trainable=trainable)
    return q
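# Hedged usage sketch, not from the source: with this helper called twice under
# different scopes, a hard target-network sync is just a list of assign ops.
# Assumes TensorFlow 1.x (imported as tf); the scope names are illustrative.
def make_hard_replace_op(eval_scope='eval_net', target_scope='target_net'):
    e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=eval_scope)
    t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=target_scope)
    # Copy every evaluation-net variable into its target-net counterpart.
    return [tf.assign(t, e) for t, e in zip(t_params, e_params)]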
def create_critic_model(self):
    state = keras.Input(shape=(self.s_dim,))
    action = keras.Input(shape=(self.a_dim,))
    # Embed the state before merging it with the action input.
    state_transform = keras.layers.Dense(self.s_dim, activation='relu')(state)
    input_state = keras.layers.concatenate([state_transform, action])
    h1 = keras.layers.Dense(self.hidden_layer_1, activation='relu')(input_state)
    h2 = keras.layers.Dense(self.hidden_layer_2, activation='relu')(h1)
    output = keras.layers.Dense(1, activation='linear',
                                kernel_initializer=random_uniform())(h2)
    model = keras.Model([state, action], output)
    model.summary()
    return model
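# Hedged usage sketch, not from the source: compiling and querying the critic above.
# The Adam learning rate and the MSE loss are assumptions, not taken from the repo.
def compile_critic(agent, lr=1e-3):
    critic = agent.create_critic_model()
    critic.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), loss='mse')
    return critic  # critic.predict([states, actions]) -> Q-values, shape (batch, 1)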
def build_network(self):
    with tf.variable_scope(self.name):
        self.input = tf.placeholder(tf.float32,
                                    shape=[None, *self.input_dims],
                                    name='inputs')
        # dQ/da from the critic, fed in during the policy update.
        self.action_gradient = tf.placeholder(tf.float32,
                                              shape=[None, self.n_actions])
        f1 = 1 / np.sqrt(self.fc1_dims)
        dense1 = tf.layers.dense(self.input, units=self.fc1_dims,
                                 kernel_initializer=random_uniform(-f1, f1),
                                 bias_initializer=random_uniform(-f1, f1))
        batch1 = tf.layers.batch_normalization(dense1)
        layer1_activation = tf.nn.relu(batch1)
        f2 = 1 / np.sqrt(self.fc2_dims)
        dense2 = tf.layers.dense(layer1_activation, units=self.fc2_dims,
                                 kernel_initializer=random_uniform(-f2, f2),
                                 bias_initializer=random_uniform(-f2, f2))
        batch2 = tf.layers.batch_normalization(dense2)
        layer2_activation = tf.nn.relu(batch2)
        f3 = 0.003
        # tf.layers.dense expects a callable activation, not the string 'tanh'.
        mu = tf.layers.dense(layer2_activation, units=self.n_actions,
                             activation=tf.nn.tanh,
                             kernel_initializer=random_uniform(-f3, f3),
                             bias_initializer=random_uniform(-f3, f3))
        self.mu = tf.multiply(mu, self.action_bound)
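# Hedged sketch, not from the source: how the action_gradient placeholder above is
# typically consumed in DDPG. `mu`, `params`, `batch_size`, and the learning rate
# are illustrative names; assumes TensorFlow 1.x.
def make_actor_train_op(mu, params, action_gradient, batch_size, lr=1e-4):
    # Backpropagate dQ/da through the policy; negate so the optimizer ascends Q.
    grads = tf.gradients(mu, params, -action_gradient)
    grads = [g / batch_size for g in grads]
    return tf.train.AdamOptimizer(lr).apply_gradients(list(zip(grads, params)))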
def _build_net(self, s, name, trainable):
    f1 = 1 / np.sqrt(270)
    f3 = 0.003
    with tf.variable_scope(name):
        l1 = tf.layers.dense(s, 270, activation=tf.nn.relu,
                             kernel_initializer=random_uniform(-f1, f1),
                             bias_initializer=random_uniform(-f1, f1),
                             trainable=trainable)
        # Dueling architecture: a state-value stream and an advantage stream.
        with tf.variable_scope('v'):
            v = tf.layers.dense(l1, 1,
                                kernel_initializer=random_uniform(-f3, f3),
                                bias_initializer=random_uniform(-f3, f3),
                                trainable=trainable)
        with tf.variable_scope('advantage'):
            q_a = tf.layers.dense(l1, self.action_dim,
                                  kernel_initializer=random_uniform(-f3, f3),
                                  bias_initializer=random_uniform(-f3, f3),
                                  trainable=trainable)
            # Centre the advantages so the V/A split is identifiable.
            q_a = q_a - tf.reduce_mean(q_a, axis=1, keepdims=True)
        with tf.variable_scope('output_Q'):
            q = v + q_a
    return q
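# Hedged numeric check, not from the source: centring the advantages makes the V/A
# split identifiable -- a constant shift of the advantage stream leaves Q unchanged.
# Plain NumPy with illustrative values.
v = np.array([[1.0]])
a = np.array([[0.5, -0.5, 1.0]])
q = v + (a - a.mean(axis=1, keepdims=True))
q_shifted = v + ((a + 7.0) - (a + 7.0).mean(axis=1, keepdims=True))
assert np.allclose(q, q_shifted)  # the +7 offset cancels out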
def build_network(self):
    with tf.variable_scope(self.name):
        self.input = tf.placeholder(tf.float32,
                                    shape=[None, *self.input_dims],
                                    name='inputs')
        self.actions = tf.placeholder(tf.float32,
                                      shape=[None, self.n_actions],
                                      name='actions')
        self.q_target = tf.placeholder(tf.float32, shape=[None, 1],
                                       name='targets')
        f1 = 1. / np.sqrt(self.fc1_dims)
        dense1 = tf.layers.dense(self.input, units=self.fc1_dims,
                                 kernel_initializer=random_uniform(-f1, f1),
                                 bias_initializer=random_uniform(-f1, f1))
        batch1 = tf.layers.batch_normalization(dense1)
        layer1_activation = tf.nn.relu(batch1)
        f2 = 1. / np.sqrt(self.fc2_dims)
        dense2 = tf.layers.dense(layer1_activation, units=self.fc2_dims,
                                 kernel_initializer=random_uniform(-f2, f2),
                                 bias_initializer=random_uniform(-f2, f2))
        batch2 = tf.layers.batch_normalization(dense2)
        # layer2_activation = tf.nn.relu(batch2)
        # layer2_activation = tf.nn.relu(dense2)
        # tf.layers.dense takes a callable activation, not the string 'relu'.
        action_in = tf.layers.dense(self.actions, units=self.fc2_dims,
                                    activation=tf.nn.relu)
        # batch2 = tf.nn.relu(batch2)
        # Notes from experimentation: no activation on action_in with relu on
        # state_actions seems to perform poorly; relu on action_in with relu on
        # state_actions does reasonably well; relu on batch2 and relu on
        # action_in performs poorly.
        # state_actions = tf.concat([layer2_activation, action_in], axis=1)
        state_actions = tf.add(batch2, action_in)
        state_actions = tf.nn.relu(state_actions)
        f3 = 0.003
        self.q = tf.layers.dense(
            state_actions, units=1,
            kernel_initializer=random_uniform(-f3, f3),
            bias_initializer=random_uniform(-f3, f3),
            kernel_regularizer=tf.keras.regularizers.l2(0.01))
        self.loss = tf.losses.mean_squared_error(self.q_target, self.q)
def build_network(self):
    with tf.variable_scope(self.name):
        self.input = tf.placeholder(tf.float32,
                                    shape=[None, *self.input_dims],
                                    name='inputs')
        self.actions = tf.placeholder(tf.float32,
                                      shape=[None, self.n_actions],
                                      name='actions')
        '''
        Targets:
            y[i] = rewards[i] + gamma * Q'(s[i+1], mu'(s[i+1] | theta_mu') | theta_Q')
        Critic update by minimizing the loss:
            L = 1/N * sum_i (y[i] - Q(s[i], a[i] | theta_Q))^2
        Policy gradient:
            grad_theta_mu(J) = 1/N * sum_i grad_a Q(s, a | theta_Q)|_{s=s[i], a=mu(s[i])}
                                     * grad_theta_mu mu(s | theta_mu)|_{s=s[i]}
        '''
        self.q_target = tf.placeholder(tf.float32, shape=[None, 1],
                                       name='targets')
        f1 = 1 / np.sqrt(self.fc1_dims)
        dense1 = tf.layers.dense(self.input, units=self.fc1_dims,
                                 kernel_initializer=random_uniform(-f1, f1),
                                 bias_initializer=random_uniform(-f1, f1))
        batch1 = tf.layers.batch_normalization(dense1)
        layer1_activation = tf.nn.relu(batch1)
        f2 = 1 / np.sqrt(self.fc2_dims)
        # Bug fix: the second layer should consume layer1_activation, not the raw
        # input (the first layer's output was otherwise unused).
        dense2 = tf.layers.dense(layer1_activation, units=self.fc2_dims,
                                 kernel_initializer=random_uniform(-f2, f2),
                                 bias_initializer=random_uniform(-f2, f2))
        batch2 = tf.layers.batch_normalization(dense2)
        # tf.layers.dense takes a callable activation, not the string 'relu'.
        action_in = tf.layers.dense(self.actions, units=self.fc2_dims,
                                    activation=tf.nn.relu)
        state_actions = tf.add(batch2, action_in)
        state_actions = tf.nn.relu(state_actions)
        f3 = 0.003
        self.q = tf.layers.dense(
            state_actions, units=1,
            kernel_initializer=random_uniform(-f3, f3),
            bias_initializer=random_uniform(-f3, f3),
            kernel_regularizer=tf.keras.regularizers.l2(0.01))
        self.loss = tf.losses.mean_squared_error(self.q_target, self.q)
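# Hedged helper, not from the source, implementing the docstring's target equation
# y[i] = r[i] + gamma * Q'(s[i+1], mu'(s[i+1])); the terminal mask is a common
# convention added here as an assumption. NumPy only, illustrative names.
def ddpg_targets(rewards, q_next, done, gamma=0.99):
    # q_next holds Q'(s', mu'(s')) from the target critic, shape (batch, 1).
    return rewards + gamma * q_next * (1.0 - done)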
def _build_net(self, s, a, name, trainable):
    f1 = 1 / np.sqrt(400)
    f2 = 1 / np.sqrt(300)
    f3 = 0.003
    with tf.variable_scope(name):
        with tf.variable_scope('l1'):
            l1 = tf.layers.batch_normalization(s)
            l1 = tf.layers.dense(l1, 400,
                                 kernel_initializer=random_uniform(-f1, f1),
                                 bias_initializer=random_uniform(-f1, f1),
                                 trainable=trainable)
        l2 = tf.layers.batch_normalization(l1)
        l2 = tf.layers.dense(l2, 300,
                             kernel_initializer=random_uniform(-f2, f2),
                             bias_initializer=random_uniform(-f2, f2),
                             trainable=trainable)
        l2 = tf.layers.batch_normalization(l2)
        # Project the action into the same 300-unit space and merge additively.
        a_in = tf.layers.dense(a, 300,
                               kernel_initializer=random_uniform(-f2, f2),
                               bias_initializer=random_uniform(-f2, f2),
                               trainable=trainable)
        b2 = tf.layers.batch_normalization(l2 + a_in)
        net = tf.nn.relu(b2)
        with tf.variable_scope('q'):
            q = tf.layers.dense(net, 1,
                                kernel_initializer=random_uniform(-f3, f3),
                                bias_initializer=random_uniform(-f3, f3),
                                trainable=trainable)  # Q(s, a)
    return q
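# Hedged sketch, not from the source: the soft target update usually paired with
# eval/target builders like the one above. tau and the parameter lists are
# illustrative; assumes TensorFlow 1.x.
def make_soft_update_op(eval_params, target_params, tau=0.001):
    # Nudge each target variable a small step toward its evaluation counterpart.
    return [tf.assign(t, (1.0 - tau) * t + tau * e)
            for t, e in zip(target_params, eval_params)]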
def build_network(self):
    fc1_dims = 400
    fc2_dims = 300
    with tf.variable_scope(self.name):
        self.input = tf.placeholder(tf.float32,
                                    shape=[None, *self.input_dims],
                                    name='inputs')
        self.actions = tf.placeholder(tf.float32,
                                      shape=[None, self.n_actions],
                                      name='actions')
        self.q_target = tf.placeholder(tf.float32, shape=[None, 1],
                                       name='targets')
        f1 = 1 / np.sqrt(fc1_dims)
        dense1 = tf.layers.dense(self.input, units=fc1_dims,
                                 kernel_initializer=random_uniform(-f1, f1),
                                 bias_initializer=random_uniform(-f1, f1))
        batch1 = tf.layers.batch_normalization(dense1)
        layer1_activation = tf.nn.relu(batch1)
        f2 = 1 / np.sqrt(fc2_dims)
        dense2 = tf.layers.dense(layer1_activation, units=fc2_dims,
                                 kernel_initializer=random_uniform(-f2, f2),
                                 bias_initializer=random_uniform(-f2, f2))
        batch2 = tf.layers.batch_normalization(dense2)
        # tf.layers.dense takes a callable activation, not the string 'relu'.
        action_in = tf.layers.dense(self.actions, units=fc2_dims,
                                    activation=tf.nn.relu)
        state_actions = tf.add(batch2, action_in)
        state_actions = tf.nn.relu(state_actions)
        f3 = 0.003
        self.q = tf.layers.dense(
            state_actions, units=1,
            kernel_initializer=random_uniform(-f3, f3),
            bias_initializer=random_uniform(-f3, f3),
            kernel_regularizer=tf.keras.regularizers.l2(0.01))
        self.loss = tf.losses.mean_squared_error(self.q_target, self.q)
def _build_net(self, s, name, trainable):
    f1 = 1 / np.sqrt(400)
    f2 = 1 / np.sqrt(300)
    f3 = 0.003
    with tf.variable_scope(name):
        l1 = tf.layers.batch_normalization(s)
        l1 = tf.layers.dense(l1, 400, activation=tf.nn.relu,
                             kernel_initializer=random_uniform(-f1, f1),
                             bias_initializer=random_uniform(-f1, f1),
                             name='l1', trainable=trainable)
        l2 = tf.layers.batch_normalization(l1)
        l2 = tf.layers.dense(l2, 300, activation=tf.nn.relu,
                             kernel_initializer=random_uniform(-f2, f2),
                             bias_initializer=random_uniform(-f2, f2),
                             name='l2', trainable=trainable)
        l3 = tf.layers.batch_normalization(l2)
        with tf.variable_scope('a'):
            actions = tf.layers.dense(l3, self.action_dim,
                                      activation=tf.nn.tanh,
                                      kernel_initializer=random_uniform(-f3, f3),
                                      bias_initializer=random_uniform(-f3, f3),
                                      name='a', trainable=trainable)
            # Scale the tanh output from [-1, 1] to [-action_bound, action_bound].
            scaled_a = tf.multiply(actions, self.action_bound, name='scaled_a')
    return scaled_a
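# Hedged sketch, not from the source: at act time the scaled tanh output above is
# usually perturbed for exploration and clipped back into bounds. Gaussian noise
# stands in for the Ornstein-Uhlenbeck process often used with DDPG; all names are
# illustrative.
def noisy_action(mu_value, action_bound, sigma=0.1):
    noise = np.random.normal(0.0, sigma, size=mu_value.shape)
    return np.clip(mu_value + noise, -action_bound, action_bound)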
def init_model(self, input_size, n_hidden=50, n_tensors=20, n_layers=2,
               dropout_rate=0.5, rnn_type='gru', optimizer='nadam',
               loss_function='categorical_crossentropy'):
    """
    Initialize model.

    Parameters
    ----------
    input_size: int
        Input size for this model.
    n_hidden: int, optional, default: 50
        Number of hidden units.
    n_tensors: int, optional, default: 20
        Number of tensors that capture the compositions between input tokens
        and the prototypes (aspect and opinion).
    n_layers: int, optional, default: 2
        Number of layers for the attention network.
    dropout_rate: float, optional, default: 0.5
        Dropout rate to use during training.
    rnn_type: {'gru', 'lstm', 'bilstm', 'bigru'}, optional, default: 'gru'
        Type of RNN used.
    optimizer: tf.keras.optimizers, optional, default: 'nadam'
        Optimizer to use during training. See
        https://www.tensorflow.org/api_docs/python/tf/keras/optimizers.
    loss_function: tf.keras.losses, optional, default: 'categorical_crossentropy'
        Loss function to use during training. See
        https://www.tensorflow.org/api_docs/python/tf/keras/losses.
    """
    inputs = layers.Input(shape=(None, input_size))
    # All four RNN variants share the same recurrent configuration.
    rnn_kwargs = dict(
        units=n_hidden,
        recurrent_activation='sigmoid',
        return_sequences=True,
        kernel_initializer=initializers.random_uniform(-0.2, 0.2),
        recurrent_initializer=initializers.random_uniform(-0.2, 0.2),
        dropout=dropout_rate)
    if rnn_type == 'gru':
        rnn = layers.GRU(**rnn_kwargs)(inputs)
    elif rnn_type == 'lstm':
        rnn = layers.LSTM(**rnn_kwargs)(inputs)
    elif rnn_type == 'bilstm':
        rnn = layers.Bidirectional(layers.LSTM(**rnn_kwargs))(inputs)
    elif rnn_type == 'bigru':
        rnn = layers.Bidirectional(layers.GRU(**rnn_kwargs))(inputs)
    else:
        raise ValueError(
            "Unknown rnn type. Valid types are gru, lstm, bilstm, or bigru.")
    dropout = layers.Dropout(dropout_rate)(rnn)
    # Bidirectional variants double the hidden width seen by CMLA.
    if rnn_type in ('gru', 'lstm'):
        cmla = CMLA(n_tensors, n_hidden, n_layers, dropout_rate,
                    rnn_type)(dropout)
    else:
        cmla = CMLA(n_tensors, 2 * n_hidden, n_layers, dropout_rate,
                    rnn_type)(dropout)
    aspect_prob = layers.Dense(
        3, activation='softmax',
        kernel_initializer=initializers.random_uniform(-0.2, 0.2))(cmla[0])
    opinion_prob = layers.Dense(
        3, activation='softmax',
        kernel_initializer=initializers.random_uniform(-0.2, 0.2))(cmla[1])
    self.model = tf.keras.Model(inputs=inputs,
                                outputs=[aspect_prob, opinion_prob])
    self.model.compile(optimizer=optimizer, loss=loss_function,
                       metrics=['accuracy'])
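# Hedged shape check, not from the source: the compiled model takes one padded
# sequence batch and (presumably) emits per-token aspect and opinion distributions.
# Batch size, sequence length, and input size below are illustrative assumptions.
def smoke_test(tagger, input_size=300, seq_len=40, batch=8):
    x = np.random.rand(batch, seq_len, input_size).astype('float32')
    aspect_prob, opinion_prob = tagger.model.predict(x)
    assert aspect_prob.shape == (batch, seq_len, 3)
    assert opinion_prob.shape == (batch, seq_len, 3)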
def __init__(self, name, session, observation_space, action_space):
    self.name = name
    assert isinstance(action_space, spaces.Box)
    assert len(action_space.shape) == 1
    # We assume symmetric actions.
    assert (np.abs(action_space.low) == action_space.high).all()
    self.session = session
    with tf.variable_scope(self.name):
        self.observations_input = tf.placeholder(
            observation_space.dtype,
            shape=tuple([None] + list(observation_space.shape)),
            name="observations_input")
        self.actions_input = tf.placeholder(
            action_space.dtype,
            shape=tuple([None] + list(action_space.shape)),
            name="actions_input")
        self.actions_size = action_space.shape[0]
        self.combined_input = tf.concat(
            [self.observations_input, self.actions_input],
            axis=1, name="combined_input")
        with tf.variable_scope("actor"):
            self.actor01 = layers.dense(
                inputs=self.observations_input, units=64, activation=tf.tanh,
                name="layer_one",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(gain=np.sqrt(2)))
            self.actor02 = layers.dense(
                inputs=self.actor01, units=64, activation=tf.tanh,
                name="layer_two",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(gain=np.sqrt(2)))
            self.actor_head = layers.dense(
                inputs=self.actor02,
                units=self.actions_input.shape[1].value,
                activation=tf.tanh, name="head",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(gain=0.01))
        self.actor_head_rescaled = self.actor_head * action_space.high
        self.model_computed_combined = tf.concat(
            [self.observations_input, self.actor_head_rescaled],
            axis=1, name="model_computed_combined")
        with tf.variable_scope("critic"):
            self.critic01 = layers.dense(
                inputs=self.combined_input, units=64, activation=tf.tanh,
                name="layer_one",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(gain=np.sqrt(2)))
            self.critic02 = layers.dense(
                inputs=self.critic01, units=64, activation=tf.tanh,
                name="layer_two",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(gain=np.sqrt(2)))
            self.action_value_head = layers.dense(
                inputs=self.critic02, units=1, activation=None, name="head",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.random_uniform(-3.0e-3, 3.0e-3))
            # The 'model critic' tower evaluates Q(s, mu(s)) with the same
            # weights as the critic (reuse=True on every layer).
            self.model_critic01 = layers.dense(
                inputs=self.model_computed_combined, units=64,
                activation=tf.tanh, name="layer_one",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(gain=np.sqrt(2)),
                reuse=True)
            self.model_critic02 = layers.dense(
                inputs=self.model_critic01, units=64, activation=tf.tanh,
                name="layer_two",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(gain=np.sqrt(2)),
                reuse=True)
            self.state_value_head = layers.dense(
                inputs=self.model_critic02, units=1, activation=None,
                name="head",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.random_uniform(-3.0e-3, 3.0e-3),
                reuse=True)
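# Hedged sketch, not from the source: because the 'model critic' tower reuses the
# critic weights but is fed the actor's own action, ascending its head trains the
# policy. The optimizer, learning rate, and scope name are assumptions.
def make_ddpg_actor_step(policy, actor_scope, lr=1e-4):
    actor_loss = -tf.reduce_mean(policy.state_value_head)  # maximize Q(s, mu(s))
    actor_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope=actor_scope)
    return tf.train.AdamOptimizer(lr).minimize(actor_loss, var_list=actor_vars)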
def __init__(self, n_tensors=20, n_hidden=50, n_layers=2, dropout_rate=0.5,
             rnn_type='gru', **kwargs):
    super(CMLA, self).__init__(**kwargs)
    self.n_tensors = n_tensors
    self.n_hidden = n_hidden
    self.n_layers = n_layers
    self.dropout_rate = dropout_rate
    self.rnn_type = rnn_type
    self.dot_layer = layers.Dot(axes=0)

    def make_rnn():
        # Both attention streams use the same recurrent configuration.
        rnn_kwargs = dict(
            units=2 * self.n_tensors,
            recurrent_activation='sigmoid',
            return_sequences=True,
            use_bias=False,
            kernel_initializer=initializers.random_uniform(-0.2, 0.2),
            recurrent_initializer=initializers.random_uniform(-0.2, 0.2),
            dropout=self.dropout_rate)
        if self.rnn_type == 'gru':
            return layers.GRU(**rnn_kwargs)
        if self.rnn_type == 'lstm':
            return layers.LSTM(**rnn_kwargs)
        if self.rnn_type == 'bilstm':
            return layers.Bidirectional(layers.LSTM(**rnn_kwargs))
        if self.rnn_type == 'bigru':
            return layers.Bidirectional(layers.GRU(**rnn_kwargs))
        raise ValueError(
            "Unknown rnn type. Valid types are gru, lstm, bilstm, or bigru.")

    self.rnn_aspect_layer = make_rnn()
    self.rnn_opinion_layer = make_rnn()
def build(self, input_shape):
    """Create and initialize trainable weights."""
    init = initializers.random_uniform(-0.2, 0.2)
    # Initial aspect and opinion prototype vectors.
    self.m0_a = self.add_weight(name='m0_a', shape=(1, self.n_hidden),
                                initializer=init, trainable=True)
    self.m0_o = self.add_weight(name='m0_o', shape=(1, self.n_hidden),
                                initializer=init, trainable=True)
    # Composition tensors for the aspect ('a') and opinion ('o') streams.
    tensor_shape = (self.n_tensors, self.n_hidden, self.n_hidden)
    self.Ua = self.add_weight(name='Ua', shape=tensor_shape,
                              initializer=init, trainable=True)
    self.Uo = self.add_weight(name='Uo', shape=tensor_shape,
                              initializer=init, trainable=True)
    self.Va = self.add_weight(name='Va', shape=tensor_shape,
                              initializer=init, trainable=True)
    self.Vo = self.add_weight(name='Vo', shape=tensor_shape,
                              initializer=init, trainable=True)
    if self.n_layers > 1:
        # Bidirectional RNNs double the attention width.
        if self.rnn_type in ('gru', 'lstm'):
            width = 2 * self.n_tensors
        else:  # 'bilstm' or 'bigru'
            width = 4 * self.n_tensors
        self.va = self.add_weight(name='va', shape=(width, 1),
                                  initializer=init, trainable=True)
        self.vo = self.add_weight(name='vo', shape=(width, 1),
                                  initializer=init, trainable=True)
        self.Ma = self.add_weight(name='Ma',
                                  shape=(self.n_hidden, self.n_hidden),
                                  initializer=init, trainable=True)
        self.Mo = self.add_weight(name='Mo',
                                  shape=(self.n_hidden, self.n_hidden),
                                  initializer=init, trainable=True)
    super(CMLA, self).build(input_shape)  # Be sure to call this at the end!
def build_network(self):
    with tf.variable_scope(self.name):
        self.input = tf.placeholder(tf.float32,
                                    shape=[None, *self.input_dims],
                                    name="inputs")
        self.actions = tf.placeholder(tf.float32,
                                      shape=[None, self.n_actions],
                                      name="actions")
        self.q_target = tf.placeholder(tf.float32, shape=[None, 1],
                                       name='targets')
        if self.use_aggregator:
            self.adjacency = tf.placeholder(tf.float32, shape=[None, 117, 117],
                                            name="Adjacency_matrix")
            self.degree = tf.placeholder(tf.float32, shape=[None, 117, 117],
                                         name="Degree_matrix")
            # Two graph-convolution blocks: X -> relu(D * A * (X W)).
            # tf.layers.Conv1D takes a callable activation (None = linear),
            # not the string "linear".
            conv1 = tf.layers.Conv1D(200, 1, activation=None)(self.input)
            a_1 = tf.matmul(self.adjacency, conv1)
            d_1 = tf.matmul(self.degree, a_1)
            act_1 = tf.nn.relu(d_1)
            conv_2 = tf.layers.Conv1D(100, 1, activation=None)(act_1)
            a_2 = tf.matmul(self.adjacency, conv_2)
            d_2 = tf.matmul(self.degree, a_2)
            act_2 = tf.nn.relu(d_2)
            # Pool per-node features into a fixed-size graph embedding.
            features = tf.keras.layers.GlobalMaxPooling1D()(act_2)
        else:
            features = self.input
        # The fully connected Q-head below is shared by both branches.
        f1 = 1 / np.sqrt(self.fc1_dims)
        dense1 = tf.layers.dense(features, units=self.fc1_dims,
                                 kernel_initializer=random_uniform(-f1, f1),
                                 bias_initializer=random_uniform(-f1, f1))
        batch1 = tf.layers.batch_normalization(dense1)
        layer1_activation = tf.nn.relu(batch1)
        f2 = 1 / np.sqrt(self.fc2_dims)
        dense2 = tf.layers.dense(layer1_activation, units=self.fc2_dims,
                                 kernel_initializer=random_uniform(-f2, f2),
                                 bias_initializer=random_uniform(-f2, f2))
        batch2 = tf.layers.batch_normalization(dense2)
        action_in = tf.layers.dense(self.actions, units=self.fc2_dims,
                                    activation=tf.nn.relu)
        state_actions = tf.add(batch2, action_in)
        state_actions = tf.nn.relu(state_actions)
        f3 = 0.003
        self.q = tf.layers.dense(
            state_actions, units=1,
            kernel_initializer=random_uniform(-f3, f3),
            bias_initializer=random_uniform(-f3, f3),
            kernel_regularizer=tf.keras.regularizers.l2(0.01))
        self.loss = tf.losses.mean_squared_error(self.q_target, self.q)
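# Hedged sketch, not from the source: the degree placeholder above suggests a
# D^-1 * A * X * W propagation rule. One common way to precompute that matrix is
# shown below; whether self-loops are added before normalization is an assumption.
def inverse_degree_matrix(adjacency):
    a_hat = adjacency + np.eye(adjacency.shape[0])  # optional self-loops
    deg = a_hat.sum(axis=1)
    return np.diag(1.0 / np.maximum(deg, 1e-8))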
def build_network(self, use_aggregator=True):
    # Note: the branch below reads self.use_aggregator; the use_aggregator
    # argument is kept for compatibility but is not consulted here.
    with tf.variable_scope(self.name):
        self.input = tf.placeholder(tf.float32,
                                    shape=[None, *self.input_dims],
                                    name="Inputs_features")
        self.action_gradient = tf.placeholder(tf.float32,
                                              shape=[None, self.n_actions])
        if self.use_aggregator:
            self.adjacency = tf.placeholder(tf.float32, shape=[None, 117, 117],
                                            name="Adjacency_matrix")
            self.degree = tf.placeholder(tf.float32, shape=[None, 117, 117],
                                         name="Degree_matrix")
            # Graph-convolution blocks, as in the critic; Conv1D takes a callable
            # activation (None = linear), not the string "linear".
            conv1 = tf.layers.Conv1D(150, 1, activation=None)(self.input)
            a_1 = tf.matmul(self.adjacency, conv1)
            d_1 = tf.matmul(self.degree, a_1)
            act_1 = tf.nn.relu(d_1)
            conv_2 = tf.layers.Conv1D(75, 1, activation=None)(act_1)
            a_2 = tf.matmul(self.adjacency, conv_2)
            d_2 = tf.matmul(self.degree, a_2)
            act_2 = tf.nn.relu(d_2)
            features = tf.keras.layers.GlobalMaxPooling1D()(act_2)
        else:
            features = self.input
        # The fully connected policy head below is shared by both branches.
        f1 = 1 / np.sqrt(self.fc1_dims)
        dense1 = tf.layers.dense(features, units=self.fc1_dims,
                                 kernel_initializer=random_uniform(-f1, f1),
                                 bias_initializer=random_uniform(-f1, f1))
        batch1 = tf.layers.batch_normalization(dense1)
        layer1_activation = tf.nn.relu(batch1)
        f2 = 1 / np.sqrt(self.fc2_dims)
        dense2 = tf.layers.dense(layer1_activation, units=self.fc2_dims,
                                 kernel_initializer=random_uniform(-f2, f2),
                                 bias_initializer=random_uniform(-f2, f2))
        batch2 = tf.layers.batch_normalization(dense2)
        layer2_activation = tf.nn.relu(batch2)
        f3 = 0.003
        # tf.layers.dense takes a callable activation; None keeps it linear.
        self.mu = tf.layers.dense(layer2_activation, units=self.n_actions,
                                  activation=None,
                                  kernel_initializer=random_uniform(-f3, f3),
                                  bias_initializer=random_uniform(-f3, f3))
        # self.mu = tf.multiply(mu, self.action_bound)