def GCN_net():
    """Build a GCN-based multi-agent policy network.

    NOTE(review): relies on module-level names `no_agents`, `no_features`,
    `no_actions`, and `arglist` — confirm they are defined before this is
    called (the sibling builders take `arglist` as a parameter instead).

    Returns:
        tf.keras.Model mapping [node features (batch, no_agents, no_features),
        adjacency (batch, no_agents, no_agents)] to per-agent tanh-squashed
        action values of shape (batch, no_agents, no_actions).
    """
    I1 = Input(shape=(no_agents, no_features), name="gcn_input")
    Adj = Input(shape=(no_agents, no_agents), name="adj")
    gcn = GCNConv(arglist.no_neurons,
                  kernel_initializer=tf.keras.initializers.he_uniform(),
                  activation=tf.keras.layers.LeakyReLU(alpha=0.1),
                  use_bias=False,
                  name="Gcn")([I1, Adj])
    # Skip connection: keep the raw node features alongside the GCN embedding.
    concat = tf.keras.layers.Concatenate(axis=2)([I1, gcn])
    # Head layers are created once and reused for every agent (weight sharing).
    dense = Dense(arglist.no_neurons,
                  kernel_initializer=tf.keras.initializers.he_uniform(),
                  activation=tf.keras.layers.LeakyReLU(alpha=0.1),
                  name="dense_layer")
    last_dense = Dense(no_actions,
                       kernel_initializer=tf.keras.initializers.he_uniform(),
                       name="last_dense_layer")
    # Split (batch, no_agents, feat) into a list of per-agent (batch, feat) tensors.
    split = Lambda(lambda x: tf.squeeze(
        tf.split(x, num_or_size_splits=no_agents, axis=1), axis=2))(concat)
    # Idiom fix: comprehension replaces the manual append loop over
    # the redundant list(range(...)).
    outputs = [tf.keras.activations.tanh(last_dense(dense(split[j])))
               for j in range(no_agents)]
    V = tf.stack(outputs, axis=1)
    model = Model([I1, Adj], V)
    model._name = "final_network"  # private Keras attr; kept for compatibility
    return model
def graph_net(arglist):
    """Build a GAT-based multi-agent policy network.

    NOTE(review): relies on module-level `no_agents`, `no_features`,
    `no_actions` — confirm they are set before calling.

    Args:
        arglist: config object; reads `arglist.no_neurons`.

    Returns:
        tf.keras.Model mapping [node features, adjacency] to per-agent
        tanh-squashed action values of shape (batch, no_agents, no_actions).
    """
    I1 = Input(shape=(no_agents, no_features), name="graph_input")
    Adj = Input(shape=(no_agents, no_agents), name="adj")
    gat = GATConv(
        arglist.no_neurons,
        activation='relu',
        attn_heads=4,
        concat_heads=True,
    )([I1, Adj])
    # Skip connection: keep raw node features alongside the attention embedding.
    concat = tf.keras.layers.Concatenate(axis=2)([I1, gat])
    # Head layers are created once and reused for every agent (weight sharing).
    dense = Dense(arglist.no_neurons,
                  kernel_initializer=tf.keras.initializers.he_uniform(),
                  activation=tf.keras.layers.LeakyReLU(alpha=0.1),
                  name="dense_layer")
    last_dense = Dense(no_actions,
                       kernel_initializer=tf.keras.initializers.he_uniform(),
                       name="last_dense_layer")
    # Split (batch, no_agents, feat) into per-agent (batch, feat) tensors.
    split = Lambda(lambda x: tf.squeeze(
        tf.split(x, num_or_size_splits=no_agents, axis=1), axis=2))(concat)
    outputs = [tf.keras.activations.tanh(last_dense(dense(split[j])))
               for j in range(no_agents)]
    V = tf.stack(outputs, axis=1)
    model = Model([I1, Adj], V)
    model._name = "final_network"  # private Keras attr; kept for compatibility
    # Robustness fix: plot_model requires pydot + graphviz and raises
    # ImportError when they are absent; the diagram is a side effect only,
    # so make it best-effort rather than letting construction fail.
    try:
        tf.keras.utils.plot_model(model, show_shapes=True)
    except ImportError:
        pass
    return model
def graph_net(arglist):
    """Build a GAT-based multi-agent network with optional dueling heads.

    With `arglist.dueling` the per-agent output is tanh(V(s) + (A(s,a) -
    mean_a A(s,a))); otherwise the raw second dense layer output is used.

    NOTE(review): relies on module-level `no_agents`, `no_features`,
    `no_actions` — confirm they are set before calling.

    Args:
        arglist: config object; reads `arglist.no_neurons`, `arglist.dueling`.

    Returns:
        tf.keras.Model mapping [node features, adjacency] to a stacked
        per-agent output of shape (batch, no_agents, ...).
    """
    I1 = Input(shape=(no_agents, no_features), name="gcn_input")
    Adj = Input(shape=(no_agents, no_agents), name="adj")
    gat = GATConv(
        arglist.no_neurons,
        activation='relu',
        attn_heads=4,
        concat_heads=True,
    )([I1, Adj])
    # Skip connection: keep raw node features alongside the attention embedding.
    concat = tf.keras.layers.Concatenate(axis=2)([I1, gat])
    outputs = []
    # Shared (weight-tied) per-agent layers.
    dense = Dense(arglist.no_neurons,
                  kernel_initializer=tf.keras.initializers.he_uniform(),
                  activation=tf.keras.layers.LeakyReLU(alpha=0.1),
                  name="dense_layer")
    # Bug fix: `no_neurons / 2` produced a float, but Dense `units` must be
    # an int (raises on modern Keras and on odd neuron counts); use //.
    dense2 = Dense(arglist.no_neurons // 2,
                   kernel_initializer=tf.keras.initializers.he_uniform(),
                   activation=tf.keras.layers.LeakyReLU(alpha=0.1),
                   name="sec_dense_layer")
    state_value = Dense(1, kernel_initializer='he_uniform',
                        name="value_output")
    # Broadcast the scalar V(s) so it can be added to the advantage vector.
    state_value_lambda = Lambda(lambda s: K.expand_dims(s[:, 0], -1),
                                output_shape=(no_actions, ))
    action_advantage = Dense(no_actions, name="advantage_output",
                             kernel_initializer='he_uniform')
    # Center advantages (dueling-DQN identifiability trick).
    action_advantage_lambda = Lambda(
        lambda a: a[:, :] - K.mean(a[:, :], keepdims=True),
        output_shape=(no_actions, ))
    # Split (batch, no_agents, feat) into per-agent (batch, feat) tensors.
    split = Lambda(lambda x: tf.squeeze(
        tf.split(x, num_or_size_splits=no_agents, axis=1), axis=2))(concat)
    for j in range(no_agents):
        V = dense(split[j])
        V2 = dense2(V)
        if arglist.dueling:
            state_value_dense = state_value(V2)
            state_value_n = state_value_lambda(state_value_dense)
            action_adj_dense = action_advantage(V2)
            action_adj_n = action_advantage_lambda(action_adj_dense)
            output = Add()([state_value_n, action_adj_n])
            output = tf.keras.activations.tanh(output)
            outputs.append(output)
        else:
            outputs.append(V2)
    V = tf.stack(outputs, axis=1)
    model = Model([I1, Adj], V)
    model._name = "final_network"  # private Keras attr; kept for compatibility
    # Robustness fix: plot_model needs pydot + graphviz; keep it best-effort
    # so model construction never fails over a missing plotting extra.
    try:
        tf.keras.utils.plot_model(model, show_shapes=True)
    except ImportError:
        pass
    return model
def graph_net(arglist):
    """Per-agent temporal network.

    Each agent has its own input of shape (history_size, no_features); the
    history is encoded by a GRU or a self-attention layer (selected by
    `arglist.temporal_mode`), then passed through that agent's own three-layer
    MLP head. Layers are created inside the loop, so agents do NOT share
    weights here (unlike the graph-based builders).

    NOTE(review): relies on module-level `no_agents`, `no_features`,
    `no_actions` — confirm they are set before calling.

    Raises:
        RuntimeError: if `arglist.temporal_mode` is neither "rnn" nor
            "attention".

    Returns:
        tf.keras.Model mapping the list of per-agent histories to stacked
        per-agent action logits of shape (batch, no_agents, no_actions).
    """
    inputs = [
        Input(shape=(
            arglist.history_size,
            no_features,
        ))
        for _ in range(no_agents)
    ]
    agent_outputs = []
    for idx in range(no_agents):
        mode = arglist.temporal_mode.lower()
        if mode == "rnn":
            encoded = GRU(arglist.no_neurons)(inputs[idx])
        elif mode == "attention":
            encoded = SelfAttention(
                activation=tf.keras.layers.LeakyReLU(alpha=0.1))(inputs[idx])
            # Keep only the representation of the last time step.
            encoded = Lambda(lambda x: x[:, -1])(encoded)
        else:
            raise RuntimeError(
                "Temporal Information Layer should be rnn or attention but %s found!"
                % arglist.temporal_mode)
        hidden = Dense(
            arglist.no_neurons,
            kernel_initializer=tf.keras.initializers.he_uniform(),
            activation=tf.keras.layers.LeakyReLU(alpha=0.1))(encoded)
        hidden = Dense(
            arglist.no_neurons,
            kernel_initializer=tf.keras.initializers.he_uniform(),
            activation=tf.keras.layers.LeakyReLU(alpha=0.1))(hidden)
        logits = Dense(
            no_actions,
            kernel_initializer=tf.keras.initializers.he_uniform())(hidden)
        agent_outputs.append(logits)
    V = tf.stack(agent_outputs, axis=1)
    model = Model(inputs, V)
    model._name = "final_network"
    tf.keras.utils.plot_model(model, show_shapes=True)
    return model
def _model(self):
    """Build and compile the AlphaZero-style policy/value network.

    Input is a (6, 7, 3) board encoding (channels_last). The trunk is an
    initial 4x4 conv block followed by six residual blocks; two heads branch
    off the trunk: a softmax policy over the 7 columns and a tanh scalar value.

    Reads `self.filters`, `self.l2_reg`, `self.name`; uses the module-level
    `ResBlock` helper and `softmax_cross_entropy_with_logits` loss.

    Returns:
        Compiled keras Model with outputs [policy, value].
    """
    from keras.models import Model
    from keras.layers import Dense, Input, Conv2D, add, Flatten, BatchNormalization, ReLU
    from keras.optimizers import Adam, SGD
    from keras.losses import mean_squared_error, binary_crossentropy
    from keras.regularizers import l2

    board_input = Input(shape=(6, 7, 3))

    # Initial conv block.
    conv_1 = Conv2D(self.filters, (4, 4), padding='same',
                    kernel_regularizer=l2(self.l2_reg))(board_input)
    norm_1 = BatchNormalization()(conv_1)
    relu_1 = ReLU()(norm_1)

    # Six residual blocks (replaces the hand-unrolled res_1..res_6 chain).
    res = relu_1
    for _ in range(6):
        res = ResBlock(res, self.filters, self.l2_reg)

    # Policy head.
    policy_conv = Conv2D(32, (1, 1), use_bias=False,
                         kernel_regularizer=l2(self.l2_reg))(res)
    policy_norm = BatchNormalization()(policy_conv)
    policy_relu = ReLU()(policy_norm)
    policy_flat = Flatten()(policy_relu)
    # Policy output: one logit per board column.
    policy = Dense(7, activation='softmax', name='policy', use_bias=False,
                   kernel_regularizer=l2(self.l2_reg))(policy_flat)

    # Value head.
    # Bug fix: this head previously branched off res_5, skipping the final
    # residual block while the policy head used res_6; both heads should
    # consume the full trunk output.
    value_conv = Conv2D(32, (1, 1), use_bias=False,
                        kernel_regularizer=l2(self.l2_reg))(res)
    value_norm = BatchNormalization()(value_conv)
    value_relu_1 = ReLU()(value_norm)
    value_flat = Flatten()(value_relu_1)
    value_dense = Dense(32, use_bias=False,
                        kernel_regularizer=l2(self.l2_reg))(value_flat)
    value_relu_2 = ReLU()(value_dense)
    # Value output: scalar in [-1, 1].
    value = Dense(1, activation='tanh', name='value', use_bias=False,
                  kernel_regularizer=l2(self.l2_reg))(value_relu_2)

    # Final model.
    model = Model(inputs=[board_input], outputs=[policy, value])

    # Compile.
    model.compile(optimizer=Adam(0.001),
                  loss={
                      'value': 'mse',
                      'policy': softmax_cross_entropy_with_logits
                  })

    # Set the model name (private Keras attr; kept for compatibility).
    model._name = self.name
    return model
def _model(self):
    """Build and compile the config-driven policy/value network.

    Input is a (2, 6, 7) board encoding (channels_first). The trunk is an
    initial conv block followed by `self.config.res_layers` residual blocks;
    a softmax policy head over 7 columns and a tanh scalar value head branch
    off the trunk. Hyperparameters come from `self.config`; uses the
    module-level `ResBlock`, `objective_function_for_value`,
    `objective_function_for_policy`, and `self.mean` metric.

    Returns:
        Compiled keras Model with outputs [policy, value].
    """
    from keras.models import Model
    from keras.layers import Dense, Input, Conv2D, add, Flatten, BatchNormalization, ReLU
    from keras.optimizers import Adam, SGD
    from keras.losses import mean_squared_error, binary_crossentropy
    from keras.regularizers import l2
    from tensorflow.python.util import deprecation

    # NOTE(review): private TF API — silences deprecation warnings during
    # model construction; may break on TF upgrades.
    deprecation._PRINT_DEPRECATION_WARNINGS = False

    cfg = self.config
    board_input = Input(shape=(2, 6, 7))

    # Trunk: initial conv block ...
    trunk = Conv2D(cfg.n_filters, (cfg.kernel, cfg.kernel), padding='same',
                   data_format='channels_first',
                   kernel_regularizer=l2(cfg.l2_reg))(board_input)
    trunk = BatchNormalization(axis=1)(trunk)
    trunk = ReLU()(trunk)

    # ... followed by the configured number of residual blocks.
    for _ in range(cfg.res_layers):
        trunk = ResBlock(trunk, cfg.n_filters, cfg.kernel, cfg.l2_reg)

    # Policy head: 1x1 conv -> BN -> ReLU -> flatten -> softmax over columns.
    head = Conv2D(2, (1, 1), data_format='channels_first',
                  kernel_regularizer=l2(cfg.l2_reg))(trunk)
    head = BatchNormalization(axis=1)(head)
    head = ReLU()(head)
    head = Flatten()(head)
    policy = Dense(7, activation='softmax', name='policy',
                   kernel_regularizer=l2(cfg.l2_reg))(head)

    # Value head: 1x1 conv -> BN -> ReLU -> dense -> ReLU -> tanh scalar.
    head = Conv2D(1, (1, 1), data_format='channels_first',
                  kernel_regularizer=l2(cfg.l2_reg))(trunk)
    head = BatchNormalization(axis=1)(head)
    head = ReLU()(head)
    head = Flatten()(head)
    head = Dense(cfg.value_dense,
                 kernel_regularizer=l2(cfg.l2_reg))(head)
    head = ReLU()(head)
    value = Dense(1, activation='tanh', name='value',
                  kernel_regularizer=l2(cfg.l2_reg))(head)

    model = Model(inputs=[board_input], outputs=[policy, value])
    model.compile(optimizer=SGD(0.001, momentum=0.9),
                  loss={
                      'value': objective_function_for_value,
                      'policy': objective_function_for_policy
                  },
                  metrics={'value': [self.mean]})

    # Set the model name (private Keras attr; kept for compatibility).
    model._name = cfg.model
    return model