# Common imports assumed by the snippets below (an assumption; the original
# source files are not shown). The code targets the standalone Keras
# functional API with its TF1-style backend (K.gradients, K.learning_phase).
from keras import (backend as K, initializers, layers, models, optimizers,
                   regularizers)


def double_conv_layer(x, filter_size, size, dropout, batch_norm=False):
    '''
    Construction of a double convolutional layer using
    SAME padding and ReLU nonlinear activation.
    :param x: input
    :param filter_size: size of the convolutional filter
    :param size: number of filters
    :param dropout: dropout rate; values <= 0 disable dropout
    :param batch_norm: if True, apply batch normalization after each convolution
    :return: output of a double convolutional layer
    '''
    axis = 3
    conv = layers.Conv2D(size, (filter_size, filter_size), padding='same')(x)
    if batch_norm is True:
        conv = layers.BatchNormalization(axis=axis)(conv)
    conv = layers.Activation('relu')(conv)
    conv = layers.Conv2D(size, (filter_size, filter_size), padding='same')(conv)
    if batch_norm is True:
        conv = layers.BatchNormalization(axis=axis)(conv)
    conv = layers.Activation('relu')(conv)
    if dropout > 0:
        conv = layers.Dropout(dropout)(conv)

    # 1x1 projection shortcut so the residual add matches the channel count.
    shortcut = layers.Conv2D(size, kernel_size=(1, 1), padding='same')(x)
    if batch_norm is True:
        shortcut = layers.BatchNormalization(axis=axis)(shortcut)

    res_path = layers.add([shortcut, conv])
    return res_path
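
# A minimal usage sketch for `double_conv_layer` (the input size is an
# assumption): one residual double-conv stage applied to an RGB image tensor.
def _example_double_conv():
    inputs = layers.Input(shape=(128, 128, 3))
    outputs = double_conv_layer(inputs, filter_size=3, size=32, dropout=0.1,
                                batch_norm=True)
    return models.Model(inputs, outputs)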
def build_model(self):
    """Build a critic network that maps (state, action) pairs to Q-values."""
    l2_kernel_regularization = 1e-5

    # Define input layers
    input_states = layers.Input(shape=(self.state_size, ), name='input_states')
    input_actions = layers.Input(shape=(self.action_size, ), name='input_actions')

    # Hidden layers for states
    model_states = layers.Dense(
        units=32,
        kernel_regularizer=regularizers.l2(l2_kernel_regularization))(input_states)
    model_states = layers.BatchNormalization()(model_states)
    model_states = layers.LeakyReLU(1e-2)(model_states)
    model_states = layers.Dense(
        units=64,
        kernel_regularizer=regularizers.l2(l2_kernel_regularization))(model_states)
    model_states = layers.BatchNormalization()(model_states)
    model_states = layers.LeakyReLU(1e-2)(model_states)

    # Hidden layers for actions
    model_actions = layers.Dense(
        units=64,
        kernel_regularizer=regularizers.l2(l2_kernel_regularization))(input_actions)
    model_actions = layers.BatchNormalization()(model_actions)
    model_actions = layers.LeakyReLU(1e-2)(model_actions)

    # Both pathways merge here
    model = layers.add([model_states, model_actions])

    # Fully connected and batch normalization
    model = layers.Dense(
        units=32,
        kernel_regularizer=regularizers.l2(l2_kernel_regularization))(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(1e-2)(model)

    # Q-values / output layer
    Q_values = layers.Dense(
        units=1,
        activation=None,
        kernel_regularizer=regularizers.l2(l2_kernel_regularization),
        kernel_initializer=initializers.RandomUniform(minval=-5e-3, maxval=5e-3),
        name='output_Q_values')(model)

    # Wrap the model in Keras
    self.model = models.Model(inputs=[input_states, input_actions],
                              outputs=Q_values)
    optimizer = optimizers.Adam(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss='mse')

    # Gradients of Q w.r.t. actions, exposed for the actor update.
    action_gradients = K.gradients(Q_values, input_actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def attention_block(x, gating, inter_shape):
    # Additive attention gate (spatial): downsample the skip connection `x`,
    # align it with the gating signal, and use a sigmoid map to weight `x`.
    shape_x = K.int_shape(x)
    shape_g = K.int_shape(gating)

    theta_x = layers.Conv2D(inter_shape, (2, 2), strides=(2, 2),
                            padding='same')(x)  # 16
    shape_theta_x = K.int_shape(theta_x)

    phi_g = layers.Conv2D(inter_shape, (1, 1), padding='same')(gating)
    upsample_g = layers.Conv2DTranspose(
        inter_shape, (3, 3),
        strides=(shape_theta_x[1] // shape_g[1],
                 shape_theta_x[2] // shape_g[2]),
        padding='same')(phi_g)  # 16

    concat_xg = layers.add([upsample_g, theta_x])
    act_xg = layers.Activation('relu')(concat_xg)
    psi = layers.Conv2D(1, (1, 1), padding='same')(act_xg)
    sigmoid_xg = layers.Activation('sigmoid')(psi)
    shape_sigmoid = K.int_shape(sigmoid_xg)

    upsample_psi = layers.UpSampling2D(
        size=(shape_x[1] // shape_sigmoid[1],
              shape_x[2] // shape_sigmoid[2]))(sigmoid_xg)  # 32
    upsample_psi = expend_as(upsample_psi, shape_x[3])

    y = layers.multiply([upsample_psi, x])
    result = layers.Conv2D(shape_x[3], (1, 1), padding='same')(y)
    result_bn = layers.BatchNormalization()(result)
    return result_bn
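
# Both attention blocks in this file call `expend_as`, which is not defined
# here. A minimal sketch under the assumption of channels-last data: repeat
# the single-channel attention map across the channel axis so it can be
# multiplied with the skip connection.
def expend_as(tensor, rep):
    return layers.Lambda(
        lambda t, repnum: K.repeat_elements(t, repnum, axis=3),
        arguments={'repnum': rep})(tensor)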
def _bi_gru(units, x):
    # Bidirectional GRU built from two unidirectional GRUs whose per-timestep
    # outputs are summed. Note: with `go_backwards=True` Keras emits the
    # backward pass in reversed time order, so this sum mixes timesteps unless
    # the backward output is re-reversed (Bidirectional(..., merge_mode='sum')
    # handles this automatically).
    x = layers.Dropout(0.2)(x)
    y1 = layers.GRU(units,
                    return_sequences=True,
                    kernel_initializer='he_normal',
                    recurrent_initializer='orthogonal')(x)
    y2 = layers.GRU(units,
                    return_sequences=True,
                    go_backwards=True,
                    kernel_initializer='he_normal',
                    recurrent_initializer='orthogonal')(x)
    y = layers.add([y1, y2])
    return y
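
# A minimal usage sketch for `_bi_gru` (sequence length, vocabulary size, and
# class count are assumptions): an embedded token sequence run through one
# bidirectional GRU block with a per-timestep softmax on top.
def _example_bi_gru_model(seq_len=100, vocab=64, units=128, num_classes=10):
    inp = layers.Input(shape=(seq_len,))
    x = layers.Embedding(vocab, units)(inp)
    x = _bi_gru(units, x)
    out = layers.Dense(num_classes, activation='softmax')(x)
    return models.Model(inp, out)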
def residual_block(y, nb_channels_in, nb_channels_out, _strides=(1, 1),
                   _project_shortcut=False):
    """
    Our network consists of a stack of residual blocks. These blocks have the
    same topology and are subject to two simple rules:
    - If producing spatial maps of the same size, the blocks share the same
      hyper-parameters (width and filter sizes).
    - Each time the spatial map is down-sampled by a factor of 2, the width of
      the blocks is multiplied by a factor of 2.
    """
    shortcut = y

    # We modify the residual building block into a bottleneck design to make
    # the network more economical.
    y = layers.Conv2D(nb_channels_in, kernel_size=(1, 1), strides=(1, 1),
                      padding='same')(y)
    y = add_common_layers(y)

    # ResNeXt (identical to ResNet when `cardinality` == 1)
    y = grouped_convolution(y, nb_channels_in, _strides=_strides)
    y = add_common_layers(y)

    y = layers.Conv2D(nb_channels_out, kernel_size=(1, 1), strides=(1, 1),
                      padding='same')(y)
    # Batch normalization is employed after aggregating the transformations
    # and before adding to the shortcut.
    y = layers.BatchNormalization()(y)

    # Identity shortcuts are used directly when the input and output have the
    # same dimensions.
    if _project_shortcut or _strides != (1, 1):
        # When the dimensions increase, a projection shortcut is used to match
        # dimensions (done by 1x1 convolutions). When the shortcuts go across
        # feature maps of two sizes, they are performed with a stride of 2.
        shortcut = layers.Conv2D(nb_channels_out, kernel_size=(1, 1),
                                 strides=_strides, padding='same')(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)

    y = layers.add([shortcut, y])

    # The activation is performed right after each batch normalization, except
    # for the output of the block, where it is applied after adding the shortcut.
    y = layers.LeakyReLU()(y)
    return y
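
# `residual_block` depends on `add_common_layers` and `grouped_convolution`,
# which are not shown in this snippet. Minimal sketches (assumptions following
# the usual ResNeXt formulation): BN + LeakyReLU after each conv, and a grouped
# 3x3 convolution with `cardinality` parallel paths, identical to a plain conv
# when cardinality == 1.
cardinality = 1  # assumed module-level constant


def add_common_layers(y):
    y = layers.BatchNormalization()(y)
    y = layers.LeakyReLU()(y)
    return y


def grouped_convolution(y, nb_channels, _strides):
    if cardinality == 1:
        return layers.Conv2D(nb_channels, kernel_size=(3, 3),
                             strides=_strides, padding='same')(y)
    assert not nb_channels % cardinality
    _d = nb_channels // cardinality
    groups = []
    for j in range(cardinality):
        # Slice out each group of channels and convolve it independently.
        group = layers.Lambda(lambda z, j=j: z[:, :, :, j * _d:j * _d + _d])(y)
        groups.append(layers.Conv2D(_d, kernel_size=(3, 3),
                                    strides=_strides, padding='same')(group))
    return layers.concatenate(groups)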
def attention_block(x, gating, inter_shape, name):
    """
    Self-gated attention; attention mechanism on the spatial dimension.
    :param x: input feature map
    :param gating: gate signal, feature map from the lower layer
    :param inter_shape: intermediate channel number
    :param name: name of the attention layer, used for the output weight map
    :return: feature map with attention weighting on the spatial dimension
    """
    shape_x = K.int_shape(x)
    shape_g = K.int_shape(gating)

    theta_x = layers.Conv2D(inter_shape, (2, 2), strides=(2, 2),
                            padding='same')(x)  # 16
    shape_theta_x = K.int_shape(theta_x)

    phi_g = layers.Conv2D(inter_shape, (1, 1), padding='same')(gating)
    upsample_g = layers.Conv2DTranspose(
        inter_shape, (3, 3),
        strides=(shape_theta_x[1] // shape_g[1],
                 shape_theta_x[2] // shape_g[2]),
        padding='same')(phi_g)  # 16
    # upsample_g = layers.UpSampling2D(
    #     size=(shape_theta_x[1] // shape_g[1], shape_theta_x[2] // shape_g[2]),
    #     data_format="channels_last")(phi_g)

    concat_xg = layers.add([upsample_g, theta_x])
    act_xg = layers.Activation('relu')(concat_xg)
    psi = layers.Conv2D(1, (1, 1), padding='same')(act_xg)
    sigmoid_xg = layers.Activation('sigmoid')(psi)
    shape_sigmoid = K.int_shape(sigmoid_xg)

    upsample_psi = layers.UpSampling2D(
        size=(shape_x[1] // shape_sigmoid[1],
              shape_x[2] // shape_sigmoid[2]),
        name=name + '_weight')(sigmoid_xg)  # 32
    upsample_psi = expend_as(upsample_psi, shape_x[3])

    y = layers.multiply([upsample_psi, x])
    result = layers.Conv2D(shape_x[3], (1, 1), padding='same')(y)
    result_bn = layers.BatchNormalization()(result)
    return result_bn
def res_block(inputs, size):
    # Residual dense block: two Dense -> BatchNorm -> LeakyReLU stages plus an
    # identity skip connection; `inputs` must already have `size` units.
    kernel_l2_reg = 1e-3
    net = layers.Dense(size,
                       activation=None,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg),
                       kernel_initializer=initializers.RandomUniform(
                           minval=-5e-3, maxval=5e-3))(inputs)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)
    net = layers.Dense(size,
                       activation=None,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg),
                       kernel_initializer=initializers.RandomUniform(
                           minval=-5e-3, maxval=5e-3))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)
    net = layers.add([inputs, net])
    return net
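
# A minimal usage sketch for `res_block` (sizes are assumptions): a small MLP
# trunk made of stacked residual dense blocks.
def _example_res_mlp(input_dim=32, size=64, n_blocks=3):
    inp = layers.Input(shape=(input_dim,))
    net = layers.Dense(size)(inp)  # project to `size` so the skip add matches
    for _ in range(n_blocks):
        net = res_block(net, size)
    return models.Model(inp, net)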
def build_model(self):
    # Define input layers
    inputStates = layers.Input(shape=(self.state_size, ), name='inputStates')
    inputActions = layers.Input(shape=(self.action_size, ), name='inputActions')

    # Hidden layers for states
    modelS = layers.Dense(units=128, activation='linear')(inputStates)
    modelS = layers.BatchNormalization()(modelS)
    modelS = layers.LeakyReLU(0.01)(modelS)
    modelS = layers.Dropout(0.3)(modelS)
    modelS = layers.Dense(units=256, activation='linear')(modelS)
    modelS = layers.BatchNormalization()(modelS)
    modelS = layers.LeakyReLU(0.01)(modelS)
    modelS = layers.Dropout(0.3)(modelS)

    # Hidden layers for actions
    modelA = layers.Dense(units=256, activation='linear')(inputActions)
    modelA = layers.LeakyReLU(0.01)(modelA)
    modelA = layers.BatchNormalization()(modelA)
    modelA = layers.Dropout(0.5)(modelA)

    # Merging the models
    model = layers.add([modelS, modelA])
    model = layers.Dense(units=256, activation='linear')(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(0.01)(model)

    # Q layer
    Qvalues = layers.Dense(units=1, activation=None,
                           name='outputQvalues')(model)

    # Keras model
    self.model = models.Model(inputs=[inputStates, inputActions],
                              outputs=Qvalues)
    optimizer = optimizers.Adam()
    self.model.compile(optimizer=optimizer, loss='mse')

    # Gradients of Q w.r.t. actions, exposed for the actor update.
    actionGradients = K.gradients(Qvalues, inputActions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=actionGradients)
def residual_layer(input_tensor,
                   nb_in_filters=64,
                   nb_bottleneck_filters=16,
                   filter_sz=3,
                   stage=0,
                   reg=0.0):
    bn_name = 'bn' + str(stage)
    conv_name = 'conv' + str(stage)
    relu_name = 'relu' + str(stage)
    merge_name = 'add' + str(stage)

    # batchnorm-relu-conv, from nb_in_filters to nb_bottleneck_filters via 1x1 conv
    if stage > 1:  # first activation is just after conv1
        x = layers.BatchNormalization(axis=-1, name=bn_name + 'a')(input_tensor)
        x = layers.Activation('relu', name=relu_name + 'a')(x)
    else:
        x = input_tensor
    x = layers.Conv2D(nb_bottleneck_filters, (1, 1),
                      kernel_initializer='glorot_normal',
                      kernel_regularizer=regularizers.l2(reg),
                      use_bias=False,
                      name=conv_name + 'a')(x)

    # batchnorm-relu-conv, from nb_bottleneck_filters to nb_bottleneck_filters via FxF conv
    x = layers.BatchNormalization(axis=-1, name=bn_name + 'b')(x)
    x = layers.Activation('relu', name=relu_name + 'b')(x)
    x = layers.Conv2D(nb_bottleneck_filters, (filter_sz, filter_sz),
                      padding='same',
                      kernel_initializer='glorot_normal',
                      kernel_regularizer=regularizers.l2(reg),
                      use_bias=False,
                      name=conv_name + 'b')(x)

    # batchnorm-relu-conv, from nb_bottleneck_filters back to nb_in_filters via 1x1 conv
    x = layers.BatchNormalization(axis=-1, name=bn_name + 'c')(x)
    x = layers.Activation('relu', name=relu_name + 'c')(x)
    x = layers.Conv2D(nb_in_filters, (1, 1),
                      kernel_initializer='glorot_normal',
                      kernel_regularizer=regularizers.l2(reg),
                      name=conv_name + 'c')(x)

    # merge with the identity shortcut
    x = layers.add([x, input_tensor], name=merge_name)
    return x
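
# A minimal usage sketch for `residual_layer` (input shape and stage count are
# assumptions): a stem convolution followed by a few bottleneck stages.
def _example_residual_stack(input_shape=(32, 32, 3), num_stages=3):
    inp = layers.Input(shape=input_shape)
    x = layers.Conv2D(64, (3, 3), padding='same', name='conv1')(inp)
    for stage in range(1, num_stages + 1):
        x = residual_layer(x, nb_in_filters=64, nb_bottleneck_filters=16,
                           stage=stage)
    return models.Model(inp, x)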
def shortcut(self, input, residual):
    """Adds a shortcut between input and residual block and merges them with "sum"."""
    # Expand channels of the shortcut to match the residual, and stride
    # appropriately to match the residual (width, height). The ratios should
    # be integers if the network architecture is correctly configured.
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)

    stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
    stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]

    shortcut = input
    # 1x1 conv if the shapes differ, identity otherwise.
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        shortcut = layers.Conv2D(filters=residual_shape[CHANNEL_AXIS],
                                 kernel_size=(1, 1),
                                 strides=(stride_width, stride_height),
                                 padding="valid",
                                 kernel_initializer="he_normal",
                                 kernel_regularizer=regularizers.l2(0.0001))(input)

    return layers.add([shortcut, residual])
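
# `shortcut` relies on module-level ROW_AXIS / COL_AXIS / CHANNEL_AXIS
# constants that are not defined in this snippet. A minimal sketch assuming
# channels-last tensors of shape (batch, row, col, channel):
ROW_AXIS, COL_AXIS, CHANNEL_AXIS = 1, 2, 3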
def mbConvBlock(inputs, block_args, global_params, idx, training=True,
                drop_connect_rate=None):
    # MBConv block: optional 1x1 expansion, depthwise conv, optional
    # squeeze-and-excitation, 1x1 projection, and an identity skip.
    filters = block_args.input_filters * block_args.expand_ratio
    batch_norm_momentum = global_params.batch_norm_momentum
    batch_norm_epsilon = global_params.batch_norm_epsilon
    has_se = (block_args.se_ratio is not None) and (
        block_args.se_ratio > 0) and (block_args.se_ratio <= 1)
    x = inputs

    # block_name = 'efficientnet-b0_' + 'blocks_' + str(idx) + '_'
    block_name = 'blocks_' + str(idx) + '_'
    project_conv_name = block_name + 'conv2d'
    project_bn_name = block_name + 'tpu_batch_normalization_1'
    ndbn_name = block_name + 'tpu_batch_normalization'

    if block_args.expand_ratio != 1:
        # Expansion phase:
        expand_conv = layers.Conv2D(
            filters,
            kernel_size=[1, 1],
            strides=[1, 1],
            kernel_initializer=em.conv_kernel_initializer,
            padding='same',
            use_bias=False,
            name=project_conv_name)(x)
        bn0 = em.batchnorm(momentum=batch_norm_momentum,
                           epsilon=batch_norm_epsilon,
                           name=ndbn_name)(expand_conv)
        # Shift the layer names so the projection phase gets the next indices.
        project_conv_name = block_name + 'conv2d_1'
        ndbn_name = block_name + 'tpu_batch_normalization_1'
        project_bn_name = block_name + 'tpu_batch_normalization_2'
        x = layers.Lambda(lambda x: em.relu_fn(x))(bn0)

    kernel_size = block_args.kernel_size
    # Depth-wise convolution phase:
    depthwise_conv = em.utils.DepthwiseConv2D(
        [kernel_size, kernel_size],
        strides=block_args.strides,
        depthwise_initializer=em.conv_kernel_initializer,
        padding='same',
        use_bias=False,
        name=block_name + 'depthwise_conv2d')(x)
    bn1 = em.batchnorm(momentum=batch_norm_momentum,
                       epsilon=batch_norm_epsilon,
                       name=ndbn_name)(depthwise_conv)
    x = layers.Lambda(lambda x: em.relu_fn(x))(bn1)

    if has_se:
        num_reduced_filters = max(
            1, int(block_args.input_filters * block_args.se_ratio))
        # Squeeze-and-excitation layer.
        se_tensor = ReduceMean()(x)
        se_reduce = layers.Conv2D(
            num_reduced_filters,
            kernel_size=[1, 1],
            strides=[1, 1],
            kernel_initializer=em.conv_kernel_initializer,
            padding='same',
            name=block_name + 'se_' + 'conv2d',
            use_bias=True)(se_tensor)
        se_reduce = layers.Lambda(lambda x: em.relu_fn(x))(se_reduce)
        se_expand = layers.Conv2D(
            filters,
            kernel_size=[1, 1],
            strides=[1, 1],
            kernel_initializer=em.conv_kernel_initializer,
            padding='same',
            name=block_name + 'se_' + 'conv2d_1',
            use_bias=True)(se_reduce)
        x = SigmoidMul()([x, se_expand])

    # Output phase (no activation after the projection):
    filters = block_args.output_filters
    project_conv = layers.Conv2D(
        filters,
        kernel_size=[1, 1],
        strides=[1, 1],
        kernel_initializer=em.conv_kernel_initializer,
        padding='same',
        name=project_conv_name,
        use_bias=False)(x)
    x = em.batchnorm(momentum=batch_norm_momentum,
                     epsilon=batch_norm_epsilon,
                     name=project_bn_name)(project_conv)

    if block_args.id_skip:
        if all(s == 1 for s in block_args.strides) \
                and block_args.input_filters == block_args.output_filters:
            # Only apply drop_connect when the skip connection is present.
            if drop_connect_rate:
                x = em.utils.drop_connect(x, training, drop_connect_rate)
            x = layers.add([x, inputs])
    return x
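
# `mbConvBlock` uses two custom layers and an `em` helper module that are not
# defined in this snippet. Minimal sketches of the two layers (assumptions
# based on how they are called above): a global average pool that keeps the
# spatial dimensions, and a sigmoid-gated multiply for squeeze-and-excitation.
class ReduceMean(layers.Layer):
    def call(self, inputs):
        # (batch, h, w, c) -> (batch, 1, 1, c)
        return K.mean(inputs, axis=[1, 2], keepdims=True)


class SigmoidMul(layers.Layer):
    def call(self, inputs):
        # inputs = [features, se_logits]; gate the features channel-wise.
        x, se = inputs
        return K.sigmoid(se) * x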
def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
    kernel_l2_reg = 1e-5

    # Dense options kept from experimentation:
    # units = 200,
    # activation='relu',
    # activation = None,
    # activity_regularizer=regularizers.l2(0.01),
    # kernel_regularizer=regularizers.l2(kernel_l2_reg),
    # bias_initializer=initializers.Constant(1e-2),
    # use_bias = True
    # use_bias=False

    # Define input layers
    states = layers.Input(shape=(self.state_size, ), name='states')
    actions = layers.Input(shape=(self.action_size, ), name='actions')

    # Earlier residual variant kept for reference:
    # size_repeat = 30
    # state_size = size_repeat*self.state_size
    # action_size = size_repeat*self.action_size
    # block_size = size_repeat*self.state_size + size_repeat*self.action_size
    # print("Critic block size = {}".format(block_size))
    #
    # net_states = layers.concatenate(size_repeat * [states])
    # net_states = layers.BatchNormalization()(net_states)
    # net_states = layers.Dropout(0.2)(net_states)
    #
    # net_actions = layers.concatenate(size_repeat * [actions])
    # net_actions = layers.BatchNormalization()(net_actions)
    # net_actions = layers.Dropout(0.2)(net_actions)
    #
    # # State pathway
    # for _ in range(3):
    #     net_states = res_block(net_states, state_size)
    #
    # # Action pathway
    # for _ in range(2):
    #     net_actions = res_block(net_actions, action_size)
    #
    # # Merge state and action pathways
    # net = layers.concatenate([net_states, net_actions])
    #
    # # Final blocks
    # for _ in range(3):
    #     net = res_block(net, block_size)

    # Add hidden layer(s) for the state pathway
    net_states = layers.Dense(
        units=300,
        kernel_regularizer=regularizers.l2(kernel_l2_reg))(states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)
    net_states = layers.Dense(
        units=400,
        kernel_regularizer=regularizers.l2(kernel_l2_reg))(net_states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)

    # Add hidden layer(s) for the action pathway
    net_actions = layers.Dense(
        units=400,
        kernel_regularizer=regularizers.l2(kernel_l2_reg))(actions)
    net_actions = layers.BatchNormalization()(net_actions)
    net_actions = layers.LeakyReLU(1e-2)(net_actions)

    # Merge state and action pathways
    net = layers.add([net_states, net_actions])
    net = layers.Dense(
        units=200,
        kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    # Add final output layer to produce action values (Q values)
    Q_values = layers.Dense(
        units=1,
        activation=None,
        kernel_regularizer=regularizers.l2(kernel_l2_reg),
        kernel_initializer=initializers.RandomUniform(minval=-5e-3,
                                                      maxval=5e-3),
        # bias_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
        name='q_values')(net)

    # Create Keras model
    self.model = models.Model(inputs=[states, actions], outputs=Q_values)

    # Define optimizer and compile model for training with built-in loss function
    optimizer = optimizers.Adam(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss='mse')

    # Compute action gradients (derivative of Q values w.r.t. actions)
    action_gradients = K.gradients(Q_values, actions)

    # Define an additional function to fetch action gradients (to be used by the actor model)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
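
# A minimal usage sketch (the batch arrays are assumptions): fetching dQ/da
# from the compiled critic for a DDPG-style actor update. The trailing 0 sets
# K.learning_phase() to inference.
def _example_fetch_action_gradients(critic, states_batch, actions_batch):
    return critic.get_action_gradients([states_batch, actions_batch, 0])[0]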
def create_model(self, img_shape, num_class):
    concat_axis = 3
    inputs = layers.Input(shape=img_shape)

    # Contracting path
    conv1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same',
                          name='conv1_1')(inputs)
    conv1 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    pool1 = layers.MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
    conv2 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
    pool2 = layers.MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
    conv3 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
    pool3 = layers.MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
    conv4 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
    pool4 = layers.MaxPooling2D(pool_size=(2, 2))(conv4)

    # Bottleneck: use dilated convolutions with increasing rates, summed.
    x = pool4
    depth = 3  # 3 # 6
    dilated_layers = []
    mode = 'cascade'
    if mode == 'cascade':
        for i in range(depth):
            x = layers.Conv2D(512, (3, 3), activation='relu', padding='same',
                              dilation_rate=2**i)(x)
            dilated_layers.append(x)
        conv5 = layers.add(dilated_layers)
    elif mode == 'parallel':  # "Atrous Spatial Pyramid Pooling"
        for i in range(depth):
            dilated_layers.append(
                layers.Conv2D(512, (3, 3), activation='relu', padding='same',
                              dilation_rate=2**i)(x))
        conv5 = layers.add(dilated_layers)
    # conv5 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
    # conv5 = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)

    # Expanding path: upsample, crop the skip connection to match, concatenate.
    up_conv5 = layers.UpSampling2D(size=(2, 2))(conv5)
    ch, cw = self.get_crop_shape(conv4, up_conv5)
    crop_conv4 = layers.Cropping2D(cropping=(ch, cw))(conv4)
    up6 = layers.concatenate([up_conv5, crop_conv4], axis=concat_axis)
    conv6 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
    conv6 = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)

    up_conv6 = layers.UpSampling2D(size=(2, 2))(conv6)
    ch, cw = self.get_crop_shape(conv3, up_conv6)
    crop_conv3 = layers.Cropping2D(cropping=(ch, cw))(conv3)
    up7 = layers.concatenate([up_conv6, crop_conv3], axis=concat_axis)
    conv7 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
    conv7 = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)

    up_conv7 = layers.UpSampling2D(size=(2, 2))(conv7)
    ch, cw = self.get_crop_shape(conv2, up_conv7)
    crop_conv2 = layers.Cropping2D(cropping=(ch, cw))(conv2)
    up8 = layers.concatenate([up_conv7, crop_conv2], axis=concat_axis)
    conv8 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
    conv8 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)

    up_conv8 = layers.UpSampling2D(size=(2, 2))(conv8)
    ch, cw = self.get_crop_shape(conv1, up_conv8)
    crop_conv1 = layers.Cropping2D(cropping=(ch, cw))(conv1)
    up9 = layers.concatenate([up_conv8, crop_conv1], axis=concat_axis)
    conv9 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
    conv9 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)

    # Pad back to the input size; a 1x1 conv produces per-class logits.
    ch, cw = self.get_crop_shape(inputs, conv9)
    conv9 = layers.ZeroPadding2D(padding=((ch[0], ch[1]),
                                          (cw[0], cw[1])))(conv9)
    conv10 = layers.Conv2D(num_class, (1, 1))(conv9)

    model = models.Model(inputs=inputs, outputs=conv10)
    return model
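
# `create_model` calls `self.get_crop_shape`, which is not shown in this
# snippet. A minimal sketch (an assumption about the original helper): compute
# the (top, bottom) and (left, right) amounts needed to crop `target` down to
# `refer`'s spatial size, splitting odd differences unevenly.
def get_crop_shape(self, target, refer):
    th, tw = K.int_shape(target)[1], K.int_shape(target)[2]
    rh, rw = K.int_shape(refer)[1], K.int_shape(refer)[2]
    dh, dw = th - rh, tw - rw
    return (dh // 2, dh - dh // 2), (dw // 2, dw - dw // 2)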