def _testWithMaybeMultiAttention(
        self,
        is_multi,
        create_attention_mechanisms,
        expected_final_output,
        expected_final_state,
        attention_mechanism_depths,
        alignment_history=False,
        expected_final_alignment_history=None,
        attention_layer_sizes=None,
        attention_layers=None,
        create_query_layer=False,
        create_memory_layer=True,
        create_attention_kwargs=None):
    """Decode with an AttentionWrapper cell and compare results to expectations.

    Builds one attention mechanism per entry of `create_attention_mechanisms`,
    wraps an LSTM cell with them, runs a `BasicDecoderV2` with a
    `TrainingSampler` over random inputs, checks the static shapes of the
    outputs and final state, and finally compares summaries of the evaluated
    values with the expected structures.

    Args:
      is_multi: When true, the full list of mechanisms (and the full
        `attention_layer_sizes` / `attention_layers` lists) is handed to the
        wrapper; otherwise only the first element of each is used.
      create_attention_mechanisms: Sequence of callables. Each is invoked with
        `units`, `memory`, `memory_sequence_length` and the extra kwargs.
      expected_final_output: Structure compared against the summarized decoder
        outputs with `self.assertAllCloseOrEqual`.
      expected_final_state: Structure compared against the summarized final
        state; its `time` field is read for the expected time dimension when
        executing eagerly.
      attention_mechanism_depths: `units` value for each mechanism, paired
        with `create_attention_mechanisms` via `zip`.
      alignment_history: Forwarded to the wrapper. When truthy the stacked
        alignment history is shape-checked and compared as well.
      expected_final_alignment_history: Expected summary of the stacked
        alignment history; only used when `alignment_history` is truthy.
      attention_layer_sizes: Optional list of attention layer sizes. Falsy
        entries count as `encoder_output_depth` when computing the expected
        attention depth.
      attention_layers: Optional list of layers; the expected attention depth
        is derived from their `compute_output_shape`.
      create_query_layer: Whether to pass a ones-initialized bias-free Dense
        `query_layer` to each mechanism.
      create_memory_layer: Whether to pass a ones-initialized bias-free Dense
        `memory_layer` to each mechanism.
      create_attention_kwargs: Optional dict of extra keyword arguments for
        the mechanism creators. It is copied, never modified.
    """
    # Allow is_multi to be True with a single mechanism to enable test for
    # passing in a single mechanism in a list.
    assert len(create_attention_mechanisms) == 1 or is_multi
    encoder_sequence_length = [3, 2, 3, 1, 1]
    decoder_sequence_length = [2, 0, 1, 2, 3]
    batch_size = 5
    encoder_max_time = 8
    decoder_max_time = 4
    input_depth = 7
    encoder_output_depth = 10
    cell_depth = 9
    # Work on a private copy: the loop below writes "query_layer" and
    # "memory_layer" entries, which must not leak into the caller's dict.
    create_attention_kwargs = dict(create_attention_kwargs or {})

    if attention_layer_sizes is not None:
        # Compute sum of attention_layer_sizes. Use encoder_output_depth if
        # None.
        attention_depth = sum(
            attention_layer_size or encoder_output_depth
            for attention_layer_size in attention_layer_sizes)
    elif attention_layers is not None:
        # Compute sum of attention_layers output depth.
        attention_depth = sum(
            attention_layer.compute_output_shape(
                [batch_size, cell_depth + encoder_output_depth]).dims[-1].value
            for attention_layer in attention_layers)
    else:
        attention_depth = encoder_output_depth * len(
            create_attention_mechanisms)

    decoder_inputs = np.random.randn(
        batch_size, decoder_max_time, input_depth).astype(np.float32)
    encoder_outputs = np.random.randn(
        batch_size, encoder_max_time, encoder_output_depth).astype(np.float32)

    attention_mechanisms = []
    for creator, depth in zip(create_attention_mechanisms,
                              attention_mechanism_depths):
        # Create a memory layer with deterministic initializer to avoid
        # randomness in the test between graph and eager.
        if create_query_layer:
            create_attention_kwargs["query_layer"] = keras.layers.Dense(
                depth, kernel_initializer="ones", use_bias=False)
        if create_memory_layer:
            create_attention_kwargs["memory_layer"] = keras.layers.Dense(
                depth, kernel_initializer="ones", use_bias=False)

        attention_mechanisms.append(
            creator(
                units=depth,
                memory=encoder_outputs,
                memory_sequence_length=encoder_sequence_length,
                **create_attention_kwargs))

    with self.cached_session(use_gpu=True):
        attention_layer_size = attention_layer_sizes
        attention_layer = attention_layers
        if not is_multi:
            # A single mechanism takes scalar arguments rather than lists.
            if attention_layer_size is not None:
                attention_layer_size = attention_layer_size[0]
            if attention_layer is not None:
                attention_layer = attention_layer[0]
        cell = keras.layers.LSTMCell(
            cell_depth,
            recurrent_activation="sigmoid",
            kernel_initializer="ones",
            recurrent_initializer="ones")
        cell = wrapper.AttentionWrapper(
            cell,
            attention_mechanisms if is_multi else attention_mechanisms[0],
            attention_layer_size=attention_layer_size,
            alignment_history=alignment_history,
            attention_layer=attention_layer)
        if cell._attention_layers is not None:
            # Give attention layers without an initializer a seeded one.
            for layer in cell._attention_layers:
                if getattr(layer, "kernel_initializer") is None:
                    layer.kernel_initializer = initializers.glorot_uniform(
                        seed=1337)

        sampler = sampler_py.TrainingSampler()
        my_decoder = basic_decoder.BasicDecoderV2(cell=cell, sampler=sampler)
        initial_state = cell.get_initial_state(
            dtype=dtypes.float32, batch_size=batch_size)
        final_outputs, final_state, _ = my_decoder(
            decoder_inputs,
            initial_state=initial_state,
            sequence_length=decoder_sequence_length)

        self.assertIsInstance(final_outputs, basic_decoder.BasicDecoderOutput)
        self.assertIsInstance(final_state, wrapper.AttentionWrapperState)

        # The time dimension is only compared against a concrete value when
        # executing eagerly; otherwise None is expected.
        expected_time = (
            expected_final_state.time if context.executing_eagerly() else None)
        self.assertEqual(
            (batch_size, expected_time, attention_depth),
            tuple(final_outputs.rnn_output.get_shape().as_list()))
        self.assertEqual(
            (batch_size, expected_time),
            tuple(final_outputs.sample_id.get_shape().as_list()))

        self.assertEqual(
            (batch_size, attention_depth),
            tuple(final_state.attention.get_shape().as_list()))
        self.assertEqual(
            (batch_size, cell_depth),
            tuple(final_state.cell_state[0].get_shape().as_list()))
        self.assertEqual(
            (batch_size, cell_depth),
            tuple(final_state.cell_state[1].get_shape().as_list()))

        if alignment_history:
            if is_multi:
                state_alignment_history = []
                for history_array in final_state.alignment_history:
                    history = history_array.stack()
                    self.assertEqual(
                        (expected_time, batch_size, encoder_max_time),
                        tuple(history.get_shape().as_list()))
                    state_alignment_history.append(history)
                state_alignment_history = tuple(state_alignment_history)
            else:
                state_alignment_history = (
                    final_state.alignment_history.stack())
                self.assertEqual(
                    (expected_time, batch_size, encoder_max_time),
                    tuple(state_alignment_history.get_shape().as_list()))
            nest.assert_same_structure(
                cell.state_size,
                cell.zero_state(batch_size, dtypes.float32))
            # Remove the history from final_state for purposes of the
            # remainder of the tests.
            final_state = final_state._replace(alignment_history=())  # pylint: disable=protected-access
        else:
            state_alignment_history = ()

        self.evaluate(variables.global_variables_initializer())
        eval_result = self.evaluate({
            "final_outputs": final_outputs,
            "final_state": final_state,
            "state_alignment_history": state_alignment_history,
        })

        final_output_info = nest.map_structure(
            get_result_summary, eval_result["final_outputs"])
        final_state_info = nest.map_structure(
            get_result_summary, eval_result["final_state"])
        print("final_output_info: ", final_output_info)
        print("final_state_info: ", final_state_info)

        nest.map_structure(
            self.assertAllCloseOrEqual, expected_final_output,
            final_output_info)
        nest.map_structure(
            self.assertAllCloseOrEqual, expected_final_state,
            final_state_info)
        if alignment_history:  # by default, the wrapper emits attention as output
            final_alignment_history_info = nest.map_structure(
                get_result_summary, eval_result["state_alignment_history"])
            print("final_alignment_history_info: ",
                  final_alignment_history_info)
            nest.map_structure(
                self.assertAllCloseOrEqual,
                # outputs are batch major but the stacked TensorArray is time
                # major
                expected_final_alignment_history,
                final_alignment_history_info)
def build(self, input_shape):
    """Create the layer's weights from `input_shape`.

    Reads `self.num_capsule`, `self.dim_capsule` and `self.seed`, and records
    `input_num_capsule` / `input_dim_capsule` from positions 1 and 2 of
    `input_shape`.

    Args:
        input_shape: Shape of the layer input; must have at least three
            entries.

    Raises:
        AssertionError: If `input_shape` has fewer than three entries.
    """
    assert len(input_shape) >= 3, (
        "The input Tensor should have shape=[None, input_num_capsule, input_dim_capsule]")
    self.input_num_capsule = int(input_shape[1])
    self.input_dim_capsule = int(input_shape[2])

    # transmission matrix
    self.reweight_W = self.add_weight(
        shape=[self.input_dim_capsule, self.num_capsule, self.dim_capsule],
        initializer=glorot_uniform(seed=self.seed),
        name='reweight_W')

    self.num_fields = self.num_capsule
    # One row per unordered pair of fields: n * (n - 1) / 2.
    self.kernel_mf = self.add_weight(
        name='kernel_mf',
        shape=(int(self.num_fields * (self.num_fields - 1) / 2), 1),
        initializer=tf.keras.initializers.Ones(),
        regularizer=None,
        trainable=True)
    self.kernel_fm = self.add_weight(
        name='kernel_fm',
        shape=(self.num_fields, 1),
        initializer=tf.keras.initializers.Constant(value=0.5),
        regularizer=None,
        trainable=True)

    # self-attention
    self.kernel_highint = self.add_weight(
        name='kernel_highint',
        shape=(self.num_fields, 1),
        initializer=tf.keras.initializers.Constant(value=0.5),
        regularizer=None,
        trainable=True)
    self.self_attention_factor = self.dim_capsule
    self.self_attention_layer = 1
    self.head_num = 2

    # embedding_size=self.self_attention_factor * self.head_num
    embedding_size = self.dim_capsule
    # 1-D shapes are written as real one-element tuples; a bare
    # parenthesised int is just an int.
    self.bias_mf = self.add_weight(
        name='bias_mf', shape=(embedding_size,), initializer=Zeros())
    self.bias_fm = self.add_weight(
        name='bias_fm', shape=(embedding_size,), initializer=Zeros())
    self.routing_init = self.add_weight(
        name="routing_init",
        shape=(self.num_capsule, self.input_num_capsule),
        initializer=tf.keras.initializers.TruncatedNormal(
            seed=self.seed, stddev=10))
    self.bias_highint = self.add_weight(
        name='bias_highint',
        shape=(self.self_attention_factor * self.head_num,),
        initializer=Zeros())

    self.built = True
    # Be sure to call this somewhere!
    super(CapsuleLayer, self).build(input_shape)
def load_model(self):
    """Load the model at `self.model_path` and capture the default graph.

    The model is deserialized inside a custom-object scope that maps the name
    'GlorotUniform' to a `glorot_uniform()` instance. The result is stored on
    `self.model`, and the current default graph on `self.graph`.
    """
    initializer_overrides = {'GlorotUniform': glorot_uniform()}
    with CustomObjectScope(initializer_overrides):
        loaded = load_model(self.model_path)
        self.model = loaded
        self.graph = get_default_graph()
def network_model(inputs, num_classes):
    """Apply a staged residual network to `inputs` and return the output tensor.

    The stack is a stem (conv, batch norm, ReLU, max pool), four residual
    stages, average pooling, and a softmax classifier. Each residual stage is
    one `convolutional_block` followed by several `identity_block` calls.

    Arguments:
    inputs -- input tensor fed to the first convolution
    num_classes -- number of units of the final softmax Dense layer

    Returns:
    The output tensor of the final Dense layer (not a Model object).
    """
    # Stage 1: stem
    stem = Conv2D(64, (7, 7), strides=(1, 1), padding='same', name='conv1',
                  kernel_initializer=glorot_uniform(seed=0))(inputs)
    stem = BatchNormalization(axis=3, name='bn_conv1')(stem)
    stem = Activation('relu')(stem)
    features = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(stem)

    # Stages 2-5: (stage number, filters, stride of block 'a',
    # labels of the identity blocks that follow)
    stage_plan = (
        (2, (64, 64, 256), 1, 'bc'),
        (3, (128, 128, 512), 2, 'bcd'),
        (4, (256, 256, 1024), 2, 'bcdef'),
        (5, (512, 512, 2048), 2, 'bc'),
    )
    for stage_no, widths, stride, identity_labels in stage_plan:
        features = convolutional_block(features, f=3, filters=list(widths),
                                       stage=stage_no, block='a', s=stride)
        for label in identity_labels:
            features = identity_block(features, 3, list(widths),
                                      stage=stage_no, block=label)

    # Average pooling
    pooled = AveragePooling2D(pool_size=(7, 7), strides=(1, 1),
                              name='avg_pool')(features)

    # output layer
    flat = Flatten()(pooled)
    return Dense(num_classes, activation='softmax',
                 name='fc' + str(num_classes),
                 kernel_initializer=glorot_uniform(seed=0))(flat)
def convolutional_block(X, f, filters, stage, block, s=2):
    """
    Residual block whose shortcut branch is itself a strided convolution.

    Arguments:
    X -- input tensor
    f -- integer, window size of the middle convolution of the main path
    filters -- three integers, the filter counts of the main-path convolutions;
               the third is also used for the shortcut convolution
    stage -- used (via str()) to build the layer names
    block -- string appended to the stage when building the layer names
    s -- integer stride of the first main-path convolution and of the shortcut
         convolution

    Returns:
    The tensor produced by adding the shortcut branch to the main branch and
    applying a ReLU.
    """
    n_first, n_middle, n_last = filters

    # Shared naming scheme: res<stage><block>_branch / bn<stage><block>_branch
    tag = str(stage) + block
    conv_prefix = 'res' + tag + '_branch'
    bn_prefix = 'bn' + tag + '_branch'

    # Keep the untouched input for the shortcut branch.
    shortcut = X

    # Main path, component 1: strided 1x1 convolution
    main = Conv2D(filters=n_first, kernel_size=(1, 1), strides=(s, s),
                  name=conv_prefix + '2a',
                  kernel_initializer=glorot_uniform(seed=0))(X)
    main = BatchNormalization(axis=3, name=bn_prefix + '2a')(main)
    main = Activation('relu')(main)

    # Main path, component 2: f x f convolution, 'same' padding
    main = Conv2D(n_middle, (f, f), strides=(1, 1), padding='same',
                  name=conv_prefix + '2b',
                  kernel_initializer=glorot_uniform(seed=0))(main)
    main = BatchNormalization(axis=3, name=bn_prefix + '2b')(main)
    main = Activation('relu')(main)

    # Main path, component 3: 1x1 convolution, no activation before the merge
    main = Conv2D(n_last, (1, 1), strides=(1, 1), padding='valid',
                  name=conv_prefix + '2c',
                  kernel_initializer=glorot_uniform(seed=0))(main)
    main = BatchNormalization(axis=3, name=bn_prefix + '2c')(main)

    # Shortcut path: strided 1x1 convolution to n_last filters
    shortcut = Conv2D(filters=n_last, kernel_size=(1, 1), strides=(s, s),
                      padding='valid', name=conv_prefix + '1',
                      kernel_initializer=glorot_uniform(seed=0))(shortcut)
    shortcut = BatchNormalization(axis=3, name=bn_prefix + '1')(shortcut)

    # Merge both branches, then apply the final ReLU.
    merged = Add()([shortcut, main])
    return Activation('relu')(merged)
def ResNet50(input_shape=(32, 32, 3), classes=10):
    """Assemble the residual classifier as a Keras `Model` named 'ResNet50'.

    Arguments:
    input_shape -- shape passed to `Input`
    classes -- number of units of the final softmax Dense layer

    Returns:
    A `Model` mapping the input tensor to the softmax output.
    """
    X_input = Input(input_shape)

    # Stem: zero padding, strided 7x7 convolution, batch norm, ReLU, max pool
    net = ZeroPadding2D((3, 3))(X_input)
    net = Conv2D(64, (7, 7), strides=(2, 2), name='conv1',
                 kernel_initializer=glorot_uniform(seed=0))(net)
    net = BatchNormalization(axis=3, name='bn_conv1')(net)
    net = Activation('relu')(net)
    net = MaxPooling2D((3, 3), strides=(2, 2))(net)

    # (stage number, filters, stride of block 'a',
    # labels of the identity blocks that follow)
    stage_plan = (
        (2, (64, 64, 256), 1, 'bc'),
        (3, (128, 128, 512), 2, 'bcd'),
        (4, (256, 256, 1024), 2, 'bcdef'),
        (5, (512, 512, 2048), 2, 'bc'),
    )
    for stage_no, widths, stride, identity_labels in stage_plan:
        net = convolutional_block(net, f=3, filters=list(widths),
                                  stage=stage_no, block='a', s=stride)
        for label in identity_labels:
            net = identity_block(net, f=3, filters=list(widths),
                                 stage=stage_no, block=label)

    # Classifier head
    net = Flatten()(net)
    net = Dense(classes, activation='softmax', name='fc' + str(classes),
                kernel_initializer=glorot_uniform(seed=0))(net)

    return Model(inputs=X_input, outputs=net, name='ResNet50')
def build(self, input_shape):
    """Create the weight `S` and finish building the layer.

    `S` has shape [1, self.num_seeds, self.dim_V] and uses a
    `glorot_uniform()` initializer.
    """
    seed_shape = [1, self.num_seeds, self.dim_V]
    self.S = self.add_weight(
        name='S',
        shape=seed_shape,
        initializer=glorot_uniform(),
    )
    super(PMA, self).build(input_shape)
def get_cin_output(inputs, layer_size=(128, 128), split_half=True):
    """Build the layer-by-layer interaction network and return its pooled output.

    For every entry of `layer_size`, the first hidden tensor is combined with
    the latest hidden tensor slice by slice along the last axis. The result is
    passed through a 1-D convolution, a bias add and a ReLU. Each layer's
    "direct" output is collected; the collected outputs are concatenated on
    axis 1 and summed over the last axis.

    Args:
        inputs: Input tensor. Trailing axes of size 1 are appended until it
            has at least three dimensions. Assumes a layout of
            (batch, fields, dim) -- TODO confirm against callers.
        layer_size: Sequence with the number of filters of each layer.
        split_half: When true, every layer except the last splits its output
            in two halves along axis 1. One half feeds the next layer and the
            other is collected. The last layer's output is collected whole.

    Returns:
        The tensor obtained by concatenating the collected outputs on axis 1
        and reducing the last axis with a sum.

    Raises:
        ValueError: If `split_half` is true and any layer size other than the
            last one is odd.
    """
    last_layer = len(layer_size) - 1

    # Validate before touching the variable scope so a bad configuration
    # does not leave half of the variables behind.
    if split_half:
        for i, size in enumerate(layer_size):
            if i != last_layer and size % 2 > 0:
                raise ValueError(
                    "layer_size must be even number except for the last layer when split_half=True"
                )

    while len(inputs.shape) < 3:
        inputs = tf.expand_dims(inputs, -1)
    input_shape = inputs.get_shape().as_list()

    # --- weight initialization ---
    field_nums = [input_shape[1]]
    filters = []
    bias = []
    for i, size in enumerate(layer_size):
        filters.append(
            tf.get_variable(
                name='filter' + str(i),
                shape=[1, field_nums[-1] * field_nums[0], size],
                dtype=tf.float32,
                initializer=glorot_uniform(seed=i),
            ))
        bias.append(
            tf.get_variable(
                name='bias' + str(i),
                shape=[size],
                dtype=tf.float32,
                initializer=tf.initializers.zeros()))
        # With split_half only half of a layer's output feeds the next layer.
        field_nums.append(size // 2 if split_half else size)

    dim = input_shape[-1]
    hidden_nn_layers = [inputs]
    final_result = []

    # Slices of the first hidden tensor along the last axis; reused by
    # every layer.
    split_tensor0 = tf.split(hidden_nn_layers[0], dim * [1], 2)
    for idx, size in enumerate(layer_size):
        split_tensor = tf.split(hidden_nn_layers[-1], dim * [1], 2)
        dot_result_m = tf.matmul(split_tensor0, split_tensor, transpose_b=True)
        dot_result_o = tf.reshape(
            dot_result_m, shape=[dim, -1, field_nums[0] * field_nums[idx]])
        dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2])

        curr_out = tf.nn.conv1d(
            dot_result, filters=filters[idx], stride=1, padding='VALID')
        curr_out = tf.nn.bias_add(curr_out, bias[idx])
        curr_out = tf.nn.relu(curr_out)
        curr_out = tf.transpose(curr_out, perm=[0, 2, 1])

        if split_half:
            if idx != last_layer:
                next_hidden, direct_connect = tf.split(
                    curr_out, 2 * [size // 2], 1)
            else:
                direct_connect = curr_out
                # Placeholder only; nothing reads the hidden state after the
                # last layer.
                next_hidden = 0
        else:
            direct_connect = curr_out
            next_hidden = curr_out

        final_result.append(direct_connect)
        hidden_nn_layers.append(next_hidden)

    result = tf.concat(final_result, axis=1)
    # The reduced axis is dropped by default, so the deprecated `keep_dims`
    # keyword is not needed.
    result = tf.reduce_sum(result, -1)
    return result
def layers(self):
    """Build the encoder and return its input plus the per-stage statistics.

    The network is a zero-padded, time-distributed stem convolution followed
    by five stages of two `encoder_convolutional_block` calls each. After
    every stage two `encoder_residual_block` heads are created (mean first,
    then stddev) and stored on `self`. The attribute names carry the sizes
    noted by the original author; they are not verified here.

    Returns:
        A pair `(input_layer, heads)`. `heads` lists mean and stddev of each
        stage, ordered from the last stage back to the first.
    """

    def two_block_stage(tensor, widths, stage_no, first_stride, rate):
        # Blocks 'a' and 'b' of one stage; only block 'a' uses the stride.
        tensor = encoder_convolutional_block(
            tensor, f=3, filters=list(widths), stage=stage_no, block='a',
            s=first_stride, dropout_rate=rate)
        return encoder_convolutional_block(
            tensor, f=3, filters=list(widths), stage=stage_no, block='b',
            s=1, dropout_rate=rate)

    input_layer = Input(self.real_input_shape, self.batch_size)

    # Zero-padding
    features = TimeDistributed(ZeroPadding2D((3, 3)))(input_layer)

    # Stage 0: strided 7x7 convolution, batch norm, ReLU
    stem_conv = Conv2D(16, (7, 7), strides=(2, 2), name='conv0',
                       kernel_initializer=glorot_uniform(seed=0))
    features = TimeDistributed(stem_conv)(features)
    features = BatchNormalization(axis=-1, name='bn_conv0')(features)
    features = Activation('relu')(features)

    # Stage 1 (no stride, no dropout)
    features = two_block_stage(features, (16, 16, 32), 1, 1, 0.0)
    self.mean_64x64x32 = encoder_residual_block(features, depth=32)
    self.stddev_64x64x32 = encoder_residual_block(features, depth=32)

    # Stage 2
    features = two_block_stage(features, (32, 32, 64), 2, 2, self.dropout)
    self.mean_32x32x64 = encoder_residual_block(features, depth=64)
    self.stddev_32x32x64 = encoder_residual_block(features, depth=64)

    # Stage 3
    features = two_block_stage(features, (64, 64, 128), 3, 2, self.dropout)
    self.mean_16x16x128 = encoder_residual_block(features, depth=128)
    self.stddev_16x16x128 = encoder_residual_block(features, depth=128)

    # Stage 4
    features = two_block_stage(features, (128, 128, 256), 4, 2, self.dropout)
    self.mean_8x8x256 = encoder_residual_block(features, depth=256)
    self.stddev_8x8x256 = encoder_residual_block(features, depth=256)

    # Stage 5
    features = two_block_stage(features, (256, 256, 512), 5, 2, self.dropout)
    self.mean_4x4x512 = encoder_residual_block(features, depth=512)
    self.stddev_4x4x512 = encoder_residual_block(features, depth=512)

    heads = [
        self.mean_4x4x512, self.stddev_4x4x512,
        self.mean_8x8x256, self.stddev_8x8x256,
        self.mean_16x16x128, self.stddev_16x16x128,
        self.mean_32x32x64, self.stddev_32x32x64,
        self.mean_64x64x32, self.stddev_64x64x32,
    ]
    return input_layer, heads
num_words=num_words)

## Only report TensorFlow log messages at ERROR level, to suppress warnings.
tf.logging.set_verbosity(tf.logging.ERROR)

#tf_config = some_custom_config
#sess = tf.Session(config=tf_config)
# Create one session, remember the default graph, and register the session
# through set_session before any model is loaded.
sess = tf.Session()
graph = tf.get_default_graph()
set_session(sess)

## Load the pre-trained image-processing model (VGG16). It is loaded without
## compiling, and the name 'GlorotUniform' is mapped to a glorot_uniform()
## instance for deserialization.
model_transferred = tf.keras.models.load_model(
    "preloaded_files/model_transferred.h5",
    compile=False,
    custom_objects={'GlorotUniform': glorot_uniform()})
# NOTE(review): presumably builds the predict function up front so the model
# can be used later from other threads -- confirm.
model_transferred._make_predict_function()

## Load the pre-defined RNN decoder models (2 GRU layers, embedding size=512,
## etc.): one from the "reddit" file and one from the "coco" file.
decoder_model_reddit = tf.keras.models.load_model(
    "preloaded_files/decoder_model_reddit.h5",
    compile=False,
    custom_objects={'GlorotUniform': glorot_uniform()})
decoder_model_reddit._make_predict_function()

decoder_model_coco = tf.keras.models.load_model(
    "preloaded_files/decoder_model_coco.h5",
    compile=False,
    custom_objects={'GlorotUniform': glorot_uniform()})
decoder_model_coco._make_predict_function()

#graph = tf.get_default_graph()
def convolutional_block(X, f, filters, stage, block, s=2):
    """Residual block with a strided convolution on the shortcut branch.

    Arguments:
    X -- input tensor
    f -- window size of the middle convolution of the main path
    filters -- three filter counts for the main-path convolutions; the third
               is also used by the shortcut convolution
    stage, block -- combined into the layer names
    s -- stride of the first main-path convolution and of the shortcut

    Returns:
    ReLU of the sum of the main branch and the shortcut branch.
    """
    n_first, n_middle, n_last = filters

    tag = str(stage) + block
    conv_prefix = 'res' + tag + '_branch'
    bn_prefix = 'bn' + tag + '_branch'

    shortcut = X

    # Main branch: strided 1x1 -> f x f ('same') -> 1x1
    main = Conv2D(filters=n_first, kernel_size=(1, 1), strides=(s, s),
                  name=conv_prefix + '2a',
                  kernel_initializer=glorot_uniform(seed=0))(X)
    main = BatchNormalization(axis=3, name=bn_prefix + '2a')(main)
    main = Activation('relu')(main)

    main = Conv2D(filters=n_middle, kernel_size=(f, f), strides=(1, 1),
                  name=conv_prefix + '2b', padding='same',
                  kernel_initializer=glorot_uniform(seed=0))(main)
    main = BatchNormalization(axis=3, name=bn_prefix + '2b')(main)
    main = Activation('relu')(main)

    main = Conv2D(filters=n_last, kernel_size=(1, 1), strides=(1, 1),
                  name=conv_prefix + '2c', padding='valid',
                  kernel_initializer=glorot_uniform(seed=0))(main)
    main = BatchNormalization(axis=3, name=bn_prefix + '2c')(main)

    # Shortcut branch: strided 1x1 convolution to n_last filters
    shortcut = Conv2D(filters=n_last, kernel_size=(1, 1), strides=(s, s),
                      name=conv_prefix + '1', padding='valid',
                      kernel_initializer=glorot_uniform(seed=0))(shortcut)
    shortcut = BatchNormalization(axis=3, name=bn_prefix + '1')(shortcut)

    merged = Add()([main, shortcut])
    return Activation('relu')(merged)
def identity_block(X, f, filters, stage, block):
    """Residual block whose shortcut is the unmodified input.

    Arguments:
    X -- input tensor
    f -- window size of the middle convolution
    filters -- three filter counts for the three convolutions
    stage, block -- combined into the layer names

    Returns:
    ReLU of the sum of the main branch and the original input.
    """
    n_first, n_middle, n_last = filters

    tag = str(stage) + block
    conv_prefix = 'res' + tag + '_branch'
    bn_prefix = 'bn' + tag + '_branch'

    shortcut = X

    # Main branch: 1x1 -> f x f ('same') -> 1x1, all with stride 1
    main = Conv2D(n_first, (1, 1), strides=(1, 1), padding='valid',
                  name=conv_prefix + '2a',
                  kernel_initializer=glorot_uniform(seed=0))(X)
    main = BatchNormalization(axis=3, name=bn_prefix + '2a')(main)
    main = Activation('relu')(main)

    main = Conv2D(n_middle, (f, f), strides=(1, 1), padding='same',
                  name=conv_prefix + '2b',
                  kernel_initializer=glorot_uniform(seed=0))(main)
    main = BatchNormalization(axis=3, name=bn_prefix + '2b')(main)
    main = Activation('relu')(main)

    main = Conv2D(n_last, (1, 1), strides=(1, 1), padding='valid',
                  name=conv_prefix + '2c',
                  kernel_initializer=glorot_uniform(seed=0))(main)
    main = BatchNormalization(axis=3, name=bn_prefix + '2c')(main)

    merged = Add()([main, shortcut])
    return Activation('relu')(merged)
def encoder_convolutional_block(net, f, filters, stage, block, s=2, dropout_rate=DROPOUT_RATE):
    """Time-distributed residual block with a convolutional shortcut branch.

    The main branch is three `ConvSN2D` layers (1x1 strided, f x f, 1x1), each
    followed by batch normalization. Swish follows the first two, and dropout
    precedes the second and third. The shortcut branch is a strided 1x1
    convolution followed by two 3x3 convolutions and an `SELayer`.

    Args:
        net: Input tensor.
        f: Kernel size of the middle convolution of the main branch.
        filters: Three filter counts. The first is also used by the first
            shortcut convolution, the third by the remaining two.
        stage: Combined (via str()) with `block` into the layer names.
        block: String appended to the stage in the layer names.
        s: Stride of the first convolution of each branch.
        dropout_rate: Rate of the two Dropout layers of the main branch.

    Returns:
        Swish of the sum of the main branch and the shortcut branch.
    """
    f1, f2, f3 = filters

    tag = str(stage) + block
    conv_prefix = 'enc_res' + tag + '_branch'
    bn_prefix = 'enc_bn' + tag + '_branch'

    # Both branches start from the same input.
    shortcut = net

    # ---- main branch ----
    # Component 1: strided 1x1 convolution
    conv_2a = ConvSN2D(filters=f1, kernel_size=(1, 1), strides=(s, s),
                       padding='valid', name=conv_prefix + '2a',
                       kernel_initializer=glorot_uniform(seed=0))
    main = TimeDistributed(conv_2a)(net)
    main = BatchNormalization(axis=-1, name=bn_prefix + '2a')(main)
    main = SwishLayer()(main)

    # Component 2: dropout, then f x f convolution
    main = TimeDistributed(Dropout(dropout_rate))(main)
    conv_2b = ConvSN2D(filters=f2, kernel_size=(f, f), strides=(1, 1),
                       padding='same', name=conv_prefix + '2b',
                       kernel_initializer=glorot_uniform(seed=0))
    main = TimeDistributed(conv_2b)(main)
    main = BatchNormalization(axis=-1, name=bn_prefix + '2b')(main)
    main = SwishLayer()(main)

    # Component 3: dropout, then 1x1 convolution (no activation before merge)
    main = TimeDistributed(Dropout(dropout_rate))(main)
    conv_2c = ConvSN2D(filters=f3, kernel_size=(1, 1), strides=(1, 1),
                       padding='valid', name=conv_prefix + '2c',
                       kernel_initializer=glorot_uniform(seed=0))
    main = TimeDistributed(conv_2c)(main)
    main = BatchNormalization(axis=-1, name=bn_prefix + '2c')(main)

    # ---- shortcut branch (nVAE implementation) ----
    conv_1a = ConvSN2D(filters=f1, kernel_size=(1, 1), strides=(s, s),
                       padding='valid', name=conv_prefix + '1a',
                       kernel_initializer=glorot_uniform(seed=0))
    shortcut = TimeDistributed(conv_1a)(shortcut)
    shortcut = BatchNormalization()(shortcut)
    shortcut = SwishLayer()(shortcut)

    conv_1b = ConvSN2D(filters=f3, kernel_size=3, name=conv_prefix + "1b",
                       use_bias=False, data_format='channels_last',
                       padding='same')
    shortcut = TimeDistributed(conv_1b)(shortcut)
    shortcut = BatchNormalization()(shortcut)
    shortcut = SwishLayer()(shortcut)

    conv_1c = ConvSN2D(filters=f3, kernel_size=3, name=conv_prefix + "1c",
                       use_bias=False, data_format='channels_last',
                       padding='same')
    shortcut = TimeDistributed(conv_1c)(shortcut)
    shortcut = TimeDistributed(SELayer(depth=f3))(shortcut)

    # Merge the branches and apply the final Swish activation.
    merged = Add()([main, shortcut])
    return SwishLayer()(merged)