def call(self, inputs, **kwargs):
    """Run the layer's forward pass: batch-normalize when configured.

    When ``self.norm == 'bn'`` the inputs are passed through a
    BatchNormalization over the last axis; otherwise they pass through
    unchanged.
    """
    # NOTE(review): a brand-new BatchNormalization layer is instantiated on
    # every call, so its statistics/weights are never shared across calls —
    # confirm this is intended rather than a layer built once in __init__.
    if self.norm != 'bn':
        return inputs
    return BatchNormalization(axis=-1)(inputs)
def BN(name=""):
    """Factory for a BatchNormalization layer with the project defaults.

    :param name: optional Keras layer name.
    :return: a BatchNormalization layer (momentum=0.95, epsilon=1e-5).
    """
    return BatchNormalization(momentum=0.95, name=name, epsilon=1e-5)
def cifar10_model1(n_classes: int, input_shape=None, input_tensor=None,
                   weights_path: Union[None, str] = None) -> Sequential:
    """
    Defines a cifar10 network.

    :param n_classes: the number of classes. We use this parameter even though
        we know its value, in order to be able to use the model in order to
        predict some of the classes.
    :param input_shape: the input shape of the network. Can be omitted if
        input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if
        input_shape is used.
    :param weights_path: a path to a trained custom network's weights.
    :return: Keras Sequential Model.
    """
    if input_shape is None and input_tensor is None:
        raise ValueError(
            'You need to specify input shape or input tensor for the network.')

    # Create a Sequential model.
    model = Sequential(name='cifar10_model1')

    if input_shape is None:
        # Create an InputLayer using the input tensor.
        model.add(InputLayer(input_tensor=input_tensor))

    # Weight decay used for the L2 regularisation of every conv layer.
    weight_decay = 1e-4

    # Keyword arguments shared by all convolutional layers below.
    conv_kwargs = dict(padding='same', activation='elu',
                       kernel_regularizer=l2(weight_decay))

    # Block 1 — the first conv carries input_shape only when no input tensor
    # was supplied (Sequential needs a shape to build from).
    if input_tensor is None:
        model.add(Conv2D(32, (3, 3), name='block1_conv1',
                         input_shape=input_shape, **conv_kwargs))
    else:
        model.add(Conv2D(32, (3, 3), name='block1_conv1', **conv_kwargs))
    model.add(BatchNormalization(name='block1_batch-norm1'))
    model.add(Conv2D(32, (3, 3), name='block1_conv2', **conv_kwargs))
    model.add(BatchNormalization(name='block1_batch-norm2'))
    model.add(MaxPooling2D(pool_size=(2, 2), name='block1_pool'))
    model.add(Dropout(0.2, name='block1_dropout', seed=0))

    # Block 2
    model.add(Conv2D(64, (3, 3), name='block2_conv1', **conv_kwargs))
    model.add(BatchNormalization(name='block2_batch-norm1'))
    model.add(Conv2D(64, (3, 3), name='block2_conv2', **conv_kwargs))
    model.add(BatchNormalization(name='block2_batch-norm2'))
    model.add(MaxPooling2D(pool_size=(2, 2), name='block2_pool'))
    model.add(Dropout(0.3, name='block2_dropout', seed=0))

    # Block 3
    model.add(Conv2D(128, (3, 3), name='block3_conv1', **conv_kwargs))
    model.add(BatchNormalization(name='block3_batch-norm1'))
    model.add(Conv2D(128, (3, 3), name='block3_conv2', **conv_kwargs))
    model.add(BatchNormalization(name='block3_batch-norm2'))
    model.add(MaxPooling2D(pool_size=(2, 2), name='block3_pool'))
    model.add(Dropout(0.4, name='block3_dropout', seed=0))

    # Add top layers.
    model.add(Flatten())
    model.add(Dense(n_classes, activation='softmax'))

    # Load weights, if they exist.
    load_weights(weights_path, model)

    return model
def __init__(self,
             rank,
             kernel_size,
             filters,
             use_bottleneck,
             bottleneck_filters_multiplier,
             use_batch_normalization,
             data_format,
             activation,
             use_bias,
             kernel_initializer,
             bias_initializer,
             kernel_regularizer,
             bias_regularizer,
             activity_regularizer,
             kernel_constraint,
             bias_constraint,
             **kwargs):
    """Build the composite (BN -> conv, optional bottleneck) block layers.

    :param rank: spatial rank of the convolution (1, 2 or 3) — selects
        Conv1D/Conv2D/Conv3D.
    :param use_bottleneck: when True, a 1x1 bottleneck conv (with
        ``bottleneck_filters_multiplier * filters`` filters) precedes the
        main convolution.
    Remaining parameters are forwarded to the underlying Keras conv layers.
    """
    self.rank = rank
    # BUGFIX: the original used `rank is 1` / `rank is 2` — identity
    # comparison against int literals is implementation-dependent (and a
    # SyntaxWarning on modern CPython); use equality instead.
    if rank == 1:
        conv_layer_type = Conv1D
    elif rank == 2:
        conv_layer_type = Conv2D
    else:
        conv_layer_type = Conv3D
    self.filters = filters
    self.channel_axis = -1 if data_format == "channels_last" else 1
    self.activation = activation
    self.use_bottleneck = use_bottleneck
    self.use_batch_normalization = use_batch_normalization

    # region Main layers initialization
    self.batch_normalization_layer = BatchNormalization() if use_batch_normalization else None
    self.conv_layer = conv_layer_type(filters, kernel_size,
                                      padding="same",
                                      data_format=data_format,
                                      use_bias=use_bias,
                                      kernel_initializer=kernel_initializer,
                                      bias_initializer=bias_initializer,
                                      kernel_regularizer=kernel_regularizer,
                                      bias_regularizer=bias_regularizer,
                                      activity_regularizer=activity_regularizer,
                                      kernel_constraint=kernel_constraint,
                                      bias_constraint=bias_constraint
                                      )
    # endregion

    # region Bottleneck layers initialization
    self.bottleneck_batch_normalization_layer = None
    self.bottleneck_conv_layer = None
    if use_bottleneck:
        if use_batch_normalization:
            self.bottleneck_batch_normalization_layer = BatchNormalization()
        bottleneck_filters = bottleneck_filters_multiplier * filters
        # 1x1 convolution reduces/expands channels before the main conv.
        self.bottleneck_conv_layer = conv_layer_type(bottleneck_filters,
                                                     kernel_size=1,
                                                     padding="same",
                                                     data_format=data_format,
                                                     use_bias=use_bias,
                                                     kernel_initializer=kernel_initializer,
                                                     bias_initializer=bias_initializer,
                                                     kernel_regularizer=kernel_regularizer,
                                                     bias_regularizer=bias_regularizer,
                                                     activity_regularizer=activity_regularizer,
                                                     kernel_constraint=kernel_constraint,
                                                     bias_constraint=bias_constraint
                                                     )
    # endregion

    super(CompositeFunctionBlock, self).__init__(**kwargs)
# set seed to be the same
random.seed(0)

# keras imports and such
# Some basic model things
# BUGFIX(review): `%tensorflow_version 2.x` is a Colab/IPython magic, not
# valid Python — it raised SyntaxError when this file was run as a script,
# so it is preserved only as a comment:
# %tensorflow_version 2.x
import tensorflow.python.keras
import tensorflow as tf
# from tensorflow.python.keras
from tensorflow.python.keras.models import load_model, Model
from tensorflow.python.keras.layers import (Dense, Dropout, Input, Conv1D,
                                            GlobalMaxPooling1D,
                                            BatchNormalization, MaxPooling1D)
from tensorflow.python.keras.layers.merge import Average
from tensorflow.python.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.python.keras.optimizers import Adam
# (duplicate `import tensorflow as tf` removed — tf is imported above)
from keras import regularizers

# Layer stack used to assemble the model below.
hidden_layers = [
    Conv1D(filters=300, kernel_size=19, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=3),
    Dropout(0.3),
    Dense(1000, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
]
def create_model(noise=True, first_kernel_size=(7, 7), n_filters=64,
                 n_covul_layers=5, activation='swish', dense_neurons=1024,
                 dropout=0.5, lr=0.0001):
    """Build and compile a CNN classifier over 300x300x3 images.

    :param noise: when True, apply GaussianNoise(0.1) to the input.
    :param first_kernel_size: kernel size of the first conv layer.
    :param n_filters: base filter count; deeper blocks use multiples of it.
    :param n_covul_layers: controls how many intermediate conv blocks exist.
    :param activation: activation for conv and dense layers.
    :param dense_neurons: width of the first dense layer (second uses half).
    :param dropout: dropout rate for the dense layers.
    :param lr: Adam learning rate.
    :return: a compiled Keras Model.
    """
    kernel = (3, 3)
    n_classes = len(classes)

    input_layer = Input(shape=(300, 300, 3))
    # BUGFIX: the original overwrote `input_layer` with the GaussianNoise
    # output and then passed that tensor to Model(...) — a functional Model
    # must be rooted at the Input tensor itself. Keep the Input separate.
    x = GaussianNoise(0.1)(input_layer) if noise else input_layer

    x = BatchNormalization(axis=[1, 2])(x)
    x = Conv2D(filters=n_filters, kernel_size=first_kernel_size,
               activation=activation)(x)
    x = BatchNormalization(axis=[1, 2])(x)
    x = MaxPooling2D((2, 2))(x)

    for i in range(2, n_covul_layers):
        x = Conv2D(filters=n_filters * i, kernel_size=kernel,
                   activation=activation)(x)
        x = Conv2D(filters=n_filters * i, kernel_size=kernel,
                   activation=activation, padding='same')(x)
        x = BatchNormalization(axis=[1, 2])(x)
        x = MaxPooling2D((2, 2))(x)

    x = Conv2D(filters=n_filters * (n_covul_layers + 1), kernel_size=kernel,
               activation=activation, padding='same')(x)
    x = Conv2D(filters=n_filters * (n_covul_layers + 1), kernel_size=kernel,
               activation=activation, padding='same')(x)
    x = BatchNormalization(axis=[1, 2])(x)
    x = MaxPooling2D((2, 2))(x)

    x = Flatten()(x)
    x = Dense(dense_neurons, activation=activation)(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout)(x)
    # BUGFIX: use integer division — Dense expects an int number of units,
    # and `dense_neurons / 2` is a float.
    x = Dense(dense_neurons // 2, activation=activation)(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout)(x)
    output = Dense(n_classes, activation="softmax")(x)

    model = Model(input_layer, output)
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=keras.optimizers.Adam(lr=lr),
                  metrics=["accuracy"])
    return model
def seq2seq_architecture(latent_size, vocabulary_size, embedding_matrix,
                         batch_size, epochs, train_article, train_summary,
                         train_target):
    """Build and train an LSTM encoder/decoder summarizer, return inference models.

    :param latent_size: LSTM hidden-state size.
    :param vocabulary_size: vocabulary size (embeddings use size + 1 rows).
    :param embedding_matrix: pretrained embedding weights (frozen).
    :param batch_size: training batch size.
    :param epochs: number of training epochs.
    :param train_article: encoder input sequences.
    :param train_summary: decoder input sequences (also used for class weights).
    :param train_target: target sequences (shifted summaries).
    :return: (encoder_model, decoder_model) for step-wise inference.
    """
    # encoder
    encoder_inputs = Input(shape=(None, ), name='Encoder-Input')
    encoder_embeddings = Embedding(
        vocabulary_size + 1, 300, weights=[embedding_matrix], trainable=False,
        mask_zero=True, name='Encoder-Word-Embedding')(encoder_inputs)
    encoder_embeddings = BatchNormalization(
        name='Encoder-Batch-Normalization')(encoder_embeddings)
    _, state_h, state_c = LSTM(latent_size, return_state=True, dropout=0.2,
                               recurrent_dropout=0.2,
                               name='Encoder-LSTM')(encoder_embeddings)
    encoder_states = [state_h, state_c]
    encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states,
                          name='Encoder-Model')
    encoder_outputs = encoder_model(encoder_inputs)

    # decoder
    decoder_inputs = Input(shape=(None, ), name='Decoder-Input')
    decoder_embeddings = Embedding(
        vocabulary_size + 1, 300, weights=[embedding_matrix], trainable=False,
        mask_zero=True, name='Decoder-Word-Embedding')(decoder_inputs)
    decoder_embeddings = BatchNormalization(
        name='Decoder-Batch-Normalization-1')(decoder_embeddings)
    decoder_lstm = LSTM(latent_size, return_state=True, return_sequences=True,
                        dropout=0.2, recurrent_dropout=0.2,
                        name='Decoder-LSTM')
    decoder_lstm_outputs, _, _ = decoder_lstm(decoder_embeddings,
                                              initial_state=encoder_outputs)
    decoder_batchnorm = BatchNormalization(
        name='Decoder-Batch-Normalization-2')(decoder_lstm_outputs)
    decoder_outputs = Dense(vocabulary_size + 1, activation='softmax',
                            name='Final-Output-Dense')(decoder_batchnorm)

    seq2seq_model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    seq2seq_model.compile(optimizer="adam",
                          loss='sparse_categorical_crossentropy',
                          metrics=['sparse_categorical_accuracy'])
    seq2seq_model.summary()

    classes = [item for sublist in train_summary.tolist() for item in sublist]
    # BUGFIX: compute_class_weight's parameters are keyword-only in recent
    # scikit-learn; the positional call raises TypeError there.
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced', classes=np.unique(classes), y=classes)

    e_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1,
                               mode='min', restore_best_weights=True)
    history = seq2seq_model.fit(x=[train_article, train_summary],
                                y=np.expand_dims(train_target, -1),
                                batch_size=batch_size, epochs=epochs,
                                validation_split=0.1, callbacks=[e_stopping],
                                class_weight=class_weights)

    # BUGFIX: use a context manager so the results file is closed even if a
    # write fails (the original open()/close() pair leaked on exceptions).
    with open("data/models/lstm_results.txt", "w", encoding="utf-8") as f:
        f.write("LSTM \n layers: 1 \n latent size: " + str(latent_size) +
                "\n vocab size: " + str(vocabulary_size) + "\n")

    history_dict = history.history
    plot_loss(history_dict)

    # inference — rebuild a step-wise decoder from the trained layers.
    encoder_model = seq2seq_model.get_layer('Encoder-Model')
    decoder_inputs = seq2seq_model.get_layer('Decoder-Input').input
    decoder_embeddings = seq2seq_model.get_layer('Decoder-Word-Embedding')(
        decoder_inputs)
    decoder_embeddings = seq2seq_model.get_layer(
        'Decoder-Batch-Normalization-1')(decoder_embeddings)
    inference_state_h_input = Input(shape=(latent_size, ),
                                    name='Hidden-State-Input')
    inference_state_c_input = Input(shape=(latent_size, ),
                                    name='Cell-State-Input')
    lstm_out, lstm_state_h_out, lstm_state_c_out = seq2seq_model.get_layer(
        'Decoder-LSTM')([
            decoder_embeddings, inference_state_h_input,
            inference_state_c_input
        ])
    decoder_outputs = seq2seq_model.get_layer('Decoder-Batch-Normalization-2')(
        lstm_out)
    dense_out = seq2seq_model.get_layer('Final-Output-Dense')(decoder_outputs)
    decoder_model = Model(
        [decoder_inputs, inference_state_h_input, inference_state_c_input],
        [dense_out, lstm_state_h_out, lstm_state_c_out])

    return encoder_model, decoder_model
def ResNet50(include_top=True,
             weights='imagenet',
             input_tensor=None,
             input_shape=None,
             pooling=None,
             classes=1000):
    """Instantiates the ResNet50 architecture.

    Optionally loads weights pre-trained on ImageNet. Note that when using
    TensorFlow, for best performance you should set
    `image_data_format='channels_last'` in your Keras config at
    ~/.keras/keras.json. The model and the weights are compatible with both
    TensorFlow and Theano. The data format convention used by the model is
    the one specified in your Keras config file.

    Arguments:
        include_top: whether to include the fully-connected layer at the
            top of the network.
        weights: one of `None` (random initialization), 'imagenet'
            (pre-training on ImageNet), or the path to the weights file to
            be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified if
            `include_top` is False (otherwise the input shape has to be
            `(224, 224, 3)` (with `channels_last` data format) or
            `(3, 224, 224)` (with `channels_first` data format). It should
            have exactly 3 inputs channels, and width and height should be
            no smaller than 197. E.g. `(200, 200, 3)` would be one valid
            value.
        pooling: Optional pooling mode for feature extraction when
            `include_top` is `False`.
            - `None` means that the output of the model will be the 4D
              tensor output of the last convolutional layer.
            - `avg` means that global average pooling will be applied to
              the output of the last convolutional layer, and thus the
              output of the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.
        classes: optional number of classes to classify images into, only
            to be specified if `include_top` is True, and if no `weights`
            argument is specified.

    Returns:
        A Keras model instance.

    Raises:
        ValueError: in case of invalid argument for `weights`, or invalid
            input shape.
    """
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')
    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as imagenet with `include_top`'
                         ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=197,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # Resolve the image input tensor.
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    # Batch-norm axis follows the channel axis of the data format.
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    # Stem: 7x7 conv, BN, ReLU, 3x3 max-pool.
    x = Conv2D(64, (7, 7), strides=(2, 2), padding='same',
               name='conv1')(img_input)
    x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Stage 2
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

    # Stage 3
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    for blk in ('b', 'c', 'd'):
        x = identity_block(x, 3, [128, 128, 512], stage=3, block=blk)

    # Stage 4
    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    for blk in ('b', 'c', 'd', 'e', 'f'):
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block=blk)

    # Stage 5
    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

    x = AveragePooling2D((7, 7), name='avg_pool')(x)

    if include_top:
        x = Flatten()(x)
        x = Dense(classes, activation='softmax', name='fc1000')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='resnet50')

    # load weights
    if weights == 'imagenet':
        if include_top:
            weights_path = get_file(
                'resnet50_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='models',
                md5_hash='a7b3fe01876f51b976af0dea6bc144eb')
        else:
            weights_path = get_file(
                'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='models',
                md5_hash='a268eb855778b3df3c7506639542a6af')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def conv_bn_lrelu(nb_filters, kernel=(4, 4), stride=(2, 2)):
    """Return a Sequential block: Conv2D -> BatchNormalization -> LeakyReLU.

    :param nb_filters: number of convolution filters.
    :param kernel: convolution kernel size.
    :param stride: convolution strides.
    """
    return Sequential([
        Conv2D(nb_filters, kernel, stride, padding="same"),
        BatchNormalization(),
        LeakyReLU(),
    ])
def get_Model(training):
    """Build the CRNN-CTC text-recognition model.

    :param training: when True, return the training model whose output is
        the CTC loss tensor; otherwise return the inference model whose
        output is the per-timestep softmax.
    """
    input_shape = (img_w, img_h, 1)  # (128, 64, 1)

    # Make Networkw
    inputs = Input(name='the_input', shape=input_shape,
                   dtype='float32')  # (None, 128, 64, 1)

    # Convolution layer (VGG)
    x = Conv2D(64, (3, 3), padding='same', name='conv1',
               kernel_initializer='he_normal')(inputs)  # (None, 128, 64, 64)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), name='max1')(x)  # (None, 64, 32, 64)

    x = Conv2D(128, (3, 3), padding='same', name='conv2',
               kernel_initializer='he_normal')(x)  # (None, 64, 32, 128)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), name='max2')(x)  # (None, 32, 16, 128)

    x = Conv2D(256, (3, 3), padding='same', name='conv3',
               kernel_initializer='he_normal')(x)  # (None, 32, 16, 256)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(256, (3, 3), padding='same', name='conv4',
               kernel_initializer='he_normal')(x)  # (None, 32, 16, 256)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(1, 2), name='max3')(x)  # (None, 32, 8, 256)

    x = Conv2D(512, (3, 3), padding='same', name='conv5',
               kernel_initializer='he_normal')(x)  # (None, 32, 8, 512)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(512, (3, 3), padding='same', name='conv6')(x)  # (None, 32, 8, 512)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(1, 2), name='max4')(x)  # (None, 32, 4, 512)

    # NOTE(review): the layer name 'con7' (not 'conv7') is kept verbatim —
    # renaming it would break any weight files saved with this model.
    x = Conv2D(512, (2, 2), padding='same', kernel_initializer='he_normal',
               name='con7')(x)  # (None, 32, 4, 512)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # CNN to RNN
    x = Reshape(target_shape=(32, 2048), name='reshape')(x)  # (None, 32, 2048)
    x = Dense(64, activation='relu', kernel_initializer='he_normal',
              name='dense1')(x)  # (None, 32, 64)

    # RNN layer — two manually-mirrored bidirectional LSTM stages.
    lstm_1 = LSTM(256, return_sequences=True, kernel_initializer='he_normal',
                  name='lstm1')(x)  # (None, 32, 512)
    lstm_1b = LSTM(256, return_sequences=True, go_backwards=True,
                   kernel_initializer='he_normal', name='lstm1_b')(x)
    reversed_lstm_1b = Lambda(
        lambda inputTensor: K.reverse(inputTensor, axes=1))(lstm_1b)

    lstm1_merged = add([lstm_1, reversed_lstm_1b])  # (None, 32, 512)
    lstm1_merged = BatchNormalization()(lstm1_merged)

    lstm_2 = LSTM(256, return_sequences=True, kernel_initializer='he_normal',
                  name='lstm2')(lstm1_merged)
    lstm_2b = LSTM(256, return_sequences=True, go_backwards=True,
                   kernel_initializer='he_normal', name='lstm2_b')(lstm1_merged)
    reversed_lstm_2b = Lambda(
        lambda inputTensor: K.reverse(inputTensor, axes=1))(lstm_2b)

    lstm2_merged = concatenate([lstm_2, reversed_lstm_2b])  # (None, 32, 1024)
    lstm2_merged = BatchNormalization()(lstm2_merged)

    # transforms RNN output to character activations:
    x = Dense(num_classes, kernel_initializer='he_normal',
              name='dense2')(lstm2_merged)  # (None, 32, 63)
    y_pred = Activation('softmax', name='softmax')(x)

    labels = Input(name='the_labels', shape=[max_text_len],
                   dtype='float32')  # (None ,8)
    input_length = Input(name='input_length', shape=[1],
                         dtype='int64')  # (None, 1)
    label_length = Input(name='label_length', shape=[1],
                         dtype='int64')  # (None, 1)

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(focal_ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([labels, y_pred, input_length,
                                   label_length])  # (None, 1)

    if training:
        return Model(inputs=[inputs, labels, input_length, label_length],
                     outputs=loss_out)
    return Model(inputs=[inputs], outputs=y_pred)
def __init__(self, training=True):
    """Construct the VGG-style convolutional backbone with self-attention.

    :param training: when False, an extra softmax activation layer is
        created for inference-time predictions.
    """
    super(VGGATTModel, self).__init__()

    def conv(filters, name):
        # All conv layers share the same 3x3 / relu / same-padding / L2 setup.
        return Conv2D(filters, (3, 3),
                      activation='relu',
                      padding='same',
                      name=name,
                      kernel_regularizer=tf.keras.regularizers.l2(0.0001))

    # block 1
    self.block1_conv1 = conv(64, 'block1_conv1')
    self.block1_conv2 = conv(64, 'block1_conv2')
    self.block1_pool = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')
    self.block1_batch_norm = BatchNormalization(name='block1_batch_norm')

    # block 2
    self.block2_conv1 = conv(128, 'block2_conv1')
    self.block2_conv2 = conv(128, 'block2_conv2')
    self.block2_pool = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')
    self.block2_batch_norm = BatchNormalization(name='block2_batch_norm')

    # Block 3
    self.block3_conv1 = conv(256, 'block3_conv1')
    self.block3_conv2 = conv(256, 'block3_conv2')
    self.block3_conv3 = conv(256, 'block3_conv3')
    self.block3_pool = MaxPooling2D((2, 2), strides=(1, 2), name='block3_pool')
    self.block3_batch_norm = BatchNormalization(name='block3_batch_norm')

    # Block 4
    self.block4_conv1 = conv(512, 'block4_conv1')
    self.block4_conv2 = conv(512, 'block4_conv2')
    self.block4_conv3 = conv(512, 'block4_conv3')
    self.block4_pool = MaxPooling2D((2, 2), strides=(1, 2), name='block4_pool')
    self.block4_batch_norm = BatchNormalization(name='block4_batch_norm')

    # Block 5
    # NOTE(review): the attribute name 'blcok5_conv1' is a typo in the
    # original; it is kept verbatim because other code may reference it.
    self.blcok5_conv1 = conv(512, 'block5_conv1')
    self.block5_conv2 = conv(512, 'block5_conv2')
    self.block5_conv3 = conv(512, 'block5_conv3')
    self.block5_pool = MaxPooling2D((1, 2), strides=(1, 2), name='block5_pool')
    self.block5_batch_norm = BatchNormalization(name='block5_batch_norm')

    # Block 6
    self.block6_reshape = Reshape(target_shape=(-1, 512))
    self.self_attention1 = SelfAttention(name='attention')

    # Block 7
    self.block7_prediction = Dense(units=4651,
                                   kernel_initializer='he_normal',
                                   name='ctc_y')

    self.training = training
    if not training:
        self.block7_softmax_pred = Activation('softmax', name='softmax')
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
    # Default to use_bias=False (BN supplies the bias), but let explicit
    # caller kwargs override it — same precedence as the original update().
    merged_kwargs = {'use_bias': False, **kwargs}
    return compose(
        DarknetConv2D(*args, **merged_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1))
def seq2seq_architecture(latent_size, vocabulary_size, max_len_article,
                         embedding_matrix, batch_size, epochs, train_article,
                         train_summary, train_target):
    """Build and train an attention-augmented LSTM seq2seq summarizer.

    :param latent_size: LSTM hidden-state size.
    :param vocabulary_size: vocabulary size for embeddings and output dense.
    :param max_len_article: maximum article length (kept for interface
        compatibility; not used in the computation below).
    :param embedding_matrix: pretrained embedding weights (frozen).
    :param batch_size: training batch size.
    :param epochs: number of training epochs.
    :param train_article: encoder input sequences.
    :param train_summary: decoder input sequences (also used for class weights).
    :param train_target: target sequences (shifted summaries).
    :return: the trained seq2seq Model.
    """
    # encoder
    encoder_inputs = Input(shape=(None, ), name='Encoder-Input')
    encoder_embeddings = Embedding(vocabulary_size, 300,
                                   weights=[embedding_matrix],
                                   trainable=False, mask_zero=True,
                                   name='Encoder-Word-Embedding')
    norm_encoder_embeddings = BatchNormalization(
        name='Encoder-Batch-Normalization')
    encoder_lstm_1 = LSTM(latent_size, name='Encoder-LSTM-1',
                          return_sequences=True, dropout=0.2,
                          recurrent_dropout=0.2)

    e = encoder_embeddings(encoder_inputs)
    e = norm_encoder_embeddings(e)
    encoder_outputs = encoder_lstm_1(e)
    # Use the last timestep's hidden output as the decoder's initial state.
    encoder_last = encoder_outputs[:, -1, :]

    # decoder
    decoder_inputs = Input(shape=(None, ), name='Decoder-Input')
    decoder_embeddings = Embedding(vocabulary_size, 300,
                                   weights=[embedding_matrix],
                                   trainable=False, mask_zero=True,
                                   name='Decoder-Word-Embedding')
    norm_decoder_embeddings = BatchNormalization(
        name='Decoder-Batch-Normalization-1')
    decoder_lstm_1 = LSTM(latent_size, name='Decoder-LSTM-1',
                          return_sequences=True, dropout=0.2,
                          recurrent_dropout=0.2)
    norm_decoder = BatchNormalization(name='Decoder-Batch-Normalization-2')
    attention_activation = Activation('softmax', name='Attention')
    dense_intermediate = TimeDistributed(
        Dense(64, activation="tanh", name="Intermediate-Output-Dense"))
    dense_final = TimeDistributed(
        Dense(vocabulary_size, activation="softmax",
              name="Final-Output-Dense"))

    d = decoder_embeddings(decoder_inputs)
    d = norm_decoder_embeddings(d)
    decoder_outputs = decoder_lstm_1(
        d, initial_state=[encoder_last, encoder_last])
    decoder_outputs = norm_decoder(decoder_outputs)

    # Luong-style dot-product attention over the encoder outputs.
    attention = dot([decoder_outputs, encoder_outputs], axes=[2, 2])
    attention = attention_activation(attention)
    context = dot([attention, encoder_outputs], axes=[2, 1])
    decoder_combined_context = concatenate([context, decoder_outputs])
    outputs = dense_intermediate(decoder_combined_context)
    decoder_last = dense_final(outputs)

    seq2seq_model = Model(inputs=[encoder_inputs, decoder_inputs],
                          outputs=decoder_last)
    seq2seq_model.compile(optimizer="rmsprop",
                          loss='sparse_categorical_crossentropy',
                          metrics=['sparse_categorical_accuracy'])
    seq2seq_model.summary()

    classes = [item for sublist in train_summary.tolist() for item in sublist]
    # BUGFIX: compute_class_weight's parameters are keyword-only in recent
    # scikit-learn; the positional call raises TypeError there.
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced', classes=np.unique(classes), y=classes)

    # NOTE(review): e_stopping is created but not passed to fit() — kept
    # as-is to preserve behavior; confirm whether callbacks=[e_stopping]
    # was intended.
    e_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1,
                               mode='min', restore_best_weights=True)
    history = seq2seq_model.fit(x=[train_article, train_summary],
                                y=np.expand_dims(train_target, -1),
                                batch_size=batch_size, epochs=epochs,
                                validation_split=0.1,
                                class_weight=class_weights)

    # BUGFIX: use a context manager so the results file is closed even if a
    # write fails (the original open()/close() pair leaked on exceptions).
    with open("data/models/results.txt", "w", encoding="utf-8") as f:
        f.write("Attention LSTM \n layers: 1 \n latent size: " +
                str(latent_size) + "\n vocab size: " + str(vocabulary_size) +
                "\n")

    history_dict = history.history
    plot_loss(history_dict)

    return seq2seq_model
def compute_output_shape(self, input_shape):
    """Return the output shape, delegating to BatchNormalization when active.

    Batch normalization preserves shape, so this delegates to the Keras
    layer's own shape computation only when ``self.norm == 'bn'``.
    """
    if self.norm != 'bn':
        return input_shape
    return BatchNormalization(axis=-1).compute_output_shape(input_shape)
def __init__(
    self,
    n_words: int,
    n_topics: int = 20,
    posterior: Literal['gaussian', 'dirichlet'] = 'dirichlet',
    posterior_activation: Union[str, Callable[[], Tensor]] = 'softplus',
    concentration_clip: bool = True,
    distribution: Literal['onehot', 'negativebinomial', 'binomial', 'poisson',
                          'zinb'] = 'onehot',
    dropout: float = 0.0,
    dropout_strategy: Literal['all', 'warmup', 'finetune'] = 'warmup',
    batch_norm: bool = False,
    trainable_prior: bool = True,
    warmup: int = 10000,
    step: Union[int, Variable] = 0,
    input_shape: Optional[List[int]] = None,
    name: str = "Topics",
):
    """Configure the topic-model layer: posterior, word distribution, priors.

    :param n_words: vocabulary size of the word distribution.
    :param n_topics: number of latent topics.
    :param posterior: latent posterior family ('dirichlet' or 'gaussian').
    :param posterior_activation: activation for the posterior parameters.
    :param concentration_clip: clip Dirichlet concentrations (dirichlet only).
    :param distribution: output word distribution family.
    :param dropout: dropout rate applied per dropout_strategy.
    :param dropout_strategy: one of 'all', 'warmup', 'finetune'.
    :param batch_norm: whether to batch-normalize before the word logits.
    :param trainable_prior: whether the topics prior logits are trainable.
    :param warmup: number of warmup steps.
    :param step: global step counter (int or tf.Variable).
    :param input_shape: if given, build the layer immediately.
    :param name: layer name.
    """
    super().__init__(name=name)
    self.n_words = int(n_words)
    self.n_topics = int(n_topics)
    self.batch_norm = bool(batch_norm)
    self.warmup = int(warmup)
    self.posterior = str(posterior).lower()
    self.distribution = str(distribution).lower()
    self.dropout = float(dropout)
    # BUGFIX: a second, redundant `self.warmup = int(warmup)` assignment
    # was removed.
    assert dropout_strategy in ('all', 'warmup', 'finetune'), \
        ("Support dropout strategy: all, warmup, finetune; "
         f"but given:{dropout_strategy}")
    self.dropout_strategy = str(dropout_strategy)

    if isinstance(step, Variable):
        self.step = step
    else:
        self.step = Variable(int(step),
                             dtype=tf.float32,
                             trainable=False,
                             name="Step")

    ### batch norm
    if self.batch_norm:
        self._batch_norm_layer = BatchNormalization(trainable=True)

    ### posterior
    # CONSISTENCY FIX: branch on the normalized `self.posterior` (the
    # original compared the raw argument, so e.g. 'Dirichlet' fell through
    # to NotImplementedError despite being normalized above).
    kw = dict(event_shape=(n_topics, ), name="TopicsPosterior")
    if self.posterior == 'dirichlet':
        kw['posterior'] = DirichletLayer
        init_value = softplus_inverse(0.7).numpy()
        post_kw = dict(concentration_activation=posterior_activation,
                       concentration_clip=concentration_clip)
    elif self.posterior == "gaussian":
        kw['posterior'] = MultivariateNormalLayer
        init_value = 0.
        post_kw = dict(covariance='diag',
                       loc_activation='identity',
                       scale_activation=posterior_activation)
    else:
        raise NotImplementedError(
            "Support one of the following latent distribution: "
            "'gaussian', 'dirichlet'")

    self.topics_prior_logits = self.add_weight(
        initializer=tf.initializers.constant(value=init_value),
        shape=[1, n_topics],
        trainable=bool(trainable_prior),
        name="topics_prior_logits")
    self.posterior_layer = DenseDistribution(
        posterior_kwargs=post_kw,
        prior=self.topics_prior_distribution,
        projection=True,
        **kw)

    ### output distribution
    kw = dict(event_shape=(self.n_words, ), name="WordsDistribution")
    count_activation = 'softplus'
    if self.distribution in ('onehot', ):
        self.distribution_layer = OneHotCategoricalLayer(probs_input=True,
                                                         **kw)
        self.n_parameterization = 1
    elif self.distribution in ('poisson', ):
        self.distribution_layer = PoissonLayer(**kw)
        self.n_parameterization = 1
    elif self.distribution in ('negativebinomial', 'nb'):
        self.distribution_layer = NegativeBinomialLayer(
            count_activation=count_activation, **kw)
        self.n_parameterization = 2
    elif self.distribution in ('zinb', ):
        self.distribution_layer = ZINegativeBinomialLayer(
            count_activation=count_activation, **kw)
        self.n_parameterization = 3
    elif self.distribution in ('binomial', ):
        self.distribution_layer = BinomialLayer(
            count_activation=count_activation, **kw)
        self.n_parameterization = 2
    else:
        raise ValueError(
            f"No support for word distribution: {self.distribution}")

    # topics words parameterization
    self.topics_words_params = self.add_weight(
        'topics_words_params',
        shape=[self.n_topics, self.n_words * self.n_parameterization],
        initializer=tf.initializers.glorot_normal(),
        trainable=True)

    # initialize the Model if input_shape given
    if input_shape is not None:
        self.build((None, ) + tuple(input_shape))
def up_conv_bn_relu(nb_filters, kernel=(3, 3), stride=(1, 1)):
    """Return a Sequential block: UpSampling2D -> Conv2D -> BN -> ReLU.

    :param nb_filters: number of convolution filters.
    :param kernel: convolution kernel size.
    :param stride: convolution strides.
    """
    return Sequential([
        UpSampling2D(size=(2, 2)),
        Conv2D(nb_filters, kernel, stride, padding="same"),
        BatchNormalization(),
        Activation('relu'),
    ])
def build_network(Inputshape1, Inputshape2, num_class):
    """Build a two-branch dilated-convolution network and compile it.

    One branch encodes DCE MRI, the other DWI. Each branch is five blocks
    of 3x3/64-filter Conv -> BN -> ReLU units with dilation rates
    1, 2, 4, 6, 8; the second unit of every block is tapped and the taps
    of both branches are concatenated before a 1x1 fusion head ending in
    a 2-channel softmax.

    :param Inputshape1: input shape of the DCE branch, e.g. (H, W, C).
    :param Inputshape2: input shape of the DWI branch.
    :param num_class: NOTE(review): currently unused — the output layer is
        hard-coded to 2 channels; confirm intent before relying on it.
    :return: a compiled Keras Model taking [DCE, DWI] inputs.
    """
    concat_axis = 3  # channels-last feature axis

    def conv_bn_relu(x, dilation, tag):
        # One Conv2D(64, 3x3, linear) -> BatchNorm -> ReLU unit.
        # Layer names keep the original scheme: conv<tag> / BN<tag> / act_<tag>.
        x = Conv2D(64, (3, 3),
                   activation=None,
                   dilation_rate=dilation,
                   padding='same',
                   kernel_initializer='he_uniform',
                   name='conv' + tag)(x)
        x = BatchNormalization(axis=-1, name='BN' + tag)(x)
        return Activation(activation='relu', name='act_' + tag)(x)

    def branch(x, suffix):
        # Five dilation blocks. Blocks 3-5 carry a third unit ('_3') whose
        # output feeds the next block but is never concatenated — this
        # mirrors the original hand-written graph (the final '5_3' unit is
        # dangling there too). Returns the '<block>_2' taps for the concat.
        taps = []
        for block, dilation, depth in (('1', 1, 2), ('2', 2, 2),
                                       ('3', 4, 3), ('4', 6, 3),
                                       ('5', 8, 3)):
            x = conv_bn_relu(x, dilation, block + suffix)
            x = conv_bn_relu(x, dilation, block + '_2' + suffix)
            taps.append(x)
            if depth == 3:
                x = conv_bn_relu(x, dilation, block + '_3' + suffix)
        return taps

    inputsDCE = layers.Input(shape=Inputshape1)
    inputsDWI = layers.Input(shape=Inputshape2)

    dce_taps = branch(inputsDCE, '')   # DCE MRI branch (unsuffixed names)
    dwi_taps = branch(inputsDWI, 'D')  # DWI branch ('D'-suffixed names)

    # Interleave DCE/DWI taps scale by scale, matching the original
    # concatenation order: conv1_2, conv1_2D, conv2_2, conv2_2D, ...
    merged = []
    for dce, dwi in zip(dce_taps, dwi_taps):
        merged.extend([dce, dwi])
    concat = layers.concatenate(merged, axis=concat_axis)

    # Block 6: dropout-wrapped 1x1 fusion, then per-pixel 2-way softmax.
    dropout1 = Dropout(0.2)(concat)
    conv6 = Conv2D(128, (1, 1),
                   activation=None,
                   dilation_rate=1,
                   kernel_initializer='he_uniform',
                   name='conv6')(dropout1)
    conv6 = BatchNormalization(axis=-1, name='BN6')(conv6)
    conv6 = Activation(activation='relu', name='act_6')(conv6)
    dropout2 = Dropout(0.2)(conv6)
    conv6_2 = layers.Conv2D(2, (1, 1), activation='softmax')(dropout2)

    model = models.Model(inputs=[inputsDCE, inputsDWI], outputs=conv6_2)
    model.compile(optimizer=optimizers.Adam(lr=0.0001, decay=0.0),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'],
                  sample_weight_mode='temporal')
    # lr was 0.001
    return model
def create_norm(norm, coloring, decomposition='zca', iter_num=5, whitten_group=0,
                coloring_group=0, instance_norm=0, cls=None, number_of_classes=None,
                filters_emb=10, uncoditional_conv_layer=Conv2D,
                conditional_conv_layer=ConditionalConv11,
                factor_conv_layer=FactorizedConv11):
    """Return a factory for a normalization + re-coloring layer stack.

    The returned callable ``result_norm(axis, name)`` produces a function
    that applies a normalization part (named ``<name>_npart``) followed by a
    re-parameterization ("coloring") part (named ``<name>_repart``).

    :param norm: normalization kind: 'n' (none), 'b' (batch-norm without
        affine params), 'd' (decorrelation/whitening), 'dr' (decorrelation
        with renorm).
    :param coloring: re-coloring kind, one of the asserted set below;
        'c*' variants are class-conditional (they consume ``cls``),
        'u*' variants are unconditional, combined variants add both paths.
    :param decomposition: whitening decomposition passed to
        DecorelationNormalization (e.g. 'zca').
    :param iter_num: iteration count for the whitening decomposition.
    :param whitten_group: group count for grouped whitening.
    :param coloring_group: if > 1, coloring convs are applied per channel
        split and re-concatenated.
    :param instance_norm: instance-norm flag forwarded to whitening.
    :param cls: class-label tensor consumed by conditional colorings.
    :param number_of_classes: number of classes for conditional layers.
    :param filters_emb: embedding size for factorized convs.
    :param uncoditional_conv_layer: layer class for unconditional 1x1 convs.
    :param conditional_conv_layer: layer class for conditional 1x1 convs.
    :param factor_conv_layer: layer class for factorized conditional convs.
    :return: ``result_norm(axis, name) -> (tensor -> tensor)``.
    """
    assert norm in ['n', 'b', 'd', 'dr']
    assert coloring in ['ucs', 'ccs', 'uccs', 'uconv', 'fconv', 'ufconv', 'cconv',
                        'ucconv', 'ccsuconv', 'n']
    # --- normalization part ---
    if norm == 'n':
        # Identity: no normalization.
        norm_layer = lambda axis, name: (lambda inp: inp)
    elif norm == 'b':
        # Batch-norm with affine params disabled: scaling/centering is left
        # to the coloring part below.
        norm_layer = lambda axis, name: BatchNormalization(axis=axis, center=False,
                                                           scale=False, name=name)
    elif norm == 'd':
        norm_layer = lambda axis, name: DecorelationNormalization(name=name,
                                                                  group=whitten_group,
                                                                  decomposition=decomposition,
                                                                  iter_num=iter_num,
                                                                  instance_norm=instance_norm)
    elif norm == 'dr':
        norm_layer = lambda axis, name: DecorelationNormalization(name=name,
                                                                  group=whitten_group,
                                                                  decomposition=decomposition,
                                                                  iter_num=iter_num,
                                                                  instance_norm=instance_norm,
                                                                  renorm=True)
    # --- coloring (re-parameterization) part ---
    if coloring == 'ccs':
        # Conditional center/scale driven by the class tensor.
        after_norm_layer = lambda axis, name: lambda x: ConditionalCenterScale(
            number_of_classes=number_of_classes, axis=axis, name=name)([x, cls])
    elif coloring == 'ucs':
        # Unconditional center/scale (plain learned affine).
        after_norm_layer = lambda axis, name: lambda x: CenterScale(axis=axis, name=name)(x)
    elif coloring == 'uccs':
        # Sum of conditional ('_c') and unconditional ('_u') center/scale.
        def after_norm_layer(axis, name):
            def f(x):
                c = ConditionalCenterScale(number_of_classes=number_of_classes, axis=axis,
                                           name=name + '_c')([x, cls])
                u = CenterScale(axis=axis, name=name + '_u')(x)
                out = Add(name=name + '_a')([c, u])
                return out
            return f
    elif coloring == 'cconv':
        # Conditional 1x1 conv; optionally applied per channel group.
        def after_norm_layer(axis, name):
            def f(x):
                if coloring_group > 1:
                    splits = Split(coloring_group, axis)(x)
                    outs = []
                    for i, split in enumerate(splits):
                        split_out = conditional_conv_layer(
                            filters=K.int_shape(x)[axis]//coloring_group,
                            number_of_classes=number_of_classes,
                            name=name+str(i))([split, cls])
                        outs.append(split_out)
                    out = tf.keras.layers.Concatenate(axis)(outs)
                else:
                    out = conditional_conv_layer(filters=K.int_shape(x)[axis],
                                                 number_of_classes=number_of_classes,
                                                 name=name)([x, cls])
                return out
            return f
    elif coloring == 'fconv':
        # Factorized conditional 1x1 conv (class embedding of size filters_emb).
        def after_norm_layer(axis, name):
            def f(x):
                if coloring_group > 1:
                    splits = Split(coloring_group, axis)(x)
                    outs = []
                    for i, split in enumerate(splits):
                        split_out = factor_conv_layer(
                            filters=K.int_shape(x)[axis]//coloring_group,
                            number_of_classes=number_of_classes,
                            name=name + '_c'+str(i),
                            filters_emb=filters_emb,
                            use_bias=False)([split, cls])
                        outs.append(split_out)
                    out = tf.keras.layers.Concatenate(axis)(outs)
                else:
                    out = factor_conv_layer(filters=K.int_shape(x)[axis],
                                            number_of_classes=number_of_classes,
                                            name=name + '_c',
                                            filters_emb=filters_emb,
                                            use_bias=False)([x, cls])
                return out
            return f
    elif coloring == 'uconv':
        # Unconditional 1x1 conv; optionally applied per channel group.
        def after_norm_layer(axis, name):
            def f(x):
                if coloring_group > 1:
                    splits = Split(coloring_group, axis)(x)
                    outs = []
                    for i, split in enumerate(splits):
                        split_out = uncoditional_conv_layer(
                            filters=K.int_shape(x)[axis]//coloring_group,
                            kernel_size=(1, 1),
                            name=name+str(i))(split)
                        outs.append(split_out)
                    out = tf.keras.layers.Concatenate(axis)(outs)
                else:
                    out = uncoditional_conv_layer(filters=K.int_shape(x)[axis],
                                                  kernel_size=(1, 1),
                                                  name=name)(x)
                return out
            return f
    elif coloring == 'ucconv':
        # Sum of conditional ('_c') and unconditional ('_u') 1x1 convs.
        # NOTE(review): in the grouped path the unconditional conv keeps the
        # FULL channel count per split (filters=K.int_shape(x)[axis]), unlike
        # the conditional path which divides by coloring_group — verify this
        # asymmetry is intended before changing it.
        def after_norm_layer(axis, name):
            def f(x):
                if coloring_group > 1:
                    splits = Split(coloring_group, axis)(x)
                    cs = []
                    us = []
                    for i, split in enumerate(splits):
                        split_c = conditional_conv_layer(
                            filters=K.int_shape(x)[axis]//coloring_group,
                            number_of_classes=number_of_classes,
                            name=name + '_c'+str(i))([split, cls])
                        split_u = uncoditional_conv_layer(
                            kernel_size=(1, 1),
                            filters=K.int_shape(x)[axis],
                            name=name + '_u'+str(i))(split)
                        cs.append(split_c)
                        us.append(split_u)
                    c = tf.keras.layers.Concatenate(axis)(cs)
                    u = tf.keras.layers.Concatenate(axis)(us)
                else:
                    c = conditional_conv_layer(filters=K.int_shape(x)[axis],
                                               number_of_classes=number_of_classes,
                                               name=name + '_c')([x, cls])
                    u = uncoditional_conv_layer(kernel_size=(1, 1),
                                                filters=K.int_shape(x)[axis],
                                                name=name + '_u')(x)
                out = Add(name=name + '_a')([c, u])
                return out
            return f
    elif coloring == 'ccsuconv':
        # Conditional center/scale plus unconditional 1x1 conv.
        def after_norm_layer(axis, name):
            def f(x):
                c = ConditionalCenterScale(number_of_classes=number_of_classes, axis=axis,
                                           name=name + '_c')([x, cls])
                if coloring_group > 1:
                    splits = Split(coloring_group, axis)(x)
                    us = []
                    for i, split in enumerate(splits):
                        split_u = uncoditional_conv_layer(
                            kernel_size=(1, 1),
                            filters=K.int_shape(x)[axis]//coloring_group,
                            name=name + '_u'+str(i))(split)
                        us.append(split_u)
                    u = tf.keras.layers.Concatenate(axis)(us)
                else:
                    u = uncoditional_conv_layer(kernel_size=(1, 1),
                                                filters=K.int_shape(x)[axis],
                                                name=name + '_u')(x)
                out = Add(name=name + '_a')([c, u])
                return out
            return f
    elif coloring == 'ufconv':
        # Factorized conditional conv plus unconditional 1x1 conv.
        def after_norm_layer(axis, name):
            def f(x):
                if coloring_group > 1:
                    splits = Split(coloring_group, axis)(x)
                    cs = []
                    us = []
                    for i, split in enumerate(splits):
                        split_c = factor_conv_layer(
                            number_of_classes=number_of_classes,
                            name=name + '_c'+str(i),
                            filters=K.int_shape(x)[axis]//coloring_group,
                            filters_emb=filters_emb,
                            use_bias=False)([split, cls])
                        split_u = uncoditional_conv_layer(
                            kernel_size=(1, 1),
                            filters=K.int_shape(x)[axis]//coloring_group,
                            name=name + '_u'+str(i))(split)
                        cs.append(split_c)
                        us.append(split_u)
                    c = tf.keras.layers.Concatenate(axis)(cs)
                    u = tf.keras.layers.Concatenate(axis)(us)
                else:
                    c = factor_conv_layer(number_of_classes=number_of_classes,
                                          name=name + '_c',
                                          filters=K.int_shape(x)[axis],
                                          filters_emb=filters_emb,
                                          use_bias=False)([x, cls])
                    u = uncoditional_conv_layer(kernel_size=(1, 1),
                                                filters=K.int_shape(x)[axis],
                                                name=name + '_u')(x)
                out = Add(name=name + '_a')([c, u])
                return out
            return f
    elif coloring == 'n':
        # Identity: no coloring.
        after_norm_layer = lambda axis, name: lambda x: x

    def result_norm(axis, name):
        # Compose the two selected parts into a single tensor -> tensor stack.
        def stack(inp):
            out = inp
            out = norm_layer(axis=axis, name=name + '_npart')(out)
            out = after_norm_layer(axis=axis, name=name + '_repart')(out)
            return out
        return stack
    return result_norm
def build_base_network(input_shape=(None, None, 3), num_units=16):
    """Build the convolutional base network.

    Block 1 applies three stride-1 convolutions at the base width; blocks
    2-5 each apply a stride-1 conv followed by a stride-2 (downsampling)
    conv, widening to 2x, 4x, 8x and 8x the base width. Every conv is
    followed by batch normalization.

    :param input_shape: shape of the input tensor (channels last).
    :param num_units: base number of filters.
    :return: a Keras Model named 'base_network'.
    """
    inputs = Input(shape=input_shape)
    x = inputs

    # BLOCK 1: three stride-1 convs, no downsampling.
    for idx in (1, 2, 3):
        x = Conv2D(num_units, (3, 3), strides=(1, 1), activation='relu',
                   padding='same', name='conv1_%d' % idx)(x)
        x = BatchNormalization(name='norm1_%d' % idx)(x)

    # BLOCKS 2-5: conv, then stride-2 conv; filter multipliers 2, 4, 8, 8.
    for block, mult in ((2, 2), (3, 4), (4, 8), (5, 8)):
        x = Conv2D(num_units * mult, (3, 3), activation='relu',
                   padding='same', name='conv%d_1' % block)(x)
        x = BatchNormalization(name='norm%d_1' % block)(x)
        x = Conv2D(num_units * mult, (3, 3), strides=(2, 2), activation='relu',
                   padding='same', name='conv%d_2' % block)(x)
        x = BatchNormalization(name='norm%d_2' % block)(x)

    return Model(inputs, x, name='base_network')
batch_size = 20

# Stream categorical training batches from the tube-image directory.
train_generator = data_generator.flow_from_directory(
    'data/resnetinput_tube/',
    target_size=(431, 401),
    batch_size=batch_size,
    class_mode='categorical')
num_classes = len(train_generator.class_indices)

# ResNet50 backbone (global-average pooled) + dense classification head.
# NOTE(review): weights=None starts the backbone from RANDOM weights, so
# freezing it below trains the head on random features — confirm this is
# intentional (weights='imagenet' is the usual transfer-learning setup).
model = Sequential()
model.add(ResNet50(include_top=False, pooling='avg', weights=None))
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(2048, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(1024, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(num_classes, activation='softmax'))

# Freeze the backbone BEFORE compiling: Keras only honours changes to
# `trainable` made before compile(); the original script froze it after
# compiling, which has no effect until the model is re-compiled.
model.layers[0].trainable = False

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# (Removed dead code: an unused `count` that walked an unrelated
# flowers-recognition dataset directory.)
model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=10)
def _bn_relu(input):
    """Helper to build a BN -> relu block."""
    normalized = BatchNormalization(axis=CHANNEL_AXIS)(input)
    activated = Activation("relu")(normalized)
    return activated
def layers(self):
    """Build the mask encoder network.

    Each downsampling stage halves the spatial resolution, widens the
    channel count, and taps a MaskResidualLayer skip connection from the
    conv output BEFORE batch normalization (matching the original graph).

    :return: tuple of (mask_input, [latent_vector, skip_4x4x512,
        skip_8x8x256, skip_16x16x128, skip_32x32x64, skip_64x64x32,
        skip_128x128x16]).
    """
    def down_step(net, filters):
        # ConvSN -> residual tap (pre-BN) -> BN -> Swish -> 2x2 pool -> dropout.
        net = ConvSN2D(filters=filters,
                       kernel_size=(3, 3),
                       data_format='channels_last',
                       padding='same')(net)
        skip = MaskResidualLayer(depth=filters)(net)
        net = BatchNormalization()(net)
        net = SwishLayer()(net)
        net = MaxPooling2D(pool_size=2)(net)
        net = Dropout(self.dropout)(net)
        return net, skip

    mask_input = Input(self.input_shape, self.batch_size, name="mask_input")

    ########################
    # Mask encoder network #
    ########################
    net = mask_input                              # 128x128x3
    net, self.mask128x128x16 = down_step(net, 16)
    net, self.mask64x64x32 = down_step(net, 32)   # 64x64x16 in
    net, self.mask32x32x64 = down_step(net, 64)   # 32x32x32 in
    net, self.mask16x16x128 = down_step(net, 128) # 16x16x64 in
    net, self.mask8x8x256 = down_step(net, 256)   # 8x8x128 in
    net, self.mask4x4x512 = down_step(net, 512)   # 4x4x256 in

    # Final stage (2x2x512 in): no residual tap; collapse to a flat latent.
    net = ConvSN2D(filters=1024,
                   kernel_size=(3, 3),
                   data_format='channels_last',
                   padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)
    net = MaxPooling2D(pool_size=2)(net)          # 1x1x1024
    net = Dropout(self.dropout)(net)
    net = Flatten(name="mask_flatten")(net)
    net = DenseSN(self.latent_size, name="epsilon_DenseSN")(net)
    self.mask_1024 = net
    return mask_input, [self.mask_1024,
                        self.mask4x4x512,
                        self.mask8x8x256,
                        self.mask16x16x128,
                        self.mask32x32x64,
                        self.mask64x64x32,
                        self.mask128x128x16]
def cifar10_complicated_ensemble(
        input_shape=None,
        input_tensor=None,
        n_classes=None,
        weights_path: Union[None, str] = None) -> Model:
    """
    Defines a cifar10 network built from five convolutional submodels.

    Each submodel predicts a small subset of the 10 classes; where two
    submodels cover the same class, their predictions are averaged. All
    per-class outputs are concatenated and passed through one softmax.

    :param n_classes: used in order to be compatible with the main script.
    :param input_shape: the input shape of the network. Can be omitted if input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if input_shape is used.
    :param weights_path: a path to a trained custom network's weights.
    :return: Keras functional API Model.
    """
    # Collects each submodel's (possibly cropped/averaged) class outputs.
    output_list = []
    inputs = create_inputs(input_shape, input_tensor)

    # Define a weight decay for the regularisation.
    weight_decay = 1e-4

    # Submodel 1: predicts classes 1-2 (class 2 shared with submodel 2).
    # Block1.
    x1 = Conv2D(32, (3, 3), padding='same', activation='elu',
                name='submodel1_block1_conv1',
                kernel_regularizer=l2(weight_decay))(inputs)
    x1 = BatchNormalization(name='submodel1_block1_batch-norm')(x1)
    x1 = Conv2D(32, (3, 3), padding='same', activation='elu',
                name='submodel1_block1_conv2',
                kernel_regularizer=l2(weight_decay))(x1)
    x1 = MaxPooling2D(pool_size=(2, 2), name='submodel1_block1_pool')(x1)
    x1 = Dropout(0.2, name='submodel1_block1_dropout', seed=0)(x1)

    # Block2
    x1 = Conv2D(64, (3, 3), padding='same', activation='elu',
                name='submodel1_block2_conv1',
                kernel_regularizer=l2(weight_decay))(x1)
    x1 = BatchNormalization(name='submodel1_block2_batch-norm')(x1)
    x1 = Conv2D(64, (3, 3), padding='same', activation='elu',
                name='submodel1_block2_conv2',
                kernel_regularizer=l2(weight_decay))(x1)
    x1 = MaxPooling2D(pool_size=(2, 2), name='submodel1_block2_pool')(x1)
    x1 = Dropout(0.4, name='submodel1_block2_dropout', seed=0)(x1)

    # Add Submodel 1 top layers.
    x1 = Flatten(name='submodel1_flatten')(x1)
    outputs1 = Dense(2, name='submodel1_output')(x1)

    # Crop outputs1 in order to create the first submodel's output.
    outputs_first_submodel = Crop(1, 0, 1,
                                  name='first_class_submodel')(outputs1)
    output_list.append(outputs_first_submodel)

    # Submodel 2: predicts classes 2-4 (2 shared with submodel 1,
    # 4 shared with submodel 3).
    x2 = Conv2D(32, (3, 3), padding='same', activation='elu',
                name='submodel2_conv1',
                kernel_regularizer=l2(weight_decay))(inputs)
    x2 = BatchNormalization(name='submodel2_batch-norm')(x2)
    x2 = Conv2D(32, (3, 3), padding='same', activation='elu',
                name='submodel2_conv2',
                kernel_regularizer=l2(weight_decay))(x2)
    x2 = MaxPooling2D(pool_size=(2, 2), name='submodel2_pool')(x2)
    x2 = Dropout(0.3, name='submodel2_dropout', seed=0)(x2)

    # Add Submodel 2 top layers.
    x2 = Flatten(name='submodel2_flatten')(x2)
    outputs2 = Dense(3, name='submodel2_output')(x2)

    # Average the predictions for the second class of the first two submodels.
    averaged_class_2 = Average(name='averaged_second_class')(
        [Crop(1, 1, 2)(outputs1),
         Crop(1, 0, 1)(outputs2)])

    # Crop outputs2 in order to create the third class output.
    outputs_class3 = Crop(1, 1, 2, name='third_class')(outputs2)

    # Concatenate classes outputs in order to create the second submodel's output.
    outputs_second_submodel = Concatenate(name='second_submodel')(
        [averaged_class_2, outputs_class3])
    output_list.append(outputs_second_submodel)

    # Submodel 3: predicts classes 4-6.
    x3 = Conv2D(64, (3, 3), padding='same', activation='elu',
                name='submodel3_conv1',
                kernel_regularizer=l2(weight_decay))(inputs)
    x3 = BatchNormalization(name='submodel3_batch-norm')(x3)
    x3 = Conv2D(64, (3, 3), padding='same', activation='elu',
                name='submodel3_conv2',
                kernel_regularizer=l2(weight_decay))(x3)
    x3 = MaxPooling2D(pool_size=(2, 2), name='submodel3_pool')(x3)
    x3 = Dropout(0.3, name='submodel3_dropout', seed=0)(x3)

    # Add Submodel 3 top layers.
    x3 = Flatten(name='submodel3_flatten')(x3)
    outputs3 = Dense(3, name='submodel3_output')(x3)

    # Average the predictions for the fourth class of the last two submodels.
    averaged_class_4 = Average(name='averaged_fourth_class')(
        [Crop(1, 2, 3)(outputs2),
         Crop(1, 0, 1)(outputs3)])

    # Crop outputs3 in order to create the fifth and sixth class outputs.
    outputs_class5 = Crop(1, 1, 2, name='fifth_class')(outputs3)
    outputs_class6 = Crop(1, 2, 3, name='sixth_class')(outputs3)

    # Concatenate classes outputs in order to create the third submodel's output.
    outputs_third_submodel = Concatenate(name='third_submodel')(
        [averaged_class_4, outputs_class5, outputs_class6])
    output_list.append(outputs_third_submodel)

    # Submodel 4: predicts classes 7-8 (no overlap with other submodels).
    # Block1.
    x4 = Conv2D(32, (3, 3), padding='same', activation='elu',
                name='submodel4_block1_conv1',
                kernel_regularizer=l2(weight_decay))(inputs)
    x4 = BatchNormalization(name='submodel4_block1_batch-norm')(x4)
    x4 = Conv2D(32, (3, 3), padding='same', activation='elu',
                name='submodel4_block1_conv2',
                kernel_regularizer=l2(weight_decay))(x4)
    x4 = MaxPooling2D(pool_size=(2, 2), name='submodel4_block1_pool')(x4)
    x4 = Dropout(0.2, name='submodel4_block1_dropout', seed=0)(x4)

    # Block2
    x4 = Conv2D(64, (3, 3), padding='same', activation='elu',
                name='submodel4_block2_conv1',
                kernel_regularizer=l2(weight_decay))(x4)
    x4 = BatchNormalization(name='submodel4_block2_batch-norm')(x4)
    x4 = Conv2D(64, (3, 3), padding='same', activation='elu',
                name='submodel4_block2_conv2',
                kernel_regularizer=l2(weight_decay))(x4)
    x4 = MaxPooling2D(pool_size=(2, 2), name='submodel4_block2_pool')(x4)
    x4 = Dropout(0.4, name='submodel4_block2_dropout', seed=0)(x4)

    # Add Submodel 4 top layers.
    x4 = Flatten(name='submodel4_flatten')(x4)
    outputs4 = Dense(2, name='seventh_eighth_class_submodel4')(x4)
    output_list.append(outputs4)

    # Submodel 5: predicts classes 9-10 (no overlap with other submodels).
    # Block1.
    x5 = Conv2D(32, (3, 3), padding='same', activation='elu',
                name='submodel5_block1_conv1',
                kernel_regularizer=l2(weight_decay))(inputs)
    x5 = BatchNormalization(name='submodel5_block1_batch-norm')(x5)
    x5 = Conv2D(32, (3, 3), padding='same', activation='elu',
                name='submodel5_block1_conv2',
                kernel_regularizer=l2(weight_decay))(x5)
    x5 = MaxPooling2D(pool_size=(2, 2), name='submodel5_block1_pool')(x5)
    x5 = Dropout(0.2, name='submodel5_block1_dropout', seed=0)(x5)

    # Block2
    # NOTE(review): the conv/pool layers below are named 'block3' while the
    # batch-norm/dropout say 'block2' — likely a copy-paste slip, but the
    # names are kept as-is because renaming would break loading any
    # previously-saved weights matched by layer name.
    x5 = Conv2D(32, (3, 3), padding='same', activation='elu',
                name='submodel5_block3_conv1',
                kernel_regularizer=l2(weight_decay))(x5)
    x5 = BatchNormalization(name='submodel5_block2_batch-norm')(x5)
    x5 = Conv2D(32, (3, 3), padding='same', activation='elu',
                name='submodel5_block3_conv2',
                kernel_regularizer=l2(weight_decay))(x5)
    x5 = MaxPooling2D(pool_size=(2, 2), name='submodel5_block3_pool')(x5)
    x5 = Dropout(0.4, name='submodel5_block2_dropout', seed=0)(x5)

    # Add Submodel 5 top layers.
    x5 = Flatten(name='submodel5_flatten')(x5)
    outputs5 = Dense(2, name='ninth_tenth_class_submodel5')(x5)
    output_list.append(outputs5)

    # Concatenate all class predictions together.
    outputs = Concatenate(name='output')(output_list)
    outputs = Softmax(name='output_softmax')(outputs)

    # Create model.
    model = Model(inputs, outputs, name='cifar10_complicated_ensemble')
    # Load weights, if they exist.
    load_weights(weights_path, model)

    return model
def layers(self):
    """Build the hierarchical (NVAE-style) decoder graph.

    Creates the decoder's input placeholders (top-level latent mean/stddev,
    the encoder skip connections and the per-resolution mask inputs), then
    upsamples from a 1x1 latent through 2x2 -> 4x4 -> ... -> 128x128. At
    every resolution a residual latent distribution is predicted from the
    main path fused with the encoder skip, sampled, and concatenated back
    into the main path.

    Returns:
        (inputs, outputs) — two lists of Keras tensors:
          inputs:  [mean, stddev, 6 skip inputs, 7 mask inputs]
          outputs: [decoded image tensor, followed by the mean/stddev
                    pair of every resolution level]

    Side effects: stores each level's mean/stddev tensor on ``self``
    (e.g. ``self.mean_4x4x512``) for use elsewhere in the class.
    """
    # Face encoder inputs (latent statistics produced by the encoder).
    self.mean_1024 = Input(self.latent_size, self.batch_size, name="mean_1024")
    self.stddev_1024 = Input(self.latent_size, self.batch_size, name="stddev_1024")
    # Encoder skip connections, one per spatial resolution.
    skip_4x4x512 = Input((4, 4, 512), self.batch_size, name="skip_4x4x512")
    skip_8x8x256 = Input((8, 8, 256), self.batch_size, name="skip_8x8x256")
    skip_16x16x128 = Input((16, 16, 128), self.batch_size, name="skip_16x16x128")
    skip_32x32x64 = Input((32, 32, 64), self.batch_size, name="skip_32x32x64")
    skip_64x64x32 = Input((64, 64, 32), self.batch_size, name="skip_64x64x32")
    skip_128x128x16 = Input((128, 128, 16), self.batch_size, name="skip_128x128x16")
    # Mask encoder inputs (third operand of each SimpleSamplingLayer).
    mask_1024 = Input(self.latent_size, self.batch_size, name="mask_1024")
    mask4x4x512 = Input((4, 4, 512), self.batch_size, name="mask4x4x512")
    mask8x8x256 = Input((8, 8, 256), self.batch_size, name="mask8x8x256")
    mask16x16x128 = Input((16, 16, 128), self.batch_size, name="mask16x16x128")
    mask32x32x64 = Input((32, 32, 64), self.batch_size, name="mask32x32x64")
    mask64x64x32 = Input((64, 64, 32), self.batch_size, name="mask64x64x32")
    mask128x128x16 = Input((128, 128, 16), self.batch_size, name="mask128x128x16")

    # Top-level latent sample.
    sample = SimpleSamplingLayer()([self.mean_1024, self.stddev_1024, mask_1024])

    ###################
    # Decoder network #
    ###################
    # reexpand the input from flat:
    net = Reshape((1, 1, self.latent_size))(sample)
    net = SwishLayer()(net)  # 1x1x1024
    net = ConvSN2DTranspose(1024, (3, 3), strides=(2, 2), padding='same')(net)  # 2x2x1024
    net = Dropout(self.dropout)(net)
    net = ConvSN2D(filters=1024, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)

    # --- 4x4 level -------------------------------------------------------
    # decode distribution: predict this level's statistics from the
    # upsampled main path fused with the encoder skip.
    decoded_4x4x512 = ConvSN2DTranspose(512, (3, 3), strides=(2, 2), padding='same')(net)
    decoded_4x4x1024 = concatenate([skip_4x4x512, decoded_4x4x512])
    mean_4x4x512 = NVAEResidualLayer(512)(decoded_4x4x1024)
    stddev_4x4x512 = NVAEResidualLayer(512)(decoded_4x4x1024)
    # count delta: express this level's statistics relative to the
    # (upscaled) previous-level statistics.
    upscaled_mean_1024 = Reshape((1, 1, 1024))(self.mean_1024)
    upscaled_mean_1024 = ConvSN2DTranspose(512, (3, 3), strides=(4, 4), padding='same')(upscaled_mean_1024)
    self.mean_4x4x512 = RelativeMeanLayer()([mean_4x4x512, upscaled_mean_1024])
    upscaled_stddev_1024 = Reshape((1, 1, 1024))(self.stddev_1024)
    upscaled_stddev_1024 = ConvSN2DTranspose(512, (3, 3), strides=(4, 4), padding='same')(upscaled_stddev_1024)
    self.stddev_4x4x512 = RelativeStddevLayer()([stddev_4x4x512, upscaled_stddev_1024])
    # sample distribution
    sample_4x4x512 = SimpleSamplingLayer()([self.mean_4x4x512, self.stddev_4x4x512, mask4x4x512])
    # Main path: refine at 2x2, then upsample to 4x4.
    net = Dropout(self.dropout)(net)
    net = ConvSN2D(filters=1024, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)  # 2x2x1024
    net = ConvSN2DTranspose(512, (3, 3), strides=(2, 2), padding='same')(net)  # 4x4x512
    # concatenate sample from distribution with normal layer
    net = concatenate([net, sample_4x4x512])  # 4x4x1024 after concat (512 + 512 channels)
    net = Dropout(self.dropout)(net)
    net = ConvSN2D(filters=512, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)

    # --- 8x8 level -------------------------------------------------------
    # decode distribution
    decoded_8x8x256 = ConvSN2DTranspose(256, (3, 3), strides=(2, 2), padding='same')(net)
    decoded_8x8x256 = concatenate([skip_8x8x256, decoded_8x8x256])
    mean_8x8x256 = NVAEResidualLayer(256)(decoded_8x8x256)
    stddev_8x8x256 = NVAEResidualLayer(256)(decoded_8x8x256)
    # count delta
    upscaled_mean_4x4x512 = ConvSN2DTranspose(256, (3, 3), strides=(2, 2), padding='same')(self.mean_4x4x512)
    self.mean_8x8x256 = RelativeMeanLayer()([mean_8x8x256, upscaled_mean_4x4x512])
    upscaled_stddev_4x4x512 = ConvSN2DTranspose(256, (3, 3), strides=(2, 2), padding='same')(self.stddev_4x4x512)
    self.stddev_8x8x256 = RelativeStddevLayer()([stddev_8x8x256, upscaled_stddev_4x4x512])
    # sample distribution
    sample_8x8x256 = SimpleSamplingLayer()([self.mean_8x8x256, self.stddev_8x8x256, mask8x8x256])
    # Main path: refine at 4x4, then upsample to 8x8.
    net = Dropout(self.dropout)(net)
    net = ConvSN2D(filters=512, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)  # 4x4x512
    net = ConvSN2DTranspose(256, (3, 3), strides=(2, 2), padding='same')(net)  # 8x8x256
    # concatenate sample from distribution with normal layer
    net = concatenate([net, sample_8x8x256])  # 8x8x512 after concat (256 + 256 channels)
    net = Dropout(self.dropout)(net)
    net = ConvSN2D(filters=256, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)

    # --- 16x16 level -----------------------------------------------------
    # decode distribution
    decoded_16x16x128 = ConvSN2DTranspose(128, (3, 3), strides=(2, 2), padding='same')(net)
    decoded_16x16x128 = concatenate([skip_16x16x128, decoded_16x16x128])
    mean_16x16x128 = NVAEResidualLayer(128)(decoded_16x16x128)
    stddev_16x16x128 = NVAEResidualLayer(128)(decoded_16x16x128)
    # count delta
    upscaled_mean_8x8x256 = ConvSN2DTranspose(128, (3, 3), strides=(2, 2), padding='same')(self.mean_8x8x256)
    self.mean_16x16x128 = RelativeMeanLayer()([mean_16x16x128, upscaled_mean_8x8x256])
    upscaled_stddev_8x8x256 = ConvSN2DTranspose(128, (3, 3), strides=(2, 2), padding='same')(self.stddev_8x8x256)
    self.stddev_16x16x128 = RelativeStddevLayer()([stddev_16x16x128, upscaled_stddev_8x8x256])
    # sample distribution
    sample_16x16x128 = SimpleSamplingLayer()([self.mean_16x16x128, self.stddev_16x16x128, mask16x16x128])
    # Main path: refine at 8x8, then upsample to 16x16.
    net = Dropout(self.dropout)(net)  # 8x8x256
    net = ConvSN2D(filters=256, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)  # 8x8x256
    net = ConvSN2DTranspose(128, (3, 3), strides=(2, 2), padding='same')(net)  # 16x16x128
    # concatenate sample from distribution with normal layer
    net = concatenate([net, sample_16x16x128])  # 16x16x256
    net = Dropout(self.dropout)(net)
    net = ConvSN2D(filters=128, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)  # 16x16x128

    # --- 32x32 level -----------------------------------------------------
    # decode distribution
    decoded_32x32x64 = ConvSN2DTranspose(64, (3, 3), strides=(2, 2), padding='same')(net)
    decoded_32x32x64 = concatenate([skip_32x32x64, decoded_32x32x64])
    mean_32x32x64 = NVAEResidualLayer(64)(decoded_32x32x64)
    stddev_32x32x64 = NVAEResidualLayer(64)(decoded_32x32x64)
    # count delta
    upscaled_mean_16x16x128 = ConvSN2DTranspose(64, (3, 3), strides=(2, 2), padding='same')(self.mean_16x16x128)
    self.mean_32x32x64 = RelativeMeanLayer()([mean_32x32x64, upscaled_mean_16x16x128])
    upscaled_stddev_16x16x128 = ConvSN2DTranspose(64, (3, 3), strides=(2, 2), padding='same')(self.stddev_16x16x128)
    self.stddev_32x32x64 = RelativeStddevLayer()([stddev_32x32x64, upscaled_stddev_16x16x128])
    # sample distribution
    sample_32x32x64 = SimpleSamplingLayer()([self.mean_32x32x64, self.stddev_32x32x64, mask32x32x64])
    # Main path: refine at 16x16, then upsample to 32x32.
    net = Dropout(self.dropout)(net)
    net = ConvSN2D(filters=128, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)  # 16x16x128
    net = ConvSN2DTranspose(64, (3, 3), strides=(2, 2), padding='same')(net)  # 32x32x64
    # concatenate sample from distribution with normal layer
    net = concatenate([net, sample_32x32x64])  # 32x32x128
    net = Dropout(self.dropout)(net)
    net = ConvSN2D(filters=64, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)  # 32x32x64

    # --- 64x64 level -----------------------------------------------------
    # decode distribution
    decoded_64x64x32 = ConvSN2DTranspose(32, (3, 3), strides=(2, 2), padding='same')(net)
    decoded_64x64x32 = concatenate([skip_64x64x32, decoded_64x64x32])
    mean_64x64x32 = NVAEResidualLayer(32)(decoded_64x64x32)
    stddev_64x64x32 = NVAEResidualLayer(32)(decoded_64x64x32)
    # count delta
    upscaled_mean_32x32x64 = ConvSN2DTranspose(32, (3, 3), strides=(2, 2), padding='same')(self.mean_32x32x64)
    self.mean_64x64x32 = RelativeMeanLayer()([mean_64x64x32, upscaled_mean_32x32x64])
    upscaled_stddev_32x32x64 = ConvSN2DTranspose(32, (3, 3), strides=(2, 2), padding='same')(self.stddev_32x32x64)
    self.stddev_64x64x32 = RelativeStddevLayer()([stddev_64x64x32, upscaled_stddev_32x32x64])
    # sample distribution
    sample_64x64x32 = SimpleSamplingLayer()([self.mean_64x64x32, self.stddev_64x64x32, mask64x64x32])
    # Main path: refine at 32x32, then upsample to 64x64.
    net = Dropout(self.dropout)(net)  # 32x32x128
    net = ConvSN2D(filters=64, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)  # 32x32x64
    net = ConvSN2DTranspose(32, (3, 3), strides=(2, 2), padding='same')(net)  # 64x64x32
    # concatenate sample from distribution with normal layer
    net = concatenate([net, sample_64x64x32])  # 64x64x64
    net = Dropout(self.dropout)(net)
    net = ConvSN2D(filters=32, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)  # 64x64x32

    # --- 128x128 level (output resolution) -------------------------------
    # decode distribution
    decoded_128x128x16 = ConvSN2DTranspose(16, (3, 3), strides=(2, 2), padding='same')(net)
    decoded_128x128x16 = concatenate([skip_128x128x16, decoded_128x128x16])
    mean_128x128x16 = NVAEResidualLayer(16)(decoded_128x128x16)
    stddev_128x128x16 = NVAEResidualLayer(16)(decoded_128x128x16)
    # count delta
    upscaled_mean_64x64x32 = ConvSN2DTranspose(16, (3, 3), strides=(2, 2), padding='same')(self.mean_64x64x32)
    self.mean_128x128x16 = RelativeMeanLayer()([mean_128x128x16, upscaled_mean_64x64x32])
    upscaled_stddev_64x64x32 = ConvSN2DTranspose(16, (3, 3), strides=(2, 2), padding='same')(self.stddev_64x64x32)
    self.stddev_128x128x16 = RelativeStddevLayer()([stddev_128x128x16, upscaled_stddev_64x64x32])
    # sample distribution
    sample_128x128x16 = SimpleSamplingLayer()([self.mean_128x128x16, self.stddev_128x128x16, mask128x128x16])
    # Main path: refine at 64x64, then upsample to 128x128.
    net = Dropout(self.dropout)(net)  # 64x64x64
    net = ConvSN2D(filters=32, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)  # 64x64x32
    net = ConvSN2DTranspose(16, (3, 3), strides=(2, 2), padding='same')(net)
    # concatenate sample from distribution with normal layer
    net = concatenate([net, sample_128x128x16])  # 128x128x32
    net = Dropout(self.dropout)(net)
    net = ConvSN2D(filters=16, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)
    net = BatchNormalization()(net)
    net = SwishLayer()(net)
    # Final projection to 3 image channels (no activation applied here).
    net = ConvSN2D(filters=3, kernel_size=3, use_bias=False,
                   data_format='channels_last', padding='same')(net)

    # Inputs: latent stats + skips + masks; outputs: image + all level stats.
    return [self.mean_1024, self.stddev_1024,
            # face encodings
            skip_4x4x512, skip_8x8x256, skip_16x16x128,
            skip_32x32x64, skip_64x64x32, skip_128x128x16,
            # mask
            mask_1024, mask4x4x512, mask8x8x256, mask16x16x128,
            mask32x32x64, mask64x64x32, mask128x128x16], \
           [net,
            self.mean_1024, self.stddev_1024,
            self.mean_4x4x512, self.stddev_4x4x512,
            self.mean_8x8x256, self.stddev_8x8x256,
            self.mean_16x16x128, self.stddev_16x16x128,
            self.mean_32x32x64, self.stddev_32x32x64,
            self.mean_64x64x32, self.stddev_64x64x32,
            self.mean_128x128x16, self.stddev_128x128x16]
def train(run_name, start_epoch, stop_epoch, img_w, build_word_count,
          max_string_len, mono_fraction, save_model_path=None):
    """Train the CTC-based OCR network on generated text images.

    Builds a small conv stack followed by a bidirectional GRU, attaches a
    CTC loss through a Lambda layer (the loss itself is computed inside the
    graph, so ``compile`` uses a dummy loss that just returns ``y_pred``),
    and trains with ``fit_generator`` on a ``TextImageGenerator``.

    :param run_name: run identifier; used for the weights directory and the
        visualization callback.
    :param start_epoch: epoch to resume from; if > 0, weights from the
        previous epoch are loaded from OUTPUT_DIR/run_name.
    :param stop_epoch: last epoch (exclusive upper bound for training).
    :param img_w: input image width in pixels (height is fixed at 64).
    :param build_word_count: forwarded to TextImageGenerator.
    :param max_string_len: maximum label length, forwarded to the generator.
    :param mono_fraction: forwarded to TextImageGenerator.
    :param save_model_path: if given, a prediction-only model (input image
        -> softmax activations, without the CTC loss head) is saved there.
    """
    # Input Parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * (val_split))
    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    # GRU output NAN when rnn_size=512 with my GPU, but CPU or rnn_size=256 is ok.
    # Tensorflow 1.10 appears, but vanishes in 1.12!
    rnn_size = 512
    minibatch_size = 32
    # if start_epoch >= 12:
    #     minibatch_size = 8  # 32 is to large for my poor GPU

    # Channel axis position depends on the backend image data format.
    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    img_gen = TextImageGenerator(minibatch_size=minibatch_size,
                                 img_w=img_w,
                                 img_h=img_h,
                                 downsample_factor=(pool_size**2),
                                 val_split=words_per_epoch - val_words,
                                 build_word_count=build_word_count,
                                 max_string_len=max_string_len,
                                 mono_fraction=mono_fraction)
    act = 'relu'
    kernel_init = 'he_normal'

    # Conv feature extractor: two conv/BN/ReLU/pool stages.
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same', activation=None,
                   kernel_initializer=kernel_init, name='conv1')(input_data)
    inner = BatchNormalization(axis=3, scale=False, name='bn1')(inner)
    inner = Activation(activation=act)(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same', activation=None,
                   kernel_initializer=kernel_init, name='conv2')(inner)
    inner = BatchNormalization(axis=3, scale=False, name='bn2')(inner)
    inner = Activation(activation=act)(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    # Collapse the (downsampled) height and channel axes into features so
    # the width axis becomes the RNN's time axis.
    conv_to_rnn_dims = (img_w // (pool_size**2),
                        (img_h // (pool_size**2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # bidirectional GRU, GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True,
                kernel_initializer=kernel_init, name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer=kernel_init, name='gru1_b')(inner)
    # NOTE(review): the backward GRU's output sequence is concatenated
    # without being time-reversed first — confirm this is intended.
    gru1_merged = concatenate([gru_1, gru_1b])

    # transforms RNN output to character activations:
    inner = Dense(img_gen.get_output_size(), kernel_initializer=kernel_init,
                  name='dense2')(gru1_merged)
    y_pred = Activation('softmax', name='softmax')(inner)

    # Extra inputs required by the CTC loss.
    labels = Input(name='the_labels', shape=[img_gen.max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speeds up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)
    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                  optimizer=sgd,
                  metrics=['accuracy'])

    # Resume from the previous epoch's weights when continuing a run.
    if start_epoch > 0:
        weight_file = os.path.join(
            OUTPUT_DIR,
            os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
        model.load_weights(weight_file)
        print("load_weight: ", weight_file)

    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])
    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    model.fit_generator(generator=img_gen.next_train(),
                        steps_per_epoch=(words_per_epoch - val_words) // minibatch_size,
                        epochs=stop_epoch,
                        validation_data=img_gen.next_val(),
                        validation_steps=val_words // minibatch_size,
                        callbacks=[viz_cb, img_gen],
                        initial_epoch=start_epoch,
                        verbose=1)

    # Save an inference-only model (image in, softmax out) if requested.
    if save_model_path:
        predict_model = Model(inputs=input_data, outputs=y_pred)
        predict_model.save(save_model_path)
def layers(self):
    """Build the face-encoder graph.

    A TimeDistributed conv stack downsamples the input from 128x128 to
    2x2, emitting an ``EncoderResidualLayer`` skip connection at every
    resolution, then produces flat latent mean/stddev vectors.

    Returns:
        (input_layer, outputs): the image input tensor and a list of
        [mean, stddev, skip_4x4x512 ... skip_128x128x16].

    Side effects: stores the skip tensors and the latent mean/stddev on
    ``self`` (e.g. ``self.skip_4x4x512``, ``self.mean_1024``).
    """
    input_layer = Input(self.real_input_shape, self.batch_size)

    # --- 128x128 stage ---------------------------------------------------
    # 128x128x3
    net = TimeDistributed(ConvSN2D(filters=16, kernel_size=3, use_bias=False,
                                   data_format='channels_last', padding='same'))(input_layer)
    net = BatchNormalization()(net)
    net = TimeDistributed(SwishLayer())(net)
    # skip connection
    self.skip_128x128x16 = EncoderResidualLayer(depth=16, name="skip_128x128x16")(net)
    net = TimeDistributed(ConvSN2D(filters=16, kernel_size=3, use_bias=False,
                                   data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(SwishLayer())(net)
    net = TimeDistributed(Dropout(self.dropout))(net)
    net = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(net)

    # --- 64x64 stage -----------------------------------------------------
    # 64x64x32
    net = TimeDistributed(ConvSN2D(filters=32, kernel_size=3, use_bias=False,
                                   data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(SwishLayer())(net)
    # skip connection
    self.skip_64x64x32 = EncoderResidualLayer(depth=32, name="skip_64x64x32")(net)
    net = TimeDistributed(ConvSN2D(filters=32, kernel_size=3, use_bias=False,
                                   data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(SwishLayer())(net)
    net = TimeDistributed(Dropout(self.dropout))(net)
    net = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(net)

    # --- 32x32 stage -----------------------------------------------------
    # 32x32x64
    # NOTE(review): from here on the convs keep the default use_bias=True,
    # unlike the earlier stages — confirm that is intended.
    net = TimeDistributed(ConvSN2D(filters=64, kernel_size=(3, 3),
                                   data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(SwishLayer())(net)
    # skip connection
    self.skip_32x32x64 = EncoderResidualLayer(depth=64, name="skip_32x32x64")(net)
    net = TimeDistributed(ConvSN2D(filters=64, kernel_size=(3, 3),
                                   data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(SwishLayer())(net)
    net = TimeDistributed(Dropout(self.dropout))(net)
    net = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(net)

    # --- 16x16 stage -----------------------------------------------------
    # 16x16x64
    net = TimeDistributed(ConvSN2D(filters=128, kernel_size=(3, 3),
                                   data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(SwishLayer())(net)
    # skip connection
    self.skip_16x16x128 = EncoderResidualLayer(depth=128, name="skip_16x16x128")(net)
    net = TimeDistributed(ConvSN2D(filters=128, kernel_size=(3, 3),
                                   data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(SwishLayer())(net)
    net = TimeDistributed(Dropout(self.dropout))(net)
    net = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(net)

    # --- 8x8 stage -------------------------------------------------------
    # 8x8x128
    net = TimeDistributed(ConvSN2D(filters=256, kernel_size=(3, 3),
                                   data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(SwishLayer())(net)
    # skip connection
    self.skip_8x8x256 = EncoderResidualLayer(depth=256, name="skip_8x8x256")(net)
    net = TimeDistributed(ConvSN2D(filters=256, kernel_size=(3, 3),
                                   data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(SwishLayer())(net)
    net = TimeDistributed(Dropout(self.dropout))(net)
    net = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(net)

    # --- 4x4 stage -------------------------------------------------------
    # 4x4x256
    net = TimeDistributed(ConvSN2D(filters=512, kernel_size=(3, 3),
                                   data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(SwishLayer())(net)
    # skip connection
    self.skip_4x4x512 = EncoderResidualLayer(depth=512, name="skip_4x4x512")(net)
    net = TimeDistributed(ConvSN2D(filters=512, kernel_size=(3, 3),
                                   data_format='channels_last', padding='same'))(net)
    net = BatchNormalization()(net)
    net = TimeDistributed(SwishLayer())(net)
    net = TimeDistributed(Dropout(self.dropout))(net)
    net = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(net)

    # --- Latent heads ----------------------------------------------------
    # Mean head: residual block -> pool -> flatten.
    mean = EncoderResidualLayer(depth=self.latent_size, name="mean_2x2x1024")(net)
    mean = MaxPool2D(pool_size=(2, 2), name="mean_max_pooling")(mean)
    mean = Flatten(name="mean_flatten")(mean)
    # mean = DenseSN(self.latent_size,
    #                name="mean")(mean)
    self.mean_1024 = mean

    # Stddev head, mirroring the mean head.
    # stddev = ConvSN2D(filters=self.latent_size, kernel_size=(1, 1),
    #                   padding='same', name="stddev_convolution")(net)
    stddev = EncoderResidualLayer(depth=self.latent_size, name="stddev_2x2x1024")(net)
    stddev = MaxPool2D(pool_size=(2, 2), name="stddev_max_pooling")(stddev)
    stddev = Flatten(name="stddev_flatten")(stddev)
    # stddev = DenseSN(self.latent_size,
    #                  name="stddev")(stddev)
    self.stddev_1024 = stddev

    return input_layer, [self.mean_1024,
                         self.stddev_1024,
                         self.skip_4x4x512,
                         self.skip_8x8x256,
                         self.skip_16x16x128,
                         self.skip_32x32x64,
                         self.skip_64x64x32,
                         self.skip_128x128x16]
def cifar100_pyramid_ensemble(input_shape=None, input_tensor=None, n_classes=None,
                              weights_path: Union[None, str] = None) -> Model:
    """
    Defines a cifar100 network: one strong submodel predicting all 100
    classes, averaged with two weak submodels that each predict one half
    of the classes.

    :param n_classes: used in order to be compatible with the main script.
    :param input_shape: the input shape of the network. Can be omitted if input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if input_shape is used.
    :param weights_path: a path to a trained custom network's weights.
    :return: Keras functional API Model.
    """
    output_list = []
    inputs = create_inputs(input_shape, input_tensor)

    # Submodel Strong.
    # Block1.
    x1 = Conv2D(64, (3, 3), padding='same', activation='elu',
                name='submodel_strong_block1_conv1')(inputs)
    x1 = Conv2D(64, (3, 3), padding='same', activation='elu',
                name='submodel_strong_block1_conv2')(x1)
    x1 = MaxPooling2D(pool_size=(2, 2), name='submodel_strong_block1_pool')(x1)
    # Block2
    x1 = Conv2D(128, (3, 3), padding='same', activation='elu',
                name='submodel_strong_block2_conv1')(x1)
    x1 = Conv2D(128, (3, 3), padding='same', activation='elu',
                name='submodel_strong_block2_conv2')(x1)
    x1 = MaxPooling2D(pool_size=(2, 2), name='submodel_strong_block2_pool')(x1)
    # Block3
    x1 = BatchNormalization(name='submodel_strong_block3_batch-norm')(x1)
    x1 = Conv2D(256, (3, 3), padding='same', activation='elu',
                name='submodel_strong_block3_conv')(x1)
    x1 = Dropout(0.5, name='submodel_strong_block3_dropout', seed=0)(x1)
    # Add Submodel Strong top layers.
    x1 = Flatten(name='submodel_strong_flatten')(x1)
    # Logits over all 100 classes (softmax is applied once, at the end).
    outputs_submodel_strong = Dense(100, name='submodel_strong_output')(x1)

    # Submodel Weak 1.
    # Block1.
    x2 = Conv2D(128, (3, 3), padding='same', activation='elu',
                name='submodel_weak_1_block1_conv1')(inputs)
    x2 = Conv2D(128, (3, 3), padding='same', activation='elu',
                name='submodel_weak_1_block1_conv2')(x2)
    x2 = MaxPooling2D(pool_size=(2, 2), name='submodel_weak_1_block1_pool')(x2)
    # Add Submodel Weak 1 top layers.
    x2 = Flatten(name='submodel_weak_1_flatten')(x2)
    outputs2 = Dense(50, name='submodel_weak_1_output')(x2)

    # Average the predictions for the first half (classes 0-49).
    averaged_first_half_classes = Average(name='averaged_first_half_classes')(
        [Crop(1, 0, 50)(outputs_submodel_strong), outputs2])
    output_list.append(averaged_first_half_classes)

    # Submodel Weak 2.
    # Block1.
    x3 = Conv2D(128, (3, 3), padding='same', activation='elu',
                name='submodel_weak_2_block1_conv1')(inputs)
    x3 = Conv2D(128, (3, 3), padding='same', activation='elu',
                name='submodel_weak_2_block1_conv2')(x3)
    x3 = MaxPooling2D(pool_size=(2, 2), name='submodel_weak_2_block1_pool')(x3)
    # Add Submodel Weak 2 top layers.
    x3 = Flatten(name='submodel_weak_2_flatten')(x3)
    outputs3 = Dense(50, name='submodel_weak_2_output')(x3)

    # Average the predictions for the last half (classes 50-99).
    averaged_last_half_classes = Average(name='averaged_last_half_classes')(
        [Crop(1, 50, 100)(outputs_submodel_strong), outputs3])
    output_list.append(averaged_last_half_classes)

    # Concatenate all class predictions together.
    outputs = Concatenate(name='output')(output_list)
    outputs = Softmax(name='output_softmax')(outputs)

    # Create model.
    model = Model(inputs, outputs, name='cifar100_pyramid_ensemble')
    # Load weights, if they exist.
    load_weights(weights_path, model)

    return model
def __merge_temporal_features(feature, mode='conv', feature_size=256, frames_per_batch=1):
    """Compute a temporal feature from ``feature``.

    Depending on ``mode``, the input is passed through a 3D convolution
    ('conv'), a convolutional LSTM ('lstm'), a convolutional GRU ('gru'),
    or returned unchanged (``None``).

    NOTE(review): despite the "merge" in the name, the result is NOT
    added back to ``feature`` here — the temporal output is returned
    as-is; confirm downstream code expects that.

    Args:
        feature (tensorflow.keras.layers.Layer): Input layer.
        mode (str, optional): Type of temporal layer, one of
            {'conv', 'lstm', 'gru', None}. Defaults to 'conv'.
        feature_size (int, optional): Number of output filters/units.
            Defaults to 256.
        frames_per_batch (int, optional): Temporal kernel extent for
            'conv' mode. Defaults to 1.

    Raises:
        ValueError: If ``mode`` is not 'conv', 'lstm', 'gru' or None.

    Returns:
        tensorflow.keras.layers.Layer: The temporal feature (or the
        unchanged input when ``mode`` is None).
    """
    acceptable_modes = {'conv', 'lstm', 'gru', None}
    # Normalize the mode before validating (None stays None and is valid).
    if mode is not None:
        mode = str(mode).lower()
    if mode not in acceptable_modes:
        raise ValueError('Mode {} not supported. Please choose '
                         'from {}.'.format(mode, str(acceptable_modes)))

    # Short tag derived from the input tensor's name, used in layer names.
    tag = str(feature.name)[:2]

    if mode == 'conv':
        residual = Conv3D(feature_size,
                          (frames_per_batch, 3, 3),
                          strides=(1, 1, 1),
                          padding='same',
                          name='conv3D_mtf_{}'.format(tag),
                          )(feature)
        residual = BatchNormalization(axis=-1,
                                      name='bnorm_mtf_{}'.format(tag))(residual)
        residual = Activation('relu',
                              name='acti_mtf_{}'.format(tag))(residual)
    elif mode == 'lstm':
        residual = ConvLSTM2D(feature_size, (3, 3),
                              padding='same',
                              activation='relu',
                              return_sequences=True,
                              name='convLSTM_mtf_{}'.format(tag))(feature)
    elif mode == 'gru':
        residual = ConvGRU2D(feature_size, (3, 3),
                             padding='same',
                             activation='relu',
                             return_sequences=True,
                             name='convGRU_mtf_{}'.format(tag))(feature)
    else:
        # mode is None: pass the input through untouched.
        residual = feature

    return residual
def make_model(classes, points_per_sample, channel_mode='channels_last', batch_size=32):
    """Build and compile the dual-stream mmWave gesture/activity classifier.

    Two identical TimeDistributed CNN feature extractors (one for the
    range-Doppler heatmap stream, one for the range-azimuth stream) are
    flattened per frame, concatenated, fed through a two-layer LSTM and a
    dense head, and compiled with Adam + categorical cross-entropy.

    The original implementation duplicated the whole extractor as two
    copy-paste blocks; the shared structure is factored into a helper.

    :param classes: iterable of class labels; only ``len(classes)`` is
        used, to size the softmax output.
    :param points_per_sample: number of frames (time steps) per sample.
    :param channel_mode: Keras ``data_format`` ('channels_last' or
        'channels_first'); also controls where the singleton channel axis
        is placed in the input shape.
    :param batch_size: currently unused; kept for interface compatibility.
    :return: a compiled Keras Model taking [rd_input, razi_input].
    """
    l2 = tf.keras.regularizers.l2  # shared weight-decay factory

    def _frame_input_shape(spatial_shape):
        # Place the singleton channel axis according to channel_mode.
        if channel_mode == 'channels_last':
            return (int(points_per_sample), ) + spatial_shape + (1, )
        return (points_per_sample, 1) + spatial_shape

    def _td_feature_extractor(input_shape):
        # One TimeDistributed Conv2D stack; identical for both streams.
        cnn = Sequential()
        cnn.add(TimeDistributed(
            Conv2D(filters=8, kernel_size=(3, 3), data_format=channel_mode,
                   kernel_regularizer=l2(l=1e-5),
                   bias_regularizer=l2(l=1e-5),
                   activity_regularizer=l2(l=1e-5),
                   kernel_initializer='random_uniform'),
            input_shape=input_shape))  # use batch input size to avoid memory error
        cnn.add(TimeDistributed(tf.keras.layers.LeakyReLU(alpha=0.1)))
        cnn.add(TimeDistributed(BatchNormalization()))
        # NOTE(review): unlike the first conv, this layer does not pass
        # data_format — preserved from the original; confirm intended.
        cnn.add(TimeDistributed(
            Conv2D(filters=16, kernel_size=(3, 3),
                   kernel_regularizer=l2(l=1e-5),
                   bias_regularizer=l2(l=1e-5),
                   activity_regularizer=l2(l=1e-5))))
        cnn.add(TimeDistributed(tf.keras.layers.LeakyReLU(alpha=0.1)))
        cnn.add(TimeDistributed(BatchNormalization()))
        cnn.add(TimeDistributed(MaxPooling2D(pool_size=2)))
        cnn.add(TimeDistributed(Flatten()))  # this is where the two streams meet
        return cnn

    # Range-Doppler and range-azimuth streams share the same architecture.
    mmw_rdpl_TDCNN = _td_feature_extractor(_frame_input_shape(rd_shape))
    mmw_razi_TDCNN = _td_feature_extractor(_frame_input_shape(ra_shape))

    # Concatenate the per-frame features of the two extractors.
    merged = concatenate([mmw_rdpl_TDCNN.output, mmw_razi_TDCNN.output])

    # Temporal head: stacked LSTMs with heavy regularization and dropout.
    regressive_tensor = LSTM(units=32, return_sequences=True,
                             kernel_initializer='random_uniform',
                             kernel_regularizer=l2(l=1e-4),
                             recurrent_regularizer=l2(l=1e-5),
                             activity_regularizer=l2(l=1e-5))(merged)
    regressive_tensor = Dropout(rate=0.5)(regressive_tensor)
    regressive_tensor = LSTM(units=32, return_sequences=False,
                             kernel_initializer='random_uniform',
                             kernel_regularizer=l2(l=1e-4),
                             recurrent_regularizer=l2(l=1e-5),
                             activity_regularizer=l2(l=1e-5))(regressive_tensor)
    regressive_tensor = Dropout(rate=0.5)(regressive_tensor)

    # Dense classification head.
    regressive_tensor = Dense(units=256,
                              kernel_regularizer=l2(l=1e-4),
                              bias_regularizer=l2(l=1e-5),
                              activity_regularizer=l2(l=1e-5))(regressive_tensor)
    regressive_tensor = Dropout(rate=0.5)(regressive_tensor)
    regressive_tensor = Dense(len(classes), activation='softmax',
                              kernel_initializer='random_uniform')(regressive_tensor)

    model = Model(inputs=[mmw_rdpl_TDCNN.input, mmw_razi_TDCNN.input],
                  outputs=regressive_tensor)
    adam = tf.keras.optimizers.Adam(lr=1e-4, decay=1e-7)
    model.compile(optimizer=adam, loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def norm(x):
    """Apply batch normalization over the last (channel) axis of ``x``."""
    bn_layer = BatchNormalization(axis=-1)
    return bn_layer(x)