def __init__(self, shape, kernel, filters,
             initializer=slim.initializers.xavier_initializer(),
             data_format='channels_last', activation=tf.tanh,
             normalize=False, forget_bias=1., **kwargs):
    """Store the cell configuration and create optional layer-norm sublayers.

    Args:
        shape: Iterable of dimensions. `filters` is appended to it to form
            the stored size (`self._size`). Lists, tuples and other
            iterables are accepted. Presumably the spatial dimensions of
            the input -- TODO confirm against callers.
        kernel: Stored as both `self.kernel` and `self.kernel_size`.
        filters: Stored as `self.filters` and used as the last dimension of
            `self._size`.
        initializer: Stored as `self._initializer`. NOTE: the default is
            evaluated once, when this method is defined, so every instance
            relying on the default shares the same initializer object.
        data_format: Stored as `self.data_format`.
        activation: Stored as `self._activation`.
        normalize: When truthy, five `KL.LayerNormalization` layers are
            created (input contribution, input gate, output gate, forget
            gate and memory).
        forget_bias: Stored as `self._forget_bias`.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    self._normalize = normalize
    self.kernel = kernel
    self.kernel_size = kernel
    self.filters = filters
    self._initializer = initializer
    self._activation = activation
    self._forget_bias = forget_bias
    # `list(shape)` lets callers pass a tuple (or any iterable) as well as a
    # list; plain `shape + [...]` raises TypeError for a tuple.
    self._size = tf.TensorShape(list(shape) + [self.filters])
    # Number of dimensions in `_size`.
    # NOTE(review): presumably the index of the channel axis once a batch
    # dimension is prepended -- confirm against the call site.
    self._feature_axis = self._size.ndims
    self.data_format = data_format
    if self._normalize:
        self.layer_norm_input_contribution = KL.LayerNormalization()
        self.layer_norm_input_gate = KL.LayerNormalization()
        self.layer_norm_output_gate = KL.LayerNormalization()
        self.layer_norm_forget_gate = KL.LayerNormalization()
        self.layer_norm_memory = KL.LayerNormalization()
    # The parent constructor is intentionally still called last, as in the
    # original, so attribute/initialisation order is unchanged.
    super(ConvLSTMCell, self).__init__(**kwargs)
def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
    """Create the sub-layers of the transformer block.

    Args:
        embed_dim: Passed to `MultiHeadSelfAttention` and used as the unit
            count of the second feed-forward `Dense` layer.
        num_heads: Passed to `MultiHeadSelfAttention`.
        ff_dim: Unit count of the first (ReLU) feed-forward `Dense` layer.
        rate: Rate given to both `Dropout` layers.
    """
    super(TransformerBlock, self).__init__()

    # Self-attention sub-layer.
    self.att = MultiHeadSelfAttention(embed_dim, num_heads)

    # Two-layer feed-forward network: ReLU `Dense`, then a linear `Dense`.
    expand = layers.Dense(ff_dim, activation="relu")
    project = layers.Dense(embed_dim)
    self.ffn = keras.Sequential([expand, project])

    # One layer-norm / dropout pair per sub-layer.
    norm_eps = 1e-6
    self.layernorm1 = layers.LayerNormalization(epsilon=norm_eps)
    self.layernorm2 = layers.LayerNormalization(epsilon=norm_eps)
    self.dropout1 = layers.Dropout(rate)
    self.dropout2 = layers.Dropout(rate)
def apply(x):
    """Apply the classification head to `x` and return its output.

    The head is global average pooling, layer normalization, then a
    `Dense` layer with `num_classes` units and no activation argument.
    `name` and `num_classes` are free variables resolved outside this
    function.
    """
    pooled = layers.GlobalAveragePooling2D(name=name + "_head_gap")(x)
    normalized = layers.LayerNormalization(
        epsilon=1e-6, name=name + "_head_layernorm"
    )(pooled)
    return layers.Dense(num_classes, name=name + "_head_dense")(normalized)
def apply(inputs):
    """Run one residual block on `inputs` and return `inputs + branch`.

    The branch is a 7x7 grouped convolution (`groups=projection_dim`),
    layer normalization, a `Dense` layer with `4 * projection_dim` units,
    GELU, and a `Dense` layer back to `projection_dim` units. `LayerScale`
    is applied when `layer_scale_init_value` is not None. The branch then
    goes through `StochasticDepth` when `drop_path_rate` is truthy, or a
    linear (identity) activation otherwise.

    `name`, `projection_dim`, `layer_scale_init_value` and `drop_path_rate`
    are free variables resolved outside this function.
    """
    depthwise = layers.Conv2D(
        filters=projection_dim,
        kernel_size=7,
        padding="same",
        groups=projection_dim,
        name=name + "_depthwise_conv",
    )
    branch = depthwise(inputs)

    norm = layers.LayerNormalization(epsilon=1e-6, name=name + "_layernorm")
    branch = norm(branch)

    # The two "pointwise conv" steps are implemented with Dense layers.
    expand = layers.Dense(4 * projection_dim, name=name + "_pointwise_conv_1")
    branch = expand(branch)
    branch = layers.Activation("gelu", name=name + "_gelu")(branch)
    contract = layers.Dense(projection_dim, name=name + "_pointwise_conv_2")
    branch = contract(branch)

    if layer_scale_init_value is not None:
        scale = LayerScale(
            layer_scale_init_value,
            projection_dim,
            name=name + "_layer_scale",
        )
        branch = scale(branch)

    # Stochastic depth only when a non-zero rate was given; otherwise a
    # named identity layer stands in for it.
    residual_op = (
        StochasticDepth(drop_path_rate, name=name + "_stochastic_depth")
        if drop_path_rate
        else layers.Activation("linear", name=name + "_identity")
    )
    return inputs + residual_op(branch)
def __init__(self,
             FC_units=512,
             num_filters=32,
             num_classes=2,
             droupout=0.4,
             name="DCNN"):
    """Create the layers of the DCNN model.

    Args:
        FC_units: Unit count of the ReLU `Dense` layer.
        num_filters: Filter count of each of the three `Conv1D` layers
            (kernel sizes 2, 3 and 4).
        num_classes: Number of target classes. With exactly 2 the output
            layer is a single sigmoid unit; otherwise it is a softmax
            layer with `num_classes` units.
        droupout: Rate of the `Dropout` layer. The parameter name is
            misspelled but is kept unchanged so existing keyword callers
            keep working.
        name: Model name forwarded to the parent constructor.
    """
    # calling superclass constructor
    super(DCNN, self).__init__(name=name)

    # adding layers to DCNN model object
    # The BERT layer is loaded from TF Hub with trainable=False.
    self.bert_layer = hub.KerasLayer(
        "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1",
        trainable=False)
    self.bigram_layer = layers.Conv1D(filters=num_filters,
                                      kernel_size=2,
                                      padding='valid',
                                      activation='relu')
    self.trigram_layer = layers.Conv1D(filters=num_filters,
                                       kernel_size=3,
                                       padding='valid',
                                       activation='relu')
    self.fourgram_layer = layers.Conv1D(filters=num_filters,
                                        kernel_size=4,
                                        padding='valid',
                                        activation='relu')
    self.batchnorm = layers.BatchNormalization()
    self.layernorm = layers.LayerNormalization()
    self.pool_layer = layers.GlobalMaxPool1D()
    self.dense_layer = layers.Dense(FC_units, activation='relu')
    # Use the constructor argument; the original read an undefined
    # `dropout_rate` name here.
    self.dropout_layer = layers.Dropout(rate=droupout)
    if num_classes == 2:
        self.output_layer = layers.Dense(units=1, activation="sigmoid")
    else:
        # Use the constructor argument; the original read an undefined
        # `nb_classes` name here.
        self.output_layer = layers.Dense(units=num_classes,
                                         activation="softmax")
def ConvNeXt(
    depths,
    projection_dims,
    drop_path_rate=0.0,
    layer_scale_init_value=1e-6,
    default_size=224,
    model_name="convnext",
    include_preprocessing=True,
    include_top=True,
    weights=None,
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
):
    """Instantiates ConvNeXt architecture given specific configuration.

    Args:
      depths: An iterable containing depths for each individual stages.
        The first four entries are used, one per stage.
      projection_dims: An iterable containing output number of channels of
        each individual stages. The first four entries are used.
      drop_path_rate: Stochastic depth probability. If 0.0, then stochastic
        depth won't be used.
      layer_scale_init_value: Layer scale coefficient, forwarded unchanged to
        every `ConvNeXtBlock`. (Earlier documentation stated that 0.0
        disables layer scaling; that decision is made inside
        `ConvNeXtBlock`, not in this function.)
      default_size: Default input image size.
      model_name: An optional name for the model.
      include_preprocessing: boolean denoting whether to include
        preprocessing in the model. When `weights="imagenet"` this should
        be always set to True. But for other models (e.g., randomly
        initialized) users should set it to False and apply preprocessing
        to data accordingly.
      include_top: Boolean denoting whether to include classification head
        to the model.
      weights: one of `None` (random initialization), `"imagenet"`
        (pre-training on ImageNet-1k), or the path to the weights file to
        be loaded.
      input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
        to use as image input for the model.
      input_shape: optional shape tuple, only to be specified if
        `include_top` is False. It should have exactly 3 inputs channels.
      pooling: optional pooling mode for feature extraction when
        `include_top` is `False`.
        - `None` means that the output of the model will be the 4D tensor
          output of the last convolutional layer.
        - `avg` means that global average pooling will be applied to the
          output of the last convolutional layer, and thus the output of
          the model will be a 2D tensor.
        - `max` means that global max pooling will be applied.
      classes: optional number of classes to classify images into, only to
        be specified if `include_top` is True, and if no `weights` argument
        is specified.
      classifier_activation: A `str` or callable. The activation function to
        use on the "top" layer. Ignored unless `include_top=True`. Set
        `classifier_activation=None` to return the logits of the "top"
        layer.

    Returns:
      A `keras.Model` instance.

    Raises:
        ValueError: in case of invalid argument for `weights`,
          or invalid input shape.
        ValueError: if `classifier_activation` is not `softmax`, or `None`
          when using a pretrained top layer.
        ValueError: if `include_top` is True but `classes` is not 1000
          when using ImageNet.
    """
    if not (weights in {"imagenet", None} or tf.io.gfile.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `imagenet` "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded."
        )

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            "If using `weights` as `'imagenet'` with `include_top`"
            " as true, `classes` should be 1000"
        )

    # Determine proper input shape.
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # The model is declared on the source inputs of `input_tensor` so that
    # any layers that produced `input_tensor` are part of the model.
    if input_tensor is not None:
        inputs = utils.layer_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Build the network on the image tensor itself. `inputs` is the list
    # returned by `get_source_inputs` when `input_tensor` is given, so it is
    # only used for the `Model(...)` call below.
    x = img_input

    if include_preprocessing:
        channel_axis = (
            3 if backend.image_data_format() == "channels_last" else 1
        )
        # `input_shape` has no batch dimension, hence the `- 1`.
        num_channels = input_shape[channel_axis - 1]
        if num_channels == 3:
            x = PreStem(name=model_name)(x)

    # Stem block.
    stem = sequential.Sequential(
        [
            layers.Conv2D(
                projection_dims[0],
                kernel_size=4,
                strides=4,
                name=model_name + "_stem_conv",
            ),
            layers.LayerNormalization(
                epsilon=1e-6, name=model_name + "_stem_layernorm"
            ),
        ],
        name=model_name + "_stem",
    )

    # Downsampling blocks.
    downsample_layers = []
    downsample_layers.append(stem)

    num_downsample_layers = 3
    for i in range(num_downsample_layers):
        downsample_layer = sequential.Sequential(
            [
                layers.LayerNormalization(
                    epsilon=1e-6,
                    name=model_name + "_downsampling_layernorm_" + str(i),
                ),
                layers.Conv2D(
                    projection_dims[i + 1],
                    kernel_size=2,
                    strides=2,
                    name=model_name + "_downsampling_conv_" + str(i),
                ),
            ],
            name=model_name + "_downsampling_block_" + str(i),
        )
        downsample_layers.append(downsample_layer)

    # Stochastic depth schedule.
    # This is referred from the original ConvNeXt codebase:
    # https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py#L86
    depth_drop_rates = [
        float(x) for x in np.linspace(0.0, drop_path_rate, sum(depths))
    ]

    # First apply downsampling blocks and then apply ConvNeXt stages.
    # `cur` is the offset of the current stage's first block within
    # `depth_drop_rates`.
    cur = 0

    num_convnext_blocks = 4
    for i in range(num_convnext_blocks):
        x = downsample_layers[i](x)
        for j in range(depths[i]):
            x = ConvNeXtBlock(
                projection_dim=projection_dims[i],
                drop_path_rate=depth_drop_rates[cur + j],
                layer_scale_init_value=layer_scale_init_value,
                name=model_name + f"_stage_{i}_block_{j}",
            )(x)
        cur += depths[i]

    if include_top:
        # Validate before adding the head so that an invalid combination
        # fails before more layers are created.
        # NOTE(review): `classifier_activation` is only validated here; it is
        # not passed to `Head` -- confirm whether the head should apply it.
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = Head(num_classes=classes, name=model_name)(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D()(x)
        x = layers.LayerNormalization(epsilon=1e-6)(x)

    model = training_lib.Model(inputs=inputs, outputs=x, name=model_name)

    # Load weights.
    if weights == "imagenet":
        # Index 0 of the hash entry is used for the full model and index 1
        # for the "_notop" file.
        if include_top:
            file_suffix = ".h5"
            file_hash = WEIGHTS_HASHES[model_name][0]
        else:
            file_suffix = "_notop.h5"
            file_hash = WEIGHTS_HASHES[model_name][1]
        file_name = model_name + file_suffix
        weights_path = utils.data_utils.get_file(
            file_name,
            BASE_WEIGHTS_PATH + file_name,
            cache_subdir="models",
            file_hash=file_hash,
        )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model