Example 1
def get_model(attention_type="external_attention"):
    inputs = layers.Input(shape=input_shape)
    # Augment the images.
    x = data_augmentation(inputs)
    # Extract patches.
    x = PatchExtract(patch_size)(x)
    # Create patch embedding.
    x = PatchEmbedding(num_patches, embedding_dim)(x)
    # Create the Transformer blocks.
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(
            x,
            embedding_dim,
            mlp_dim,
            num_heads,
            dim_coefficient,
            attention_dropout,
            projection_dropout,
            attention_type,
        )

    x = layers.GlobalAvgPool1D()(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model
Example 2
def squeeze_excite_block(input_tensor, ratio=8):
    """Create a channel-wise squeeze-excite block.
    Args:
        input_tensor: input Keras tensor
        ratio: reduction ratio of the squeeze (the bottleneck width is filters // ratio)
    Returns: a Keras tensor
    
    References
    -   [Squeeze and Excitation Networks](https://arxiv.org/abs/1709.01507)
    """
    init = input_tensor
    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1
    filters = init.shape[channel_axis]
    se_shape = (1, filters)

    se = layers.GlobalAvgPool1D()(init)
    se = layers.Reshape(se_shape)(se)
    se = layers.Dense(filters // ratio, activation='relu', kernel_initializer='he_normal', use_bias=False)(se)
    se = layers.Dense(filters, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(se)

    if backend.image_data_format() == 'channels_first':
        se = layers.Permute((2, 1))(se)  # (1, filters) -> (filters, 1) for channels-first 1D tensors

    x = layers.multiply([init, se])
    return x
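
A minimal usage sketch (not from the source): it assumes `keras`, `layers`, and the `squeeze_excite_block` defined above are available; the shapes and layer sizes below are illustrative.

from tensorflow import keras
from tensorflow.keras import layers

# Apply the SE block to a channels-last (batch, timesteps, channels) feature map.
seq_in = layers.Input(shape=(128, 64))                                 # 128 timesteps, 64 channels
feat = layers.Conv1D(64, 3, padding='same', activation='relu')(seq_in)
feat = squeeze_excite_block(feat, ratio=8)                             # channel-wise reweighting, shape unchanged
pooled = layers.GlobalAvgPool1D()(feat)
out = layers.Dense(10, activation='softmax')(pooled)
se_demo_model = keras.Model(seq_in, out)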
Example 3
def keras_model_fn_cpu(model_config, vocab_size, embedding_size, embeddings):
    """ CPU version of Stacked Bi-LSTM and Bi-GRU with Two Fasttext
    """
    ## hyperparams
    model_name = model_config['model_name']
    num_class = model_config['num_class']
    lstm_hs = model_config['lstm_hs']
    gru_hs = model_config['gru_hs']
    learning_rate = model_config['learning_rate']

    with tf.device('/cpu:0'):
        ## build model
        inputs = ks.Input(shape=(None, ), dtype='int32', name='inputs')
        embedded_sequences_ft1 = layers.Embedding(vocab_size,
                                                  embedding_size,
                                                  trainable=False,
                                                  mask_zero=False)(inputs)
        embedded_sequences_ft2 = layers.Embedding(vocab_size,
                                                  embedding_size,
                                                  trainable=False,
                                                  mask_zero=False)(inputs)
        concat_embed = layers.concatenate(
            [embedded_sequences_ft1, embedded_sequences_ft2])
        concat_embed = layers.SpatialDropout1D(0.5)(concat_embed)
        x = layers.Bidirectional(
            layers.LSTM(lstm_hs,
                        recurrent_activation='sigmoid',
                        return_sequences=True))(concat_embed)
        # Bidirectional(GRU, return_state=True) returns (sequences, forward_state, backward_state)
        x, x_h, x_c = layers.Bidirectional(
            layers.GRU(gru_hs,
                       reset_after=True,
                       recurrent_activation='sigmoid',
                       return_sequences=True,
                       return_state=True))(x)
        x_1 = layers.GlobalMaxPool1D()(x)
        x_2 = layers.GlobalAvgPool1D()(x)
        x_out = layers.concatenate([x_1, x_2, x_h])
        x_out = layers.BatchNormalization()(x_out)
        outputs = layers.Dense(num_class, activation='softmax',
                               name='outputs')(x_out)  # outputs
        model = ks.Model(inputs, outputs, name=model_name)

        ## compile
        model.compile(loss='categorical_crossentropy',
                      optimizer=ks.optimizers.Adam(learning_rate=learning_rate,
                                                   clipnorm=.25,
                                                   beta_1=0.7,
                                                   beta_2=0.99),
                      metrics=[
                          'categorical_accuracy',
                          ks.metrics.TopKCategoricalAccuracy(k=3)
                      ])
        return model
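
A hedged usage sketch showing the config keys the function reads (all values below are made up; `embeddings` is not used in the snippet above, so `None` is passed here):

demo_config = {
    'model_name': 'stacked_bilstm_bigru',
    'num_class': 5,
    'lstm_hs': 128,
    'gru_hs': 128,
    'learning_rate': 1e-3,
}
demo_model = keras_model_fn_cpu(demo_config, vocab_size=50000,
                                embedding_size=300, embeddings=None)
demo_model.summary()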
Example 4
def get_model(encoder, embedding_dim=16):
    model = keras.Sequential([
        layers.Embedding(encoder.vocab_size, embedding_dim),
        layers.GlobalAvgPool1D(),
        layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model
Example 5
def create_vit_classifier(use_token_learner=True,
                          token_learner_units=NUM_TOKENS):
    inputs = layers.Input(shape=INPUT_SHAPE)  # (B, H, W, C)

    # Augment data.
    augmented = data_augmentation(inputs)

    # Create patches and project the patches.
    projected_patches = layers.Conv2D(
        filters=PROJECTION_DIM,
        kernel_size=(PATCH_SIZE, PATCH_SIZE),
        strides=(PATCH_SIZE, PATCH_SIZE),
        padding="VALID",
    )(augmented)
    _, h, w, c = projected_patches.shape
    projected_patches = layers.Reshape(
        (h * w, c))(projected_patches)  # (B, number_patches, projection_dim)

    # Add positional embeddings to the projected patches.
    encoded_patches = position_embedding(
        projected_patches)  # (B, number_patches, projection_dim)
    encoded_patches = layers.Dropout(0.1)(encoded_patches)

    # Iterate over the number of layers and stack up blocks of
    # Transformer.
    for i in range(NUM_LAYERS):
        # Add a Transformer block.
        encoded_patches = transformer(encoded_patches)

        # Add the TokenLearner layer in the middle of the
        # architecture. The paper suggests that anywhere
        # between 1/2 and 3/4 of the network depth works well.
        if use_token_learner and i == NUM_LAYERS // 2:
            _, hh, c = encoded_patches.shape
            h = int(math.sqrt(hh))
            encoded_patches = layers.Reshape(
                (h, h, c))(encoded_patches)  # (B, h, h, projection_dim)
            encoded_patches = token_learner(
                encoded_patches, token_learner_units)  # (B, num_tokens, c)

    # Layer normalization and Global average pooling.
    representation = layers.LayerNormalization(
        epsilon=LAYER_NORM_EPS)(encoded_patches)
    representation = layers.GlobalAvgPool1D()(representation)

    # Classify outputs.
    outputs = layers.Dense(NUM_CLASSES, activation="softmax")(representation)

    # Create the Keras model.
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model
Example 6
def create_vivit_classifier(
    tubelet_embedder,
    positional_encoder,
    input_shape=INPUT_SHAPE,
    transformer_layers=NUM_LAYERS,
    num_heads=NUM_HEADS,
    embed_dim=PROJECTION_DIM,
    layer_norm_eps=LAYER_NORM_EPS,
    num_classes=NUM_CLASSES,
):
    # Get the input layer
    inputs = layers.Input(shape=input_shape)
    # Create patches.
    patches = tubelet_embedder(inputs)
    # Encode patches.
    encoded_patches = positional_encoder(patches)

    # Create multiple layers of the Transformer block.
    for _ in range(transformer_layers):
        # Layer normalization and MHSA
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim // num_heads, dropout=0.1
        )(x1, x1)

        # Skip connection
        x2 = layers.Add()([attention_output, encoded_patches])

        # Layer Normalization and MLP
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        x3 = keras.Sequential(
            [
                layers.Dense(units=embed_dim * 4, activation=tf.nn.gelu),
                layers.Dense(units=embed_dim, activation=tf.nn.gelu),
            ]
        )(x3)

        # Skip connection
        encoded_patches = layers.Add()([x3, x2])

    # Layer normalization and Global average pooling.
    representation = layers.LayerNormalization(epsilon=layer_norm_eps)(encoded_patches)
    representation = layers.GlobalAvgPool1D()(representation)

    # Classify outputs.
    outputs = layers.Dense(units=num_classes, activation="softmax")(representation)

    # Create the Keras model.
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model
Example 7
def get_sarcasm_model():
    model = models.Sequential()
    model.add(layers.Embedding(10000, 10, input_length=100))
    model.add(layers.Bidirectional(layers.GRU(32, return_sequences=True)))
    model.add(layers.GlobalAvgPool1D())
    model.add(layers.Dense(500, activation="relu"))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(2, activation="softmax"))

    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])

    return model
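
An illustrative training sketch (assumed data shapes, not from the source): the model expects integer token ids padded to length 100 and one-hot labels with 2 classes, matching the softmax / categorical_crossentropy setup above.

import numpy as np
from tensorflow import keras

x_demo = np.random.randint(0, 10000, size=(8, 100))   # 8 fake sequences, padded to length 100
y_demo = keras.utils.to_categorical(np.random.randint(0, 2, size=(8,)), num_classes=2)

sarcasm_model = get_sarcasm_model()
sarcasm_model.fit(x_demo, y_demo, epochs=1, verbose=0)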
Example 8
def generate_resnet_model(input_shape,
                          class_number,
                          min_filters_number,
                          max_kernel_size,
                          network_depth=3,
                          learning_rate=0.01,
                          regularization_rate=0.01,
                          metrics=['accuracy']):
    """
    Generate a ResNet model (see also https://arxiv.org/pdf/1611.06455.pdf)

    The compiled Keras model is returned.

    Parameters
    ----------
    input_shape : tuple
        Shape of the input dataset: (num_samples, num_timesteps, num_channels)
    class_number : int
        Number of classes for classification task
    min_filters_number : int
        Number of filters for first convolutional layer
    max_kernel_size: int,
        Maximum kernel size for convolutions within the residual module
    network_depth : int
        Depth of network, i.e. number of residual modules to stack
    learning_rate : float
        learning rate
    regularization_rate : float
        regularization rate
    metrics : list
        Metrics to calculate on the validation set.
        See https://keras.io/metrics/ for possible values.

    Returns
    -------
    model : Keras model
        The compiled Keras model
    """

    dim_length = input_shape[1]  # number of samples in a time series
    dim_channels = input_shape[2]  # number of channels
    weightinit = 'lecun_uniform'
    regularization = regularization_rate

    def conv_bn_relu_3_sandwich(x, filters, kernel_size):
        first_x = x
        for i in range(3):
            x = layers.Convolution1D(filters,
                                     kernel_size,
                                     padding='same',
                                     kernel_initializer=weightinit,
                                     kernel_regularizer=l2(regularization))(x)
            x = layers.BatchNormalization()(x)
            x = layers.ReLU()(x)

        # 1x1 convolution of the block input so the shortcut matches the main branch
        first_x = layers.Convolution1D(
            filters,
            kernel_size=1,
            padding='same',
            kernel_initializer=weightinit,
            kernel_regularizer=l2(regularization))(first_x)
        x = layers.Add()([x, first_x])
        return x

    x = layers.Input((dim_length, dim_channels))
    inputs = x

    x = layers.BatchNormalization()(
        inputs)  # Added batchnorm (not in original paper)

    # Define/guess filter sizes and kernel sizes
    # Logic here is that kernels become smaller while the number of filters increases
    kernel_sizes = [
        max(3, int(max_kernel_size // (1.41**i))) for i in range(network_depth)
    ]
    filter_numbers = [
        int(min_filters_number * (1.41**i)) for i in range(network_depth)
    ]

    for i in range(network_depth):
        x = conv_bn_relu_3_sandwich(x, filter_numbers[i], kernel_sizes[i])

    x = layers.GlobalAvgPool1D()(x)
    output_layer = layers.Dense(class_number, activation='softmax')(x)

    # Create model and compile
    model = Model(inputs=inputs, outputs=output_layer)

    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(learning_rate=learning_rate),
                  metrics=metrics)

    return model
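
A short call sketch under assumed data dimensions (the numbers are illustrative): a dataset of 512-step windows with 9 channels and 6 target classes.

demo_model = generate_resnet_model(
    input_shape=(None, 512, 9),   # (num_samples, num_timesteps, num_channels); only dims 1 and 2 are used
    class_number=6,
    min_filters_number=32,
    max_kernel_size=9,
    network_depth=3,
)
demo_model.summary()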
Example 9
    def build(self, emb_trainable=True, pre_emb=True, emb_weight=None):
        """
        :param emb_trainable: Define trainable of Embedding Layer
        :param pre_emb: Whether to use pre-trained embedding weights
        :param emb_weight: Pre-trained embedding weights
        :return:
        """
        inputs = layers.Input(shape=(self.maxlen, ))
        pad_k = tf.expand_dims(tf.cast(
            (inputs == 0), dtype=tf.float32) * -99999,
                               axis=2)

        if pre_emb:
            emb_layer = layers.Embedding(self.words_count + 1,
                                         self.embed_dim,
                                         trainable=emb_trainable,
                                         weights=[emb_weight])
        else:
            emb_layer = layers.Embedding(self.words_count + 1,
                                         self.embed_dim,
                                         trainable=True)
        inputs_emb = emb_layer(inputs)

        # Bi-LSTM cell summary
        lstm_layer = layers.LSTM(self.embed_dim, return_sequences=True)
        bi_lstm = layers.Bidirectional(lstm_layer,
                                       merge_mode="ave")(inputs_emb)

        C_features, self.scalar_att, self.vector_att = AttentionLayer(
            self.embed_dim, self.embed_dim, self.channel, 0.0001,
            self.mask_prob)(bi_lstm, pad_k)
        inputs_emb2 = tf.expand_dims(inputs_emb, axis=3)
        C_features = tf.concat([inputs_emb2, C_features], axis=3)

        # kim-cnn process
        pools = []
        for filter_sizes in self.num_filters:
            cnn_layers = layers.Conv2D(self.filter_size,
                                       kernel_size=(filter_sizes,
                                                    self.embed_dim),
                                       activation="relu")
            cnn_out = cnn_layers(C_features)
            max_pools = layers.MaxPool2D(pool_size=(self.maxlen -
                                                    filter_sizes + 1,
                                                    1))(cnn_out)
            max_pools = layers.Flatten()(max_pools)
            pools.append(max_pools)
        concated = layers.concatenate(pools)  # concatenated size: filter_size * len(num_filters)

        # Highway-network-style gating
        gap_input_emb = layers.GlobalAvgPool1D()(
            inputs_emb)  # global average pooling down to the embedding size
        trans_ = layers.Dense(self.embed_dim,
                              activation="sigmoid",
                              use_bias=True)(gap_input_emb)
        carry_ = 1 - trans_
        gap_ = layers.Multiply()([trans_, gap_input_emb])
        concated_ = layers.Multiply()([carry_, concated])
        concated_ = layers.Add()([concated_, gap_])
        outputs = layers.Dense(1, activation="sigmoid")(concated_)

        self.model = keras.Model(inputs=inputs, outputs=outputs)
        return self.model
Example 10
                                                oov_token="<UNK>",
                                                lowercase=True,
                                                tokenizer=tokenizer)


def my_encoder(text_tensor, label):
    text_encoded = encoder.encode(text_tensor.numpy())
    return text_encoded, label


def map_encoder_func(sentence):
    split_sent = tf.strings.split(sentence, ',', maxsplit=4)
    reviews = "sostoken " + split_sent[4] + " eostoken"
    pos_neg = split_sent[2]
    # Graph-mode safe label: a plain Python `if` on a tensor would fail here.
    label = tf.cast(pos_neg == 'pos', tf.int32)

    (text_encoded, label) = tf.py_function(my_encoder,
                                           inp=[reviews, label],
                                           Tout=(tf.int64, tf.int32))

    # py_function loses static shape information, so restore it explicitly.
    text_encoded.set_shape([None])
    label.set_shape([])
    return text_encoded, label


model = keras.Sequential([
    layers.Masking(mask_value=0),  # treat padded 0s as masked
    # input_dim / output_dim are illustrative placeholders; Embedding() requires both
    layers.Embedding(input_dim=encoder.vocab_size + 2, output_dim=32),
    layers.GlobalAvgPool1D(),
    layers.Dense(10)
])
Example 11
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import Input

vocabulary_size = 50000
n_income_groups = 10

posts_input = Input(shape=(None, ), dtype='int32', name='posts')
embedded_posts = layers.Embedding(vocabulary_size, 256)(posts_input)
x = layers.Conv1D(128, 5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalAvgPool1D()(x)

age_prediction = layers.Dense(1, name='age')(x)
income_prediction = layers.Dense(n_income_groups,
                                 activation='softmax',
                                 name='income')(x)
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)
model = models.Model(
    posts_input,
    [age_prediction, income_prediction, gender_prediction],
)

# Compilation options of a multi-output model: loss weighting
# Can also be done in a dictionary way.
model.compile(optimizer='rmsprop',
              loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'],
              loss_weights=[0.25, 1., 10.])  # illustrative weights to balance the different loss scales
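
As the comment above notes, the losses can also be given as dictionaries keyed by the output layer names (a sketch; the weights are illustrative):

model.compile(optimizer='rmsprop',
              loss={'age': 'mse',
                    'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'},
              loss_weights={'age': 0.25, 'income': 1., 'gender': 10.})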
Example 12
    label.set_shape([]) # will be a single integer so it is a scalar

    return encoded_text, label

AUTOTUNE = tf.data.experimental.AUTOTUNE
ds_train = ds_train.map(encode_map, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.shuffle(10000)
ds_train = ds_train.padded_batch(32, padded_shapes=([None], ())) # the dimension set to None is the one that gets padded; newer TF versions can infer padded_shapes

ds_test = ds_test.map(encode_map)
ds_test = ds_test.padded_batch(32, padded_shapes=([None], ()))

model = keras.Sequential([
    layers.Masking(mask_value=0), # "0" is padding value and will be ignored in computations
    layers.Embedding(input_dim=len(vocabulary)+2, output_dim=32), # plus 1 for padding value(0) and we have <UNK> (oov_words) as well, 
    # output dim is 32 dimentional embedding whihc is very small actually
    # each word is converted into 32 dimentional vector
    # BATCH_SIZE x 1000 --> BATCH_SIZE x 1000 X 32
    layers.GlobalAvgPool1D(),  # burada batch size 1000 (yani 1000 sequential word'un 32 dimention'lik vectorunun average'ini aliriz)
    # GlobalAvgPool sonucunda cikan shape BATCH_SIZE X 32 
    layers.Dense(64, activation='relu'),
    layers.Dense(1) # it is a binary classification positive or negative review
    ]
)

model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=keras.optimizers.Adam(3e-4, clipnorm=1), # we clip the gradients so that we do not have exploding gradients
              metrics=['accuracy'])

model.fit(ds_train, epochs=10, verbose=2)
model.evaluate(ds_test, verbose=1)