def __init__(
    self,
    num_layers,
    d_model,
    num_heads,
    dff,
    maximum_position_encoding,
    pretrained_cnn_type,
    rate=0.1,
    multiplier=10,
):
    """Create the sub-layers used by CoAttEncoder.

    Args:
        num_layers: Number of EncoderLayer instances built for each of
            the two stacks (image and question).
        d_model: Output width of ``question_dense`` and ``image_dense``;
            also passed to ``positional_encoding`` and every EncoderLayer.
        num_heads: Forwarded to every EncoderLayer.
        dff: Forwarded to every EncoderLayer.
        maximum_position_encoding: Stored on the instance, passed as the
            first argument of ``positional_encoding``, and multiplied by
            ``multiplier`` to size ``fc1``.
        pretrained_cnn_type: Forwarded to ``pretrained_cnn``.
        rate: Forwarded to every EncoderLayer and used as the rate of
            both Dropout layers.
        multiplier: ``fc1`` has ``maximum_position_encoding * multiplier``
            units.
    """
    super(CoAttEncoder, self).__init__()
    layers = tf.keras.layers

    def encoder_stack():
        # One fresh list of `num_layers` independent EncoderLayer objects.
        return [
            EncoderLayer(d_model, num_heads, dff, rate)
            for _ in range(num_layers)
        ]

    # Plain hyper-parameters kept on the instance.
    self.d_model = d_model
    self.num_layers = num_layers
    self.maximum_position_encoding = maximum_position_encoding

    # Question branch. No token-embedding layer is created here (an
    # earlier tf.keras.layers.Embedding attribute was disabled).
    recurrent = layers.LSTM(units=512, return_sequences=True)
    self.bilstm = layers.Bidirectional(recurrent)
    self.question_dense = layers.Dense(units=d_model)
    self.pos_encoding = positional_encoding(
        maximum_position_encoding, self.d_model
    )

    # Image branch.
    self.pretrained_CNN = pretrained_cnn(pretrained_cnn_type)
    self.conv = layers.Conv2D(
        filters=1024, kernel_size=3, padding='same', activation='relu'
    )
    self.image_dense = Dense(d_model, activation='relu')
    fc1_units = maximum_position_encoding * multiplier
    self.fc1 = Dense(fc1_units)

    # Two separate encoder stacks, image first and then question, each
    # followed (further down) by its own dropout layer.
    self.img_enc_layers = encoder_stack()
    self.qus_enc_layers = encoder_stack()

    self.qus_dropout = layers.Dropout(rate=rate)
    self.img_dropout = layers.Dropout(rate=rate)
def __init__(
    self,
    num_layers,
    d_model,
    num_heads,
    dff,
    maximum_position_encoding,
    rate=0.1,
    pretrained_cnn_type='inception',
    multiplier=10,
):
    """Create the sub-layers used by Image_Question_Encoder.

    Args:
        num_layers: Forwarded to the question ``Encoder``.
        d_model: Output width of ``image_dense``; also forwarded to the
            question ``Encoder``.
        num_heads: Forwarded to the question ``Encoder``.
        dff: Forwarded to the question ``Encoder``.
        maximum_position_encoding: Stored on the instance, forwarded to
            the question ``Encoder``, and multiplied by ``multiplier`` to
            size ``fc1``.
        rate: Accepted but not read anywhere in this constructor.
        pretrained_cnn_type: Forwarded to ``pretrained_cnn``.
        multiplier: ``fc1`` has ``maximum_position_encoding * multiplier``
            units.
    """
    super(Image_Question_Encoder, self).__init__()
    layers = tf.keras.layers

    self.maximum_position_encoding = maximum_position_encoding

    # Image-side layers.
    self.pretrained_CNN = pretrained_cnn(pretrained_cnn_type)
    fc1_units = maximum_position_encoding * multiplier
    self.fc1 = Dense(fc1_units)
    self.image_dense = Dense(d_model, activation='relu')

    # Question-side encoder.
    # NOTE(review): `rate` is not passed on, so Encoder falls back to
    # whatever default it defines (if any) - confirm this is intended.
    encoder_config = {
        'num_layers': num_layers,
        'd_model': d_model,
        'num_heads': num_heads,
        'dff': dff,
        'maximum_position_encoding': maximum_position_encoding,
    }
    self.question_encoder = Encoder(**encoder_config)

    # Joins its inputs along axis 1.
    self.concatenator = Concatenate(axis=1)

    self.conv = layers.Conv2D(
        filters=1024, kernel_size=3, padding='same', activation='relu'
    )
    recurrent = layers.LSTM(units=512, return_sequences=True)
    self.bilstm = layers.Bidirectional(recurrent)
def __init__(
    self,
    num_layers,
    d_model,
    num_heads,
    dff,
    maximum_position_encoding,
    rate=0.1,
    pretrained_cnn_type='inception',
):
    """Create the sub-layers used by IEAM.

    Args:
        num_layers: Number of DecoderLayer instances to build; also
            stored on the instance.
        d_model: Stored on the instance; output width of ``image_dense``;
            passed to ``positional_encoding`` and every DecoderLayer.
        num_heads: Forwarded to every DecoderLayer.
        dff: Forwarded to every DecoderLayer.
        maximum_position_encoding: Accepted but not read anywhere in this
            constructor (its former uses are disabled).
        rate: Forwarded to every DecoderLayer and used as the rate of the
            Dropout layer.
        pretrained_cnn_type: Forwarded to ``pretrained_cnn``.
    """
    super(IEAM, self).__init__()
    layers = tf.keras.layers

    self.d_model = d_model
    self.num_layers = num_layers

    # NOTE(review): the first argument is fixed at 49 instead of using
    # maximum_position_encoding - presumably a 7x7 image feature map;
    # confirm against the CNN output that is fed to this model.
    self.pos_encoding = positional_encoding(49, d_model)

    # Image feature extraction and projection to d_model. No `fc1` layer
    # is created for this model.
    self.pretrained_CNN = pretrained_cnn(pretrained_cnn_type)
    self.conv = layers.Conv2D(
        filters=1024, kernel_size=3, padding='same', activation='relu'
    )
    self.image_dense = Dense(d_model, activation='relu')

    # Stack of `num_layers` independent decoder layers.
    decoder_stack = [
        DecoderLayer(d_model, num_heads, dff, rate)
        for _ in range(num_layers)
    ]
    self.dec_layers = decoder_stack

    self.dropout = layers.Dropout(rate=rate)