# (module-level imports assumed: random, keras, AttentionLayer, MaskFlatten)
def test_attention(self):
    max_seq_len = random.randint(5, 10)

    # peek at a few batches from the data generator
    count = 0
    for data, tag in self.data_generator(4, max_seq_len):
        count += 1
        print(data, tag)
        if count > 2:
            break

    class AModel(keras.models.Model):
        def __init__(self, **kwargs):
            super(AModel, self).__init__(**kwargs)
            self.embedding = keras.layers.Embedding(input_dim=5, output_dim=3, mask_zero=True)
            self.attention = AttentionLayer(num_heads=5, size_per_head=3)
            self.timedist = keras.layers.TimeDistributed(MaskFlatten())
            self.bigru = keras.layers.Bidirectional(keras.layers.GRU(units=8))
            self.softmax = keras.layers.Dense(units=2, activation="softmax")

        # def build(self, input_shape):
        #     super(AModel, self).build(input_shape)

        def call(self, inputs, training=None, mask=None):
            out = inputs
            out = self.embedding(out)
            out = self.attention(out)
            out = self.timedist(out)
            out = self.bigru(out)
            out = self.softmax(out)
            return out

    model = keras.models.Sequential([
        keras.layers.Embedding(input_dim=5, output_dim=3, mask_zero=True),
        AttentionLayer(num_heads=5, size_per_head=3),
        keras.layers.TimeDistributed(MaskFlatten()),
        keras.layers.Bidirectional(keras.layers.GRU(units=8)),
        keras.layers.Dense(units=2, activation="softmax"),
    ])
    # model = AModel()

    model.compile(optimizer=keras.optimizers.Adam(lr=0.003),
                  loss=keras.losses.sparse_categorical_crossentropy,
                  metrics=[keras.metrics.sparse_categorical_accuracy])

    # Sequential cannot infer its input shape here, so build() needs it explicitly.
    model.build(input_shape=(None, max_seq_len))
    model.summary()

    model.fit_generator(
        generator=self.data_generator(64, max_seq_len),
        steps_per_epoch=100,
        epochs=100,
        validation_data=self.data_generator(8, max_seq_len),
        validation_steps=10,
        callbacks=[
            keras.callbacks.EarlyStopping(monitor='val_sparse_categorical_accuracy', patience=5),
        ],
    )
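# The test above depends on a MaskFlatten helper that is not shown in this
# excerpt. Below is a minimal sketch of what it could look like, assuming its
# only job is to flatten while letting the Embedding's mask reach the
# Bidirectional GRU (a plain keras.layers.Flatten would drop the mask).
# Names and behavior here are assumptions, not the fixture's actual code:
class MaskFlatten(keras.layers.Flatten):
    def __init__(self, **kwargs):
        super(MaskFlatten, self).__init__(**kwargs)
        self.supports_masking = True  # declare mask support so Keras keeps the mask

    def compute_mask(self, inputs, mask=None):
        return mask  # pass the per-timestep mask through unchanged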
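# Likewise, data_generator is referenced but not defined here. A hypothetical,
# self-contained sketch (in the real fixture it is a method on the test case):
# it yields zero-padded random id batches forever, where pad id 0 matches the
# Embedding's mask_zero=True; the real rule for `tag` is not shown, so the
# parity label below is purely illustrative.
import numpy as np

def data_generator(batch_size, max_seq_len):
    while True:
        seq_lens = np.random.randint(2, max_seq_len + 1, size=batch_size)
        data = np.zeros((batch_size, max_seq_len), dtype=np.int32)
        for ndx, seq_len in enumerate(seq_lens):
            data[ndx, :seq_len] = np.random.randint(1, 5, size=seq_len)  # ids 1..4
        tag = (seq_lens % 2).astype(np.int32)  # illustrative binary label
        yield data, tag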
def build(self, input_shape):
    self.input_spec = keras.layers.InputSpec(shape=input_shape)
    self.attention_layer = AttentionLayer.from_params(
        self.params,
        size_per_head=self.size_per_head,
        name="self",
    )
    self.attention_projector = ProjectionLayer.from_params(
        self.params,
        name="output",
    )
    super(TransformerSelfAttentionLayer, self).build(input_shape)
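# For context, a call() pairing the two sub-layers built above might chain
# them like this. This is an assumption sketched from the names in build()
# (the projector presumably also receives the residual input for the
# add-and-norm step), not the layer's actual implementation:
def call(self, inputs, mask=None, training=None):
    attention_head = self.attention_layer(inputs, mask=mask, training=training)
    attention_output = self.attention_projector([attention_head, inputs],
                                                mask=mask, training=training)
    return attention_output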