def __init__(self,
             vocab_size=21128,
             max_positions=512,
             hidden_size=768,
             type_vocab_size=2,
             num_layers=6,
             num_attention_heads=8,
             intermediate_size=3072,
             activation="gelu",
             hidden_dropout_rate=0.2,
             attention_dropout_rate=0.1,
             initializer_range=0.02,
             epsilon=1e-12,
             **kwargs):
    # build functional model
    input_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
    segment_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="segment_ids")
    attention_mask = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")
    bert_model = BertModel(
        vocab_size=vocab_size,
        max_positions=max_positions,
        hidden_size=hidden_size,
        type_vocab_size=type_vocab_size,
        num_layers=num_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        activation=activation,
        hidden_dropout_rate=hidden_dropout_rate,
        attention_dropout_rate=attention_dropout_rate,
        initializer_range=initializer_range,
        epsilon=epsilon,
        name="bert",
    )
    sequence_output, _, _, _ = bert_model(input_ids, segment_ids, attention_mask)
    # tie the MLM head's output projection to the input embedding table
    embedding_table = bert_model.get_embedding_table()
    mlm = BertMaskedLanguageModelHead(
        embedding_table,
        vocab_size=vocab_size,
        hidden_size=hidden_size,
        activation=activation,
        initializer_range=initializer_range,
        epsilon=epsilon,
        name="cls",
    )
    pred = mlm(sequence_output)
    # identity Lambda only gives the output tensor a stable name
    pred = tf.keras.layers.Lambda(lambda x: x, name="predictions")(pred)
    super().__init__(inputs=[input_ids, segment_ids, attention_mask], outputs=[pred], **kwargs)
    self.epsilon = epsilon
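
# Usage sketch (hedged): the constructor above presumably belongs to a
# tf.keras.Model subclass; the class name BertForMaskedLanguageModel below is
# hypothetical, chosen only for illustration.
#
#   model = BertForMaskedLanguageModel(vocab_size=21128, num_layers=6)
#   input_ids = tf.constant([[101, 2769, 103, 102]])         # 103 = [MASK]
#   segment_ids = tf.zeros_like(input_ids)
#   attention_mask = tf.ones_like(input_ids)
#   pred = model([input_ids, segment_ids, attention_mask])   # (1, 4, vocab_size)
#   masked_token = tf.argmax(pred[0, 2])                     # top token at the mask
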
def __init__(self,
             vocab_size=21128,
             max_positions=512,
             hidden_size=768,
             type_vocab_size=2,
             num_layers=6,
             num_attention_heads=8,
             intermediate_size=3072,
             activation="gelu",
             hidden_dropout_rate=0.2,
             attention_dropout_rate=0.1,
             initializer_range=0.02,
             epsilon=1e-12,
             **kwargs):
    # build functional model
    input_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
    segment_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="segment_ids")
    attention_mask = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")
    bert_model = BertModel(
        vocab_size=vocab_size,
        max_positions=max_positions,
        hidden_size=hidden_size,
        type_vocab_size=type_vocab_size,
        num_layers=num_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        activation=activation,
        hidden_dropout_rate=hidden_dropout_rate,
        attention_dropout_rate=attention_dropout_rate,
        initializer_range=initializer_range,
        epsilon=epsilon,
        name="bert",
    )
    sequence_output, pooled_output, hidden_states, attention_weights = bert_model(
        input_ids, segment_ids, attention_mask)
    cls_embedding = tf.keras.layers.Lambda(lambda x: x[:, 0, :], name="cls")(sequence_output)
    pooler_embedding = tf.keras.layers.Lambda(lambda x: x, name="pooler")(pooled_output)
    avg_embedding = tf.keras.layers.Lambda(
        lambda x: tf.reduce_mean(x, axis=1), name="avg")(sequence_output)
    all_hidden_states = tf.keras.layers.Lambda(lambda x: x, name="hidden_states")(hidden_states)
    all_attention_weights = tf.keras.layers.Lambda(
        lambda x: x, name="attention_weights")(attention_weights)
    super().__init__(
        inputs=[input_ids, segment_ids, attention_mask],
        outputs=[cls_embedding, pooler_embedding, avg_embedding,
                 all_hidden_states, all_attention_weights],
        **kwargs)
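
# Usage sketch (hedged): a plausible way to pull sentence vectors out of the
# model above; the class name BertForSentenceEmbedding is hypothetical.
#
#   model = BertForSentenceEmbedding()
#   cls_vec, pooler_vec, avg_vec, hidden, attn = model(
#       [input_ids, segment_ids, attention_mask])
#   # cls_vec / pooler_vec / avg_vec are each (batch, hidden_size): the token-0,
#   # tanh-pooled, and mean-pooled views of the encoder output. Note that the
#   # "avg" output averages over every position, padding included.
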
def __init__(self,
             num_labels=2,
             num_classes=2,
             vocab_size=21128,
             max_positions=512,
             hidden_size=768,
             type_vocab_size=2,
             num_layers=6,
             num_attention_heads=8,
             intermediate_size=3072,
             activation="gelu",
             hidden_dropout_rate=0.2,
             attention_dropout_rate=0.1,
             initializer_range=0.02,
             epsilon=1e-12,
             **kwargs):
    # build functional model
    input_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
    segment_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="segment_ids")
    attention_mask = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")
    bert_model = BertModel(
        vocab_size=vocab_size,
        max_positions=max_positions,
        hidden_size=hidden_size,
        type_vocab_size=type_vocab_size,
        num_layers=num_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        activation=activation,
        hidden_dropout_rate=hidden_dropout_rate,
        attention_dropout_rate=attention_dropout_rate,
        initializer_range=initializer_range,
        epsilon=epsilon,
        name="bert",
    )
    sequence_output, pooled_output, _, _ = bert_model(
        input_ids, segment_ids, attention_mask)
    logits = tf.keras.layers.Dense(num_labels, name="dense")(sequence_output)
    start_logits = tf.keras.layers.Lambda(lambda x: x[:, :, 0], name="start")(logits)
    end_logits = tf.keras.layers.Lambda(lambda x: x[:, :, 1], name="end")(logits)
    class_logits = tf.keras.layers.Dense(num_classes, name="class")(pooled_output)
    super().__init__(
        inputs=[input_ids, segment_ids, attention_mask],
        outputs=[start_logits, end_logits, class_logits],
        **kwargs)
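
# Decoding sketch (hedged): one plausible way to read the three outputs; the
# class name BertForQuestionAnswering is hypothetical.
#
#   start_logits, end_logits, class_logits = model(
#       [input_ids, segment_ids, attention_mask])
#   start = tf.argmax(start_logits, axis=-1)     # (batch,) answer-span start
#   end = tf.argmax(end_logits, axis=-1)         # (batch,) answer-span end
#   cls = tf.argmax(class_logits, axis=-1)       # (batch,) e.g. answerable or not
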
def __init__(self,
             num_labels=2,
             vocab_size=21128,
             max_positions=512,
             hidden_size=768,
             type_vocab_size=2,
             num_layers=6,
             num_attention_heads=8,
             intermediate_size=3072,
             activation="gelu",
             hidden_dropout_rate=0.2,
             attention_dropout_rate=0.1,
             initializer_range=0.02,
             epsilon=1e-12,
             **kwargs):
    # build functional model
    input_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
    segment_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="segment_ids")
    attention_mask = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")
    bert_model = BertModel(
        vocab_size=vocab_size,
        max_positions=max_positions,
        hidden_size=hidden_size,
        type_vocab_size=type_vocab_size,
        num_layers=num_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        activation=activation,
        hidden_dropout_rate=hidden_dropout_rate,
        attention_dropout_rate=attention_dropout_rate,
        initializer_range=initializer_range,
        epsilon=epsilon,
        name="bert",
    )
    sequence_output, _, _, _ = bert_model(input_ids, segment_ids, attention_mask)
    logits = tf.keras.layers.Dense(num_labels, name="dense")(sequence_output)
    head_logits = tf.keras.layers.Lambda(lambda x: x[:, :, 0], name="head")(logits)
    tail_logits = tf.keras.layers.Lambda(lambda x: x[:, :, 1], name="tail")(logits)
    super().__init__(
        inputs=[input_ids, segment_ids, attention_mask],
        outputs=[head_logits, tail_logits],
        **kwargs)
    self.num_labels = num_labels
    self.bert_model = bert_model
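
# Decoding sketch (hedged): the "head"/"tail" logits look like a pointer-style
# span tagger. One plausible decode, not necessarily the author's, applies a
# per-position sigmoid threshold so several spans can fire in one sequence:
#
#   head_logits, tail_logits = model([input_ids, segment_ids, attention_mask])
#   heads = tf.cast(tf.sigmoid(head_logits) > 0.5, tf.int32)  # (batch, seq_len)
#   tails = tf.cast(tf.sigmoid(tail_logits) > 0.5, tf.int32)
#   # pair each head position with the nearest following tail position
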
def __init__(self,
             vocab_size=21128,
             max_positions=512,
             hidden_size=768,
             type_vocab_size=2,
             num_layers=6,
             num_attention_heads=8,
             intermediate_size=3072,
             activation="gelu",
             hidden_dropout_rate=0.2,
             attention_dropout_rate=0.1,
             initializer_range=0.02,
             epsilon=1e-12,
             temperature=0.05,
             negative_weight=0.2,
             **kwargs):
    # build functional model
    input_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
    segment_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="segment_ids")
    attention_mask = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")
    bert_model = BertModel(
        vocab_size=vocab_size,
        max_positions=max_positions,
        hidden_size=hidden_size,
        type_vocab_size=type_vocab_size,
        num_layers=num_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        activation=activation,
        hidden_dropout_rate=hidden_dropout_rate,
        attention_dropout_rate=attention_dropout_rate,
        initializer_range=initializer_range,
        epsilon=epsilon,
        name="bert",
    )
    sequence_output, _, _, _ = bert_model(input_ids, segment_ids, attention_mask)
    # project the [CLS] position into the sentence-embedding space
    embedding = tf.keras.layers.Dense(hidden_size, name="embedding")(
        sequence_output[:, 0, :])
    super().__init__(
        inputs=[input_ids, segment_ids, attention_mask],
        outputs=[embedding],
        **kwargs)
    self.temperature = temperature
    self.negative_weight = negative_weight
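
# Loss sketch (hedged): the stored `temperature` suggests a SimCSE-style
# in-batch contrastive objective. A minimal sketch, assuming paired embeddings
# emb_a / emb_b of shape (batch, hidden_size); how `negative_weight` enters
# the loss is not visible here, so it is omitted:
#
#   def contrastive_loss(emb_a, emb_b, temperature=0.05):
#       a = tf.math.l2_normalize(emb_a, axis=-1)
#       b = tf.math.l2_normalize(emb_b, axis=-1)
#       sim = tf.matmul(a, b, transpose_b=True) / temperature  # (batch, batch)
#       labels = tf.range(tf.shape(sim)[0])                    # diagonal = positives
#       return tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(
#           labels, sim, from_logits=True))
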
def __init__(self,
             num_labels,
             vocab_size=21128,
             max_positions=512,
             hidden_size=768,
             type_vocab_size=2,
             num_layers=6,
             num_attention_heads=8,
             intermediate_size=3072,
             activation="gelu",
             hidden_dropout_rate=0.2,
             attention_dropout_rate=0.1,
             initializer_range=0.02,
             epsilon=1e-12,
             **kwargs):
    # build functional model
    input_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
    segment_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="segment_ids")
    attention_mask = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")
    bert_model = BertModel(
        vocab_size=vocab_size,
        max_positions=max_positions,
        hidden_size=hidden_size,
        type_vocab_size=type_vocab_size,
        num_layers=num_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        activation=activation,
        hidden_dropout_rate=hidden_dropout_rate,
        attention_dropout_rate=attention_dropout_rate,
        initializer_range=initializer_range,
        epsilon=epsilon,
        name="bert",
    )
    # CRF decoding layer; requires `import tensorflow_addons as tfa`
    crf = tfa.layers.CRF(num_labels)
    sequence_outputs, _, _, _ = bert_model(input_ids, segment_ids, attention_mask)
    decode_sequence, potentials, sequence_length, kernel = crf(sequence_outputs)
    super().__init__(
        inputs=[input_ids, segment_ids, attention_mask],
        outputs=[decode_sequence, potentials, sequence_length, kernel],
        **kwargs)
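
# Training sketch (hedged): the four CRF outputs fit tensorflow_addons'
# negative log-likelihood loss; `labels` of shape (batch, seq_len) is assumed:
#
#   decoded, potentials, seq_len, kernel = model(
#       [input_ids, segment_ids, attention_mask])
#   log_likelihood, _ = tfa.text.crf_log_likelihood(
#       potentials, labels, seq_len, kernel)
#   loss = -tf.reduce_mean(log_likelihood)
#   # `decoded` is the Viterbi path and can be used directly as the prediction
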
def __init__(self,
             num_labels=2,
             vocab_size=21128,
             max_positions=512,
             hidden_size=768,
             type_vocab_size=2,
             num_layers=6,
             num_attention_heads=8,
             intermediate_size=3072,
             activation="gelu",
             hidden_dropout_rate=0.2,
             attention_dropout_rate=0.1,
             initializer_range=0.02,
             epsilon=1e-12,
             **kwargs):
    # build functional model
    input_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="input_ids")
    segment_ids = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="segment_ids")
    attention_mask = tf.keras.layers.Input(shape=(None,), dtype=tf.int32, name="attention_mask")
    bert_model = BertModel(
        vocab_size=vocab_size,
        max_positions=max_positions,
        hidden_size=hidden_size,
        type_vocab_size=type_vocab_size,
        num_layers=num_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        activation=activation,
        hidden_dropout_rate=hidden_dropout_rate,
        attention_dropout_rate=attention_dropout_rate,
        initializer_range=initializer_range,
        epsilon=epsilon,
        name="bert",
    )
    sequence_output, _, _, _ = bert_model(input_ids, segment_ids, attention_mask)
    logits = tf.keras.layers.Dense(num_labels, name="logits")(sequence_output)
    super().__init__(
        inputs=[input_ids, segment_ids, attention_mask],
        outputs=[logits],
        **kwargs)
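
# Training sketch (hedged): per-token softmax classification over the logits;
# `labels` of shape (batch, seq_len) is assumed:
#
#   logits = model([input_ids, segment_ids, attention_mask])  # (batch, seq_len, num_labels)
#   loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
#   loss = loss_fn(labels, logits)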