def maskedBert(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    masked_lm_positions_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    initializer_range=0.02,
):
    """Build a BERT masked-language-model prediction graph.

    Runs the shared BERT backbone over the inputs, then projects the hidden
    states at the masked positions back onto the vocabulary, reusing the
    backbone's embedding table as the output weights (weight tying).

    Args:
        input_ids_blob: token-id input blob (presumably (batch, seq_length) --
            TODO confirm against callers).
        input_mask_blob: attention-mask blob matching input_ids_blob.
        token_type_ids_blob: segment-id blob.
        masked_lm_positions_blob: positions of the tokens to predict.
        vocab_size: vocabulary size of the output projection.
        seq_length..initializer_range: standard BERT-base hyperparameters,
            forwarded verbatim to the backbone.

    Returns:
        The prediction blob produced by _AddMaskedLanguageModel.
    """
    backbone = bert_util.BertBackbone(
        input_ids_blob=input_ids_blob,
        input_mask_blob=input_mask_blob,
        token_type_ids_blob=token_type_ids_blob,
        vocab_size=vocab_size,
        seq_length=seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )
    predictions = _AddMaskedLanguageModel(
        input_blob=backbone.sequence_output(),
        output_weights_blob=backbone.embedding_table(),
        positions_blob=masked_lm_positions_blob,
        seq_length=seq_length,
        hidden_size=hidden_size,
        vocab_size=vocab_size,
        hidden_act=bert_util.GetActivation(hidden_act),
        initializer_range=initializer_range,
    )
    # NOTE(review): the original also built PooledOutput(...) here, but its
    # result was never used or returned; that dead computation was removed.
    return predictions
def SQuAD(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    initializer_range=0.02,
):
    """Build a SQuAD span-prediction head on top of the BERT backbone.

    A single dense layer with two output units is applied to every token's
    final hidden state; unit 0 scores the answer start position and unit 1
    scores the answer end position.

    Returns:
        (start_logits, end_logits), each of shape (batch, seq_length, 1).
    """
    encoder = bert_util.BertBackbone(
        input_ids_blob=input_ids_blob,
        input_mask_blob=input_mask_blob,
        token_type_ids_blob=token_type_ids_blob,
        vocab_size=vocab_size,
        seq_length=seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )
    with flow.scope.namespace("cls-squad"):
        # Flatten (batch, seq, hidden) -> (batch*seq, hidden) so a single
        # fully-connected layer scores every token at once.
        token_states = flow.reshape(encoder.sequence_output(), [-1, hidden_size])
        span_scores = bert_util._FullyConnected(
            token_states,
            hidden_size,
            units=2,
            weight_initializer=bert_util.CreateInitializer(initializer_range),
            name='output',
        )
        span_scores = flow.reshape(span_scores, [-1, seq_length, 2])
        # Split the last axis: channel 0 = start logits, channel 1 = end logits.
        start_logits = flow.slice(span_scores, [None, None, 0], [None, None, 1])
        end_logits = flow.slice(span_scores, [None, None, 1], [None, None, 1])
    return start_logits, end_logits
def GlueBERT(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    label_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    initializer_range=0.02,
    label_num=2,
    replace_prob=None,
):
    """Sequence-classification graph for GLUE-style fine-tuning.

    Encodes the inputs with the shared BERT backbone, pools the sequence
    output, and attaches a classification head plus its loss.

    Note: ``replace_prob`` is accepted for interface compatibility but is not
    referenced in this body.

    Returns:
        (loss, logit_blob) -- the classification loss and raw logits.
    """
    encoder = bert_util.BertBackbone(
        input_ids_blob=input_ids_blob,
        input_mask_blob=input_mask_blob,
        token_type_ids_blob=token_type_ids_blob,
        vocab_size=vocab_size,
        seq_length=seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )
    cls_repr = PooledOutput(
        sequence_output=encoder.sequence_output(),
        hidden_size=hidden_size,
        initializer_range=initializer_range,
    )
    loss, _, logit_blob = _AddClassficationLoss(
        input_blob=cls_repr,
        label_blob=label_blob,
        hidden_size=hidden_size,
        label_num=label_num,
        initializer_range=initializer_range,
        scope_name='classification',
    )
    return loss, logit_blob
def PreTrain(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    masked_lm_positions_blob,
    masked_lm_ids_blob,
    masked_lm_weights_blob,
    next_sentence_label_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    max_predictions_per_seq=20,
    initializer_range=0.02,
    use_fp16=False,
):
    """BERT pre-training graph: masked-LM loss plus next-sentence loss.

    The masked-LM head ties its output weights to the backbone's embedding
    table; the next-sentence head operates on the pooled sequence output.

    Note: ``use_fp16`` is accepted for interface compatibility but is not
    referenced in this body -- presumably consumed by the caller's config.

    Returns:
        (total_loss, lm_loss, ns_loss), each reduced to a scalar mean.
    """
    encoder = bert_util.BertBackbone(
        input_ids_blob=input_ids_blob,
        input_mask_blob=input_mask_blob,
        token_type_ids_blob=token_type_ids_blob,
        vocab_size=vocab_size,
        seq_length=seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )
    raw_lm_loss, _, _ = _AddMaskedLanguageModelLoss(
        input_blob=encoder.sequence_output(),
        output_weights_blob=encoder.embedding_table(),
        positions_blob=masked_lm_positions_blob,
        label_id_blob=masked_lm_ids_blob,
        label_weight_blob=masked_lm_weights_blob,
        seq_length=seq_length,
        hidden_size=hidden_size,
        vocab_size=vocab_size,
        max_predictions_per_seq=max_predictions_per_seq,
        hidden_act=bert_util.GetActivation(hidden_act),
        initializer_range=initializer_range,
    )
    pooled = PooledOutput(encoder.sequence_output(), hidden_size, initializer_range)
    raw_ns_loss, _, _ = _AddNextSentenceOutput(
        input_blob=pooled,
        label_blob=next_sentence_label_blob,
        hidden_size=hidden_size,
        initializer_range=initializer_range,
    )
    with flow.scope.namespace("cls-loss"):
        lm_loss = flow.math.reduce_mean(raw_lm_loss)
        ns_loss = flow.math.reduce_mean(raw_ns_loss)
        total_loss = lm_loss + ns_loss
    return total_loss, lm_loss, ns_loss
def BertStudentForSequenceClassification(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    label_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    initializer_range=0.02,
    label_num=2,
    is_student=False,
    fit_size=768,
    is_train=True,
):
    """Student-side sequence-classification graph for BERT distillation.

    Builds the backbone and classification head under the 'student' namespace
    and exposes the intermediate representations (embedding output, per-layer
    encoder outputs, attention probabilities) needed for distillation losses.

    When ``is_student`` is True, every layer output (embedding output first,
    then each encoder layer) is passed through a shared 'fit_dense' projection
    to ``fit_size`` units -- presumably to match the teacher's hidden size,
    TinyBERT-style (TODO confirm against the teacher graph).

    Returns:
        (logit_blob, sequence_output, att_output):
        classification logits, the list of (optionally projected) layer
        outputs with the embedding output prepended, and the list of
        attention-probability blobs.
    """
    with flow.scope.namespace('student'):
        backbone = bert_util.BertBackbone(
            input_ids_blob=input_ids_blob,
            input_mask_blob=input_mask_blob,
            token_type_ids_blob=token_type_ids_blob,
            vocab_size=vocab_size,
            seq_length=seq_length,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            hidden_dropout_prob=hidden_dropout_prob,
            attention_probs_dropout_prob=attention_probs_dropout_prob,
            max_position_embeddings=max_position_embeddings,
            type_vocab_size=type_vocab_size,
            initializer_range=initializer_range,
            is_train=is_train,
        )
        pooled_output = PooledOutput(
            sequence_output=backbone.sequence_output(),
            hidden_size=hidden_size,
            initializer_range=initializer_range,
            is_train=is_train,
        )
        logit_blob = _AddClassfication(
            input_blob=pooled_output,
            label_blob=label_blob,
            hidden_size=hidden_size,
            label_num=label_num,
            initializer_range=initializer_range,
            scope_name='classification',
            is_train=is_train,
        )
        sequence_output = backbone.all_encoder_layers()
        att_output = backbone.all_attention_probs()
        # Prepend the embedding output so distillation can align layer 0
        # (embeddings) through layer N of student and teacher.
        sequence_output.insert(0, backbone.embedding_output())
        if is_student:
            # Project each student layer up to fit_size so it is comparable
            # with the corresponding teacher layer.
            sequence_output = [
                fit_dense(
                    input_blob=layer,
                    hidden_size=hidden_size,
                    label_num=fit_size,
                    initializer_range=initializer_range,
                    scope_name='fit_dense',
                    is_train=is_train,
                )
                for layer in sequence_output
            ]
    return logit_blob, sequence_output, att_output