seq_length=args.seq_length,
hidden_size=hidden_size,
num_hidden_layers=args.num_hidden_layers,
num_attention_heads=args.num_attention_heads,
intermediate_size=intermediate_size,
hidden_act="gelu",
hidden_dropout_prob=args.hidden_dropout_prob,
attention_probs_dropout_prob=args.attention_probs_dropout_prob,
max_position_embeddings=args.max_position_embeddings,
type_vocab_size=args.type_vocab_size,
initializer_range=0.02,
)
# NOTE(review): the lines above are the tail of an enclosing builder function
# (its `def` is above this chunk); it hands back the loss, the classifier
# logits, and the decoded ground-truth labels for downstream jobs.
return loss, logits, decoders['label_ids']


@flow.global_function(type='train', function_config=GetFunctionConfig(args))
def BertGlueFinetuneJob():
    """Training job: one fine-tuning step of BERT on the GLUE train split.

    Builds the model from the training dataset shards, registers the loss,
    and applies one optimizer update. Returns a dict with the scalar loss
    so the training loop can fetch/log it.
    """
    loss, logits, _ = BuildBert(
        batch_size,
        args.train_data_part_num,
        args.train_data_dir,
        args.train_data_prefix,
    )
    # NOTE(review): both the legacy `flow.losses.add_loss` and the newer
    # `opt.minimize(loss)` are called here. In OneFlow these are two
    # generations of the same mechanism — confirm that using both does not
    # register the loss twice on the version of OneFlow this targets.
    flow.losses.add_loss(loss)
    opt = CreateOptimizer(args)
    opt.minimize(loss)
    return {'loss': loss}


# Evaluation-on-train-set job; its body continues beyond this chunk.
@flow.global_function(type='predict', function_config=GetFunctionConfig(args))
def BertGlueEvalTrainJob():
_blob_conf("input_ids", [seq_length])
_blob_conf("input_mask", [seq_length])
_blob_conf("segment_ids", [seq_length])
# NOTE(review): these calls sit inside an enclosing SQuAD record-decoder
# function whose `def` is above this chunk. Training records carry the
# answer-span labels; eval records instead carry a per-example id
# (presumably used to join predictions back to examples — confirm).
if is_train:
    _blob_conf("start_positions", [1])
    _blob_conf("end_positions", [1])
else:
    _blob_conf("unique_ids", [1])
return blob_confs


if args.do_train:
    @flow.global_function(type='train', function_config=GetFunctionConfig(args))
    def SquadFinetuneJob():
        """Training job: one fine-tuning step of BERT on SQuAD.

        Decodes a batch of SQuAD training records and runs the SQuAD model
        to produce answer start/end logits. (The tail of this function,
        including the loss/optimizer wiring, continues beyond this chunk.)
        """
        hidden_size = 64 * args.num_attention_heads  # H = 64 units per head
        intermediate_size = hidden_size * 4  # FFN width = 4 * hidden (BERT convention)
        decoders = SquadDecoder(
            args.train_data_dir, batch_size, args.train_data_part_num, args.seq_length
        )
        start_logits, end_logits = SQuAD(
            decoders['input_ids'],
            decoders['input_mask'],
            decoders['segment_ids'],
            args.vocab_size,
            seq_length=args.seq_length,
            hidden_size=hidden_size,
            num_hidden_layers=args.num_hidden_layers,
# NOTE(review): `_blob_conf` and the calls below sit inside an enclosing
# pretraining record-decoder function (its `def` is above this chunk);
# `blob_confs` and `ofrecord` are closure variables from that scope.
def _blob_conf(name, shape, dtype=flow.int32):
    # Register one named raw field, decoded from the shared `ofrecord`
    # batch, into the enclosing scope's `blob_confs` dict.
    blob_confs[name] = flow.data.OFRecordRawDecoder(
        ofrecord, name, shape=shape, dtype=dtype
    )

_blob_conf("input_ids", [seq_length])
_blob_conf("next_sentence_labels", [1])
_blob_conf("input_mask", [seq_length])
_blob_conf("segment_ids", [seq_length])
_blob_conf("masked_lm_ids", [max_predictions_per_seq])
_blob_conf("masked_lm_positions", [max_predictions_per_seq])
# Float dtype — presumably per-position weights used to mask padded
# prediction slots out of the MLM loss; confirm against the model code.
_blob_conf("masked_lm_weights", [max_predictions_per_seq], flow.float)
return blob_confs


@flow.global_function(type="train", function_config=GetFunctionConfig(args))
def PretrainJob():
    """Training job: one BERT pretraining step (MLM + next-sentence).

    Decodes a batch of pretraining records and (beyond this chunk) runs the
    BERT pretraining graph and optimizer update.
    """
    hidden_size = 64 * args.num_attention_heads  # H = 64 units per head
    intermediate_size = hidden_size * 4  # FFN width = 4 * hidden (BERT convention)
    # A single data part is pinned to CPU device 0 for decoding; the
    # multi-part branch (continuing beyond this chunk) uses the default
    # placement, presumably so shards can be read in parallel — confirm.
    if args.data_part_num == 1:
        with flow.scope.placement("cpu", "0:0"):
            decoders = BertDecoder(
                args.data_dir,
                batch_size,
                args.data_part_num,
                args.seq_length,
                args.max_predictions_per_seq,
            )
    else:
        assert args.data_part_num > 1