        seq_length=args.seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=args.num_hidden_layers,
        num_attention_heads=args.num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act="gelu",
        hidden_dropout_prob=args.hidden_dropout_prob,
        attention_probs_dropout_prob=args.attention_probs_dropout_prob,
        max_position_embeddings=args.max_position_embeddings,
        type_vocab_size=args.type_vocab_size,
        initializer_range=0.02,
    )
    return loss, logits, decoders['label_ids']


@flow.global_function(type='train', function_config=GetFunctionConfig(args))
def BertGlueFinetuneJob():
    loss, logits, _ = BuildBert(
        batch_size,
        args.train_data_part_num,
        args.train_data_dir,
        args.train_data_prefix,
    )
    flow.losses.add_loss(loss)
    opt = CreateOptimizer(args)
    opt.minimize(loss)
    return {'loss': loss}
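
# Hedged usage sketch (not part of the snippet above): in OneFlow's lazy mode a
# @flow.global_function job is invoked like a plain Python function; assuming an
# `args.iter_num`-style loop, fetching the returned dict yields numpy values.
# for step in range(args.iter_num):
#     loss = BertGlueFinetuneJob().get()['loss']
#     print(step, loss.mean())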


@flow.global_function(type='predict', function_config=GetFunctionConfig(args))
def BertGlueEvalTrainJob():
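    # Hedged sketch of the eval body: presumably it mirrors the training job,
    # reusing BuildBert on the same split and returning logits plus label_ids so
    # accuracy can be computed outside the job (batch_size is assumed here).
    _, logits, label_ids = BuildBert(
        batch_size,
        args.train_data_part_num,
        args.train_data_dir,
        args.train_data_prefix,
    )
    return logits, label_ids
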
        _blob_conf("input_ids", [seq_length])
        _blob_conf("input_mask", [seq_length])
        _blob_conf("segment_ids", [seq_length])
        if is_train:
            _blob_conf("start_positions", [1])
            _blob_conf("end_positions", [1])
        else:
            _blob_conf("unique_ids", [1])

        return blob_confs


if args.do_train:

    @flow.global_function(type='train',
                          function_config=GetFunctionConfig(args))
    def SquadFinetuneJob():
        hidden_size = 64 * args.num_attention_heads  # H = 64 units per attention head
        intermediate_size = hidden_size * 4

        decoders = SquadDecoder(args.train_data_dir, batch_size,
                                args.train_data_part_num, args.seq_length)

        start_logits, end_logits = SQuAD(
            decoders['input_ids'],
            decoders['input_mask'],
            decoders['segment_ids'],
            args.vocab_size,
            seq_length=args.seq_length,
            hidden_size=hidden_size,
            num_hidden_layers=args.num_hidden_layers,
            num_attention_heads=args.num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act="gelu",
            hidden_dropout_prob=args.hidden_dropout_prob,
            attention_probs_dropout_prob=args.attention_probs_dropout_prob,
            max_position_embeddings=args.max_position_embeddings,
            type_vocab_size=args.type_vocab_size,
            initializer_range=0.02,
        )
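
        # Hedged sketch (not from the original snippet): mirroring the GLUE job
        # above, compute a sparse softmax cross-entropy over the start/end
        # position logits and register the averaged loss with the optimizer.
        start_loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
            labels=flow.reshape(decoders['start_positions'], [-1]),
            logits=flow.reshape(start_logits, [-1, args.seq_length]),
        )
        end_loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
            labels=flow.reshape(decoders['end_positions'], [-1]),
            logits=flow.reshape(end_logits, [-1, args.seq_length]),
        )
        total_loss = (start_loss + end_loss) * 0.5
        flow.losses.add_loss(total_loss)
        opt = CreateOptimizer(args)
        opt.minimize(total_loss)
        return total_loss
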
    def _blob_conf(name, shape, dtype=flow.int32):
        blob_confs[name] = flow.data.OFRecordRawDecoder(
            ofrecord, name, shape=shape, dtype=dtype
        )
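    # Each _blob_conf call below registers one OFRecord feature: OFRecordRawDecoder
    # reads the raw feature `name` from the batched `ofrecord` blob and yields a
    # blob of shape [batch_size, *shape] with the given dtype.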

    _blob_conf("input_ids", [seq_length])
    _blob_conf("next_sentence_labels", [1])
    _blob_conf("input_mask", [seq_length])
    _blob_conf("segment_ids", [seq_length])
    _blob_conf("masked_lm_ids", [max_predictions_per_seq])
    _blob_conf("masked_lm_positions", [max_predictions_per_seq])
    _blob_conf("masked_lm_weights", [max_predictions_per_seq], flow.float)
    return blob_confs


@flow.global_function(type="train", function_config=GetFunctionConfig(args))
def PretrainJob():
    hidden_size = 64 * args.num_attention_heads  # H = 64 units per attention head
    intermediate_size = hidden_size * 4

    if args.data_part_num == 1:
        with flow.scope.placement("cpu", "0:0"):
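            # With only one OFRecord part there is nothing to shard, so the
            # decoder is pinned to a single CPU device ("0:0") here.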
            decoders = BertDecoder(
                args.data_dir,
                batch_size,
                args.data_part_num,
                args.seq_length,
                args.max_predictions_per_seq,
            )
    else:
        assert args.data_part_num > 1