Example #1
class MoCo(D.Layer):  # class line assumed; the original snippet starts inside __init__
    def __init__(self, dim=300, K=65536, m=0.999, T=0.07, mlp=False):
        """
        dim: feature dimension (default: 300)
        K: queue size; number of negative keys (default: 65536)
        m: moco momentum of updating key encoder (default: 0.999)
        T: softmax temperature (default: 0.07)
        """
        super(MoCo, self).__init__()

        self.K = K
        self.m = m
        self.T = T

        # create the encoders
        self.encoder_q = ErnieModelForSequenceClassification.from_pretrained('ernie-2.0-large-en', num_labels=dim)
        self.encoder_k = ErnieModelForSequenceClassification.from_pretrained('ernie-2.0-large-en', num_labels=dim)

        if mlp:
            dim_mlp = 1024
            self.encoder_q.classifier = D.Sequential(
                D.Linear(dim_mlp, dim_mlp, act='relu'), self.encoder_q.classifier)
            self.encoder_k.classifier = D.Sequential(
                D.Linear(dim_mlp, dim_mlp, act='relu'), self.encoder_k.classifier)

        for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()):
            param_k.set_value(param_q.numpy())  # initialize key encoder with the query weights
            param_k.stop_gradient = True  # key encoder is not updated by gradient

        # create the queue
        self.queue = L.randn([dim, K])
        self.queue = L.l2_normalize(self.queue, axis=0)  # unit-normalize each key

        self.queue_ptr = L.zeros([1], dtype='int32')
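
The constructor above only initializes the two encoders; during training MoCo updates the key encoder with a momentum rule driven by `m`. A minimal sketch of that step, assuming it lives on the same `MoCo` class (it is not part of the original example):

    def _momentum_update_key_encoder(self):
        """Momentum update: theta_k <- m * theta_k + (1 - m) * theta_q."""
        for param_q, param_k in zip(self.encoder_q.parameters(),
                                    self.encoder_k.parameters()):
            param_k.set_value(param_k.numpy() * self.m +
                              param_q.numpy() * (1.0 - self.m))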
Example #2
def model_fn(features, mode, params, run_config):
    ernie = ErnieModelForSequenceClassification(params, name='')
    if mode is not propeller.RunMode.TRAIN:
        ernie.eval()

    metrics, loss = None, None
    if mode is propeller.RunMode.PREDICT:
        src_ids, sent_ids = features
        _, logits = ernie(src_ids, sent_ids)
        predictions = [
            logits,
        ]
    else:
        src_ids, sent_ids, labels = features
        if mode is propeller.RunMode.EVAL:
            loss, logits = ernie(src_ids, sent_ids, labels=labels)
            pred = L.argmax(logits, axis=1)
            acc = propeller.metrics.Acc(labels, pred)
            metrics = {'acc': acc}
            predictions = [pred]
        else:
            loss, logits = ernie(src_ids, sent_ids, labels=labels)
            scheduled_lr, _ = optimization(
                loss=loss,
                warmup_steps=int(run_config.max_steps *
                                 params['warmup_proportion']),
                num_train_steps=run_config.max_steps,
                learning_rate=params['learning_rate'],
                train_program=F.default_main_program(),
                startup_prog=F.default_startup_program(),
                use_fp16=params.use_fp16,
                weight_decay=params['weight_decay'],
                scheduler="linear_warmup_decay",
            )
            propeller.summary.scalar('lr', scheduled_lr)
            predictions = [
                logits,
            ]

    return propeller.ModelSpec(loss=loss,
                               mode=mode,
                               metrics=metrics,
                               predictions=predictions)
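
A `model_fn` like this is handed to propeller's training entry point. A rough wiring sketch; the entry-point and keyword names follow the ERNIE demos and are assumptions, as are `hparams`, `run_config`, `train_ds`, and `dev_ds`:

propeller.train.train_and_eval(
    model_class_or_model_fn=model_fn,
    params=hparams,          # assumed propeller.HParams (learning_rate, warmup_proportion, ...)
    run_config=run_config,   # assumed propeller.RunConfig (max_steps, ...)
    train_dataset=train_ds,
    eval_dataset=dev_ds)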
Example #3
def model_fn(features, mode, params, run_config):
    ernie = ErnieModelForSequenceClassification(params, name='')
    if mode is not propeller.RunMode.TRAIN:
        ernie.eval()
    else:
        ernie.train()

    metrics, loss, train_hooks = None, None, None  # train_hooks must exist in every branch
    if mode is propeller.RunMode.PREDICT:
        src_ids, sent_ids = features
        _, logits = ernie(src_ids, sent_ids)
        predictions = [
            logits,
        ]
    else:
        src_ids, sent_ids, labels = features
        if mode is propeller.RunMode.EVAL:
            loss, logits = ernie(src_ids, sent_ids, labels=labels)
            pred = logits.argmax(axis=1)
            acc = propeller.metrics.Acc(labels, pred)
            metrics = {'acc': acc}
            predictions = [pred]
            train_hooks = None
        else:
            loss, logits = ernie(src_ids, sent_ids, labels=labels)
            lr_step_hook, loss_scale_coef = optimization(
                loss=loss,
                warmup_steps=int(run_config.max_steps *
                                 params['warmup_proportion']),
                num_train_steps=run_config.max_steps,
                learning_rate=params['learning_rate'],
                train_program=P.static.default_main_program(),
                startup_prog=P.static.default_startup_program(),
                use_fp16=args.use_amp,  # note: `args` must be a module-level argparse namespace
                weight_decay=params['weight_decay'],
                scheduler="linear_warmup_decay",
            )
            scheduled_lr = P.static.default_main_program().global_block().var(
                'learning_rate_0')
            propeller.summary.scalar('lr', scheduled_lr)
            predictions = [
                logits,
            ]
            train_hooks = [lr_step_hook]

    return propeller.ModelSpec(loss=loss,
                               mode=mode,
                               metrics=metrics,
                               predictions=predictions,
                               train_hooks=train_hooks)
Example #4

                        type=str,
                        default=None,
                        help='model output directory')
    parser.add_argument('--wd',
                        type=float,
                        default=0.01,
                        help='weight decay, aka L2 regularizer')

    args = parser.parse_args()

    tokenizer = ErnieTokenizer.from_pretrained(args.from_pretrained)
    #tokenizer = ErnieTinyTokenizer.from_pretrained(args.from_pretrained)

    place = F.CUDAPlace(0)
    with FD.guard(place):
        model = ErnieModelForSequenceClassification.from_pretrained(
            args.from_pretrained, num_labels=3, name='')
        if not args.eval:
            feature_column = propeller.data.FeatureColumns([
                propeller.data.TextColumn('seg_a',
                                          unk_id=tokenizer.unk_id,
                                          vocab_dict=tokenizer.vocab,
                                          tokenizer=tokenizer.tokenize),
                propeller.data.LabelColumn('label'),
            ])

            def map_fn(seg_a, label):
                seg_a, _ = tokenizer.truncate(seg_a, [],
                                              seqlen=args.max_seqlen)
                sentence, segments = tokenizer.build_for_ernie(seg_a, [])
                return sentence, segments, label
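
            # the triples from map_fn still need batching and padding; a sketch of
            # the usual next step with propeller's dataset API (the layout under
            # args.data_dir and the padded_batch pad values are assumptions)
            train_ds = feature_column.build_dataset(
                'train',
                data_dir=os.path.join(args.data_dir, 'train'),
                shuffle=True,
                repeat=True,
                use_gz=False).map(map_fn).padded_batch(args.bsz, (0, 0, 0))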
Example #5
dev_features = [
    tokenizer.encode(row[0], row[1], args.max_seqlen - 3) + (row[2], )
    for row in dev_data
]
# print(np.percentile([len(row[0]) for row in train_features], [0, 50, 95, 99, 100]))
# print(np.percentile([len(row[0]) for row in dev_features], [0, 50, 95, 99, 100]))
# to batch
print('start training...')
bst_f1, global_step = 0, 0
args.max_steps = (len(train_features) // args.bsz + 1) * args.epochs
place = F.CUDAPlace(0) if F.is_compiled_with_cuda() else F.CPUPlace()
with FD.guard(place):
    if 'ernie' in args.from_pretrained:
        model = ErnieModelForSequenceClassification.from_pretrained(
            args.from_pretrained, num_labels=2, name='')
        if args.init_checkpoint is not None:
            print('loading checkpoint from %s' % args.init_checkpoint)
            sd, _ = FD.load_dygraph(args.init_checkpoint)
            model.set_dict(sd)
    elif 'wwm' in args.from_pretrained:
        config = json.load(
            open(os.path.join(args.from_pretrained, 'ernie_config.json'),
                 'rt',
                 encoding='utf-8'))
        config['num_labels'] = 2
        model = ErnieModelForSequenceClassification(config)
        # print(model)
        print('loading checkpoint from %s' % 'chinese_roberta_wwm_pp')
        sd, _ = FD.load_dygraph('%s/roberta_wwm.pdparams' %
                                args.from_pretrained)
Example #6
    print('converting data to ernie format')
    test_features = [
        tokenizer.encode(row[0], row[1], args.max_seqlen - 3) + (0, )
        for row in test_data
    ]

    # print(np.percentile([len(row[0]) for row in train_features], [0, 50, 95, 99, 100]))
    # print(np.percentile([len(row[0]) for row in dev_features], [0, 50, 95, 99, 100]))
    # to batch
    place = F.CUDAPlace(0) if F.is_compiled_with_cuda() else F.CPUPlace()
    with FD.guard(place):
        if 'wwm' in args.from_pretrained:
            config = json.load(open(os.path.join(args.from_pretrained, 'ernie_config.json'), 'rt', encoding='utf-8'))
            config['num_labels'] = 2
            model = ErnieModelForSequenceClassification(config)
            # print(model)
            print('loading checkpoint from %s' % 'chinese_roberta_wwm_pp')
            sd, _ = FD.load_dygraph('%s/roberta_wwm.pdparams' % args.from_pretrained)
            for k, v in model.state_dict().items():
                if k not in sd:
                    print('param:%s not set in pretrained model, skip' % k)
                    sd[k] = v  # FIXME: no need to do this in the future
            model.set_dict(sd)
        else:
            model = ErnieModelForSequenceClassification.from_pretrained(args.from_pretrained, num_labels=2, name='')
            if args.init_checkpoint is not None:
                print('loading checkpoint from %s' % args.init_checkpoint)
                sd, _ = FD.load_dygraph(args.init_checkpoint)
                model.set_dict(sd)
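
        # a minimal prediction loop once the checkpoint is restored (sketch; the
        # `test_batches` iterator of numpy arrays is assumed)
        model.eval()
        for ids, sids, _ in test_batches:
            _, logits = model(FD.to_variable(ids), FD.to_variable(sids))
            preds = logits.numpy().argmax(-1)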
Example #7
def evaluate_teacher(model, dataset):
    all_pred, all_label = [], []
    # switch off autograd tracing for evaluation (private fluid dygraph API)
    with D.base._switch_tracer_mode_guard_(is_train=False):
        model.eval()
        for step, (ids_student, ids, _, labels) in enumerate(dataset.start()):
            _, logits = model(ids)
            pred = L.argmax(logits, -1)
            all_pred.extend(pred.numpy())
            all_label.extend(labels.numpy())
        f1 = f1_score(all_label, all_pred, average='macro')
        model.train()
        return f1
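
Typical use once the teacher below has been trained (`dev_ds` is an assumed held-out dataset with the same layout as `train_ds`):

f1 = evaluate_teacher(teacher_model, dev_ds)
print('teacher f1: %.5f' % f1)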


teacher_model = ErnieModelForSequenceClassification.from_pretrained(
    'ernie-1.0', num_labels=2)
teacher_model.train()
if not os.path.exists('./teacher_model.pdparams'):
    opt = AdamW(learning_rate=LinearDecay(LR, 9600 * EPOCH * 0.1 / BATCH,
                                          9600 * EPOCH / BATCH),
                parameter_list=teacher_model.parameters(),
                weight_decay=0.01)
    g_clip = F.dygraph_grad_clip.GradClipByGlobalNorm(1.0)
    for epoch in range(EPOCH):
        for step, (ids_student, ids, sids,
                   labels) in enumerate(train_ds.start(place)):
            loss, logits = teacher_model(ids, labels=labels)
            loss.backward()
            if step % 10 == 0:
                print('[step %03d] teacher train loss %.5f lr %.3e' %
                      (step, loss.numpy(), opt.current_step_lr()))
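            # the example is cut off here; under fluid dygraph the backward pass is
            # normally followed by the optimizer step (sketch, reusing the `opt` and
            # `g_clip` defined above)
            opt.minimize(loss, grad_clip=g_clip)
            teacher_model.clear_gradients()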
Example #8
import paddle.fluid as F
from ernie.modeling_ernie import ErnieModelForSequenceClassification
import paddle.fluid.dygraph as FD
import numpy as np
from paddle.fluid.dygraph import Linear, to_variable, TracedLayer
place = F.CUDAPlace(0)
with FD.guard(place):
    model = ErnieModelForSequenceClassification.from_pretrained(
        'ernie-2.0-large-en', num_labels=1024, name='q')
    """load MoCo pretrained model"""
    state_dict = F.load_dygraph('./model')[0]
    for each in state_dict.keys():  # inspect the checkpoint's parameter names
        print(each)
    for key in list(state_dict.keys()):
        if 'encoder_q' in key:
            print(key[10:])
            new_key = key[10:]  # strip the 'encoder_q.' prefix (10 chars)
            state_dict[new_key] = state_dict[key]
        del state_dict[key]  # drop every original key; only renamed encoder_q params remain
    for key in list(state_dict.keys()):
        # keep the first MLP layer as the classifier head, drop the second
        if key == 'classifier.0.weight':
            state_dict['classifier.weight'] = state_dict[key]
            del state_dict[key]
        elif key == 'classifier.0.bias':
            state_dict['classifier.bias'] = state_dict[key]
            del state_dict[key]
        elif key == 'classifier.2.weight' or key == 'classifier.2.bias':
            del state_dict[key]
    state_dict['classifier.weight'] = state_dict['classifier.weight'][:1024, :]
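    # the snippet ends mid-surgery; a plausible completion (assumed, not from the
    # original) slices the bias to match and loads the edited state dict
    state_dict['classifier.bias'] = state_dict['classifier.bias'][:1024]
    model.set_dict(state_dict)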