Example 1
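            # (disabled) alternative: perturb the weights along the current
            # meta-gradient with an SGD step at lr = -approx_lr, then re-run
            # the inner loop for a second finite-difference sample: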
            # sgd_opt.param_groups[0]['lr'] = -args.approx_lr
            # model.load_fast_weights(weights)
            # model.load_fast_gradients(meta_grad, 'meta')
            # sgd_opt.step()
            # fast_weights3 = inner_loop(args, (inner_loop_data, args.aux[selected]), model, iters = iters, use_prog_bar=False)

            # accumulate the finite-difference meta-gradient:
            for name in meta_grad:
                if name in fast_weights:
                    meta_grad[name].add_(
                        0.01 * (fast_weights[name] - fast_weights2[name]) /
                        args.approx_lr)
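            # the scaled change between the two adapted weight sets, divided
            # by the approximation learning rate, serves as a first-order
            # finite-difference estimate of the meta-gradient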

            model.load_fast_gradients(meta_grad, 'meta')
            model.load_fast_weights(weights)

    meta_opt.step()
    info = 'Lang {}: loss={:.3f}, lr={:.8f}, batch_size={}, episodes={}'.format(
        args.aux[selected], export(loss_outer), meta_opt.param_groups[0]['lr'],
        bs_outter, iters)
    progressbar.update(1)
    progressbar.set_description(info)
    tokens = tokens + bs_outter

    if args.tensorboard and (not args.debug):
        writer.add_scalar('train/Loss', export(loss_outer), iters + 1)

    # ---- zero the self-embedding matrix
    if not args.no_meta_training:
        model.encoder.out.weight.data[4:, :].zero_()  # as in the outer-loop update below; keeps the first 4 special tokens
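Example 1 reduces to a simple pattern: estimate a meta-gradient from the difference between two sets of adapted weights, install it into the parameters' .grad buffers, and take one optimizer step. Below is a minimal sketch of that pattern, assuming a plain PyTorch model and optimizer; the helper name apply_meta_gradient and the 0.01 scaling default are illustrative assumptions, not the codebase's API.

import torch

def apply_meta_gradient(model, weights_a, weights_b, meta_opt, approx_lr, scale=0.01):
    """Install a finite-difference meta-gradient estimate and step once.

    Per parameter the estimate is scale * (weights_a - weights_b) / approx_lr,
    mirroring the accumulation loop in Example 1.
    """
    meta_opt.zero_grad()
    with torch.no_grad():
        for name, p in model.named_parameters():
            if name in weights_a and name in weights_b:
                # the weight difference acts as the gradient signal
                p.grad = scale * (weights_a[name] - weights_b[name]) / approx_lr
    meta_opt.step()  # consumes the .grad buffers loaded above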
Example 2
    # ------ outer-update for Reptile (parallel or sequential mode)
    meta_opt.param_groups[0]['lr'] = get_learning_rate(
        iters + 1, disable=args.disable_lr_schedule)
    meta_opt.zero_grad()

    # -- virtual batch, only used to build the backward pass
    inputs, input_masks, targets, target_masks, sources, source_masks, encoding, batch_size = model.quick_prepare(
        next(iter(aux_reals[selected])))
    loss_outer = model.cost(targets,
                            target_masks,
                            out=model(encoding, source_masks, inputs,
                                      input_masks))
    loss_outer.backward()
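    # note: this backward pass exists only to populate the .grad buffers; the
    # actual update direction comes from the fast gradients loaded below,
    # which is why the loss is logged as "fake"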

    # -- load fast gradient...
    model.load_fast_gradients(fast_gradients, type='meta')
    meta_opt.step()

    info = 'Outer-loop (all): lr={:.8f}, loss (fake)={}'.format(
        meta_opt.param_groups[0]['lr'], export(loss_outer))
    # progressbar.set_description(info)

    if args.tensorboard and (not args.debug):
        writer.add_scalar('train/Loss', export(loss_outer), iters)

    # -- zero-out self-embeddings
    model.encoder.out.weight.data[4:, :].zero_()  # zero everything except the first 4 special tokens

    iters = iters + 1
    eposides = eposides + 1
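Example 2 is the Reptile-style outer update its first comment names: adapt on a task, then move the meta-parameters toward the adapted weights. Below is a compact sketch of one Reptile meta-step under that reading; the function and argument names are illustrative, and the real loop wires in its own model, data pipeline, and learning-rate schedule.

import torch
import torch.nn.functional as F

def reptile_meta_step(model, task_batches, inner_lr=0.02, meta_lr=0.1, inner_steps=5):
    """One Reptile meta-update: a few SGD steps on a task, then move the
    original weights part-way toward the adapted ones."""
    # snapshot the pre-adaptation weights (theta)
    theta = {n: p.detach().clone() for n, p in model.named_parameters()}

    # inner loop: plain SGD on the task, producing the fast weights (phi)
    inner_opt = torch.optim.SGD(model.parameters(), lr=inner_lr)
    for _, (x, y) in zip(range(inner_steps), task_batches):
        inner_opt.zero_grad()
        F.cross_entropy(model(x), y).backward()
        inner_opt.step()

    # outer update: theta <- theta + meta_lr * (phi - theta)
    with torch.no_grad():
        for n, p in model.named_parameters():
            p.copy_(theta[n] + meta_lr * (p - theta[n]))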