Code Example #1
File: regular.py Project: ueiyang/few-shot-nlp
def train_one(task, model, opt, args, grad):
    '''
        Train the model on one sampled task.
    '''
    model['ebd'].train()
    model['clf'].train()
    opt.zero_grad()

    support, query = task

    # Embed the documents
    if args.embedding == 'bert':
        with torch.no_grad():
            XS = model['ebd'](support)['sentence_embedding']
            XQ = model['ebd'](query)['sentence_embedding']
    else:
        # fall back to the plain embedding path so XS/XQ are always defined
        XS = model['ebd'](support)
        XQ = model['ebd'](query)

    YS = support['label']
    YQ = query['label']

    # Apply the classifier
    _, loss = model['clf'](XS, YS, XQ, YQ)

    if loss is None:
        return

    if torch.isnan(loss):
        # do not update the parameters if the loss is nan
        # print("NAN detected")
        # print(model['clf'].lam, model['clf'].alpha, model['clf'].beta)
        return

    loss.backward()

    grad['clf'].append(get_norm(model['clf']))
    grad['ebd'].append(get_norm(model['ebd']))

    opt.step()
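
The snippets on this page log gradient norms through a get_norm helper that is not shown in these excerpts. A minimal sketch of what such a helper could look like, assuming it returns the L2 norm of the gradients currently stored on a module's trainable parameters (the name and behaviour here are assumptions, not the project's actual implementation):

import torch

def get_norm(module):
    # assumed helper: total L2 norm of the gradients currently stored
    # on the module's trainable parameters
    total = 0.0
    for p in module.parameters():
        if p.requires_grad and p.grad is not None:
            total += p.grad.norm().item() ** 2
    return total ** 0.5
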
Code Example #2
def _meta_update(model, total_grad, opt, task, maml_batchsize, clip_grad):
    '''
        Aggregate the gradients in total_grad
        Update the initialization in model
    '''

    model['ebd'].train()
    model['clf'].train()
    support, query = task
    XS = model['ebd'](support)
    pred = model['clf'](XS)
    loss = torch.sum(pred)  # dummy loss; its gradients are replaced by the hooks below

    # aggregate the gradients (skip nan)
    avg_grad = {
            'ebd': {key: sum(g[key] for g in total_grad['ebd'] if
                        not torch.sum(torch.isnan(g[key])) > 0)\
                    for key in total_grad['ebd'][0].keys()},
            'clf': {key: sum(g[key] for g in total_grad['clf'] if
                        not torch.sum(torch.isnan(g[key])) > 0)\
                    for key in total_grad['clf'][0].keys()}
            }

    # register a hook on each parameter in the model that replaces
    # the dummy gradient with the aggregated meta-gradients
    hooks = []
    for model_name in avg_grad.keys():
        for key, value in model[model_name].named_parameters():
            if not value.requires_grad:
                continue

            def get_closure():
                k = key
                n = model_name

                def replace_grad(grad):
                    return avg_grad[n][k] / maml_batchsize

                return replace_grad

            hooks.append(value.register_hook(get_closure()))

    opt.zero_grad()
    loss.backward()

    ebd_grad = get_norm(model['ebd'])
    clf_grad = get_norm(model['clf'])
    if clip_grad is not None:
        nn.utils.clip_grad_value_(grad_param(model, ['ebd', 'clf']), clip_grad)

    opt.step()

    for h in hooks:
        # remove the hooks before the next training phase
        h.remove()

    total_grad['ebd'] = []
    total_grad['clf'] = []

    return ebd_grad, clf_grad
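
_meta_update performs the outer MAML-style update: the forward/backward pass on the dummy loss exists only to trigger autograd, and the per-parameter hooks swap in the averaged meta-gradients before opt.step(). A self-contained toy sketch of that hook trick, using an illustrative model and a made-up meta gradient (not the project's code):

import torch
import torch.nn as nn

# toy model and a precomputed "meta gradient" we want the optimizer to apply
model = nn.Linear(4, 2)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
meta_grad = {k: torch.ones_like(v) for k, v in model.named_parameters()}

hooks = []
for name, param in model.named_parameters():
    def get_closure():
        n = name  # bind the current loop value, as in _meta_update

        def replace_grad(grad):
            # the gradient produced by the dummy loss is discarded
            # and replaced by the precomputed meta gradient
            return meta_grad[n]

        return replace_grad

    hooks.append(param.register_hook(get_closure()))

opt.zero_grad()
dummy_loss = model(torch.randn(3, 4)).sum()  # the value itself is irrelevant
dummy_loss.backward()                        # hooks fire here
opt.step()                                   # applies meta_grad, not the dummy gradient

# remove the hooks before any further training
for h in hooks:
    h.remove()
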
Code Example #3
File: train.py Project: hccngu/MLADA
def train_one(task, model, optG, optD, args, grad):
    '''
        Train the model on one sampled task.
    '''
    model['G'].train()
    model['D'].train()
    model['clf'].train()

    support, query, source = task
    for _ in range(args.k):
        # ***************update D**************
        optD.zero_grad()

        # Embedding the document
        XS, XS_inputD, _ = model['G'](support, flag='support')
        YS = support['label']
        # print('YS', YS)

        XQ, XQ_inputD, _ = model['G'](query, flag='query')
        YQ = query['label']
        YQ_d = torch.ones(query['label'].shape, dtype=torch.long).to(query['label'].device)
        # print('YQ', set(YQ.numpy()))

        XSource, XSource_inputD, _ = model['G'](source, flag='query')
        YSource_d = torch.zeros(source['label'].shape, dtype=torch.long).to(source['label'].device)

        XQ_logitsD = model['D'](XQ_inputD)
        XSource_logitsD = model['D'](XSource_inputD)

        d_loss = F.cross_entropy(XQ_logitsD, YQ_d) + F.cross_entropy(XSource_logitsD, YSource_d)
        d_loss.backward(retain_graph=True)
        grad['D'].append(get_norm(model['D']))
        optD.step()

        # *****************update G****************
        optG.zero_grad()
        XQ_logitsD = model['D'](XQ_inputD)
        XSource_logitsD = model['D'](XSource_inputD)
        d_loss = F.cross_entropy(XQ_logitsD, YQ_d) + F.cross_entropy(XSource_logitsD, YSource_d)

        acc, d_acc, loss, _ = model['clf'](XS, YS, XQ, YQ, XQ_logitsD, XSource_logitsD, YQ_d, YSource_d)

        g_loss = loss - d_loss
        if args.ablation == "-DAN":
            g_loss = loss
            print("%%%%%%%%%%%%%%%%%%%This is ablation mode: -DAN%%%%%%%%%%%%%%%%%%%%%%%%%%")
        g_loss.backward(retain_graph=True)
        grad['G'].append(get_norm(model['G']))
        grad['clf'].append(get_norm(model['clf']))
        optG.step()

    return d_acc
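
The discriminator step above is a standard domain-classification objective: query embeddings are given domain label 1, source embeddings label 0, and the two cross-entropy terms are summed. A stripped-down sketch of just that loss, with random placeholder tensors standing in for the real features:

import torch
import torch.nn as nn
import torch.nn.functional as F

discriminator = nn.Linear(16, 2)          # stand-in for model['D']
XQ_inputD = torch.randn(8, 16)            # query-side features
XSource_inputD = torch.randn(8, 16)       # source-side features

YQ_d = torch.ones(8, dtype=torch.long)        # query domain label = 1
YSource_d = torch.zeros(8, dtype=torch.long)  # source domain label = 0

d_loss = (F.cross_entropy(discriminator(XQ_inputD), YQ_d) +
          F.cross_entropy(discriminator(XSource_inputD), YSource_d))
d_loss.backward()
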
Code Example #4
def train_one(task, model, opt, args, grad):
    '''
        Train the model on one sampled task.
    '''
    model['ebd'].train()
    if args.classifier != 'nn':
        model['clf'].train()
        opt.zero_grad()

    support, query = task

    # Embedding the document
    XS = model['ebd'](support)
    YS = support['label']

    XQ = model['ebd'](query)
    YQ = query['label']

    # Apply the classifier
    _, loss = model['clf'](XS, YS, XQ, YQ)

    print('loss: ', loss)

    if loss is None:
        return

    if torch.isnan(loss):
        # do not update the parameters if the loss is nan
        print("NAN detected")
        print(model['clf'].lam, model['clf'].alpha, model['clf'].beta)
        return

    loss.backward()

    if args.clip_grad is not None:
        nn.utils.clip_grad_value_(grad_param(model, ['ebd', 'clf']),
                                  args.clip_grad)

    if args.classifier != 'nn':
        grad['clf'].append(get_norm(model['clf']))
    grad['ebd'].append(get_norm(model['ebd']))

    if args.classifier != 'nn':
        opt.step()
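
Examples #2 and #4 clip gradients through a grad_param helper that is also not shown in these excerpts. A plausible sketch, assuming it simply yields the trainable parameters of the named sub-modules (the project's actual implementation may differ):

def grad_param(model, names):
    # assumed helper: iterate over the trainable parameters of the
    # listed sub-modules, e.g. grad_param(model, ['ebd', 'clf'])
    for name in names:
        for p in model[name].parameters():
            if p.requires_grad:
                yield p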