import torch
import torch.nn as nn
import torch.nn.functional as F


def train_one(task, model, opt, args, grad):
    '''
        Train the model on one sampled task.
    '''
    model['ebd'].train()
    model['clf'].train()
    opt.zero_grad()

    support, query = task

    # Embed the documents (XS/XQ are only defined on the BERT path)
    if args.embedding == 'bert':
        with torch.no_grad():
            XS = model['ebd'](support)['sentence_embedding']
            XQ = model['ebd'](query)['sentence_embedding']

    YS = support['label']
    YQ = query['label']

    # Apply the classifier
    _, loss = model['clf'](XS, YS, XQ, YQ)

    if loss is not None:
        loss.backward()

    if torch.isnan(loss):
        # do not update the parameters if the gradient is nan
        # print("NAN detected")
        # print(model['clf'].lam, model['clf'].alpha, model['clf'].beta)
        return

    grad['clf'].append(get_norm(model['clf']))
    grad['ebd'].append(get_norm(model['ebd']))

    opt.step()
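# The helper get_norm() is referenced throughout this section but not defined
# here. A minimal sketch is given below, assuming it returns the total L2 norm
# of a module's current gradients for logging; the project's actual helper may
# differ (e.g. it could return per-parameter gradient tensors instead).
def get_norm(module):
    '''
        Compute the total L2 norm of the gradients of a module (sketch).
    '''
    total_norm = 0.0
    for p in module.parameters():
        if p.grad is not None:
            total_norm += p.grad.data.norm().item() ** 2
    return total_norm ** 0.5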
def _meta_update(model, total_grad, opt, task, maml_batchsize, clip_grad):
    '''
        Aggregate the gradients in total_grad
        Update the initialization in model
    '''
    model['ebd'].train()
    model['clf'].train()

    support, query = task

    XS = model['ebd'](support)
    pred = model['clf'](XS)
    loss = torch.sum(pred)  # this doesn't matter

    # aggregate the gradients (skip nan)
    avg_grad = {
        'ebd': {key: sum(g[key] for g in total_grad['ebd']
                         if not torch.sum(torch.isnan(g[key])) > 0)
                for key in total_grad['ebd'][0].keys()},
        'clf': {key: sum(g[key] for g in total_grad['clf']
                         if not torch.sum(torch.isnan(g[key])) > 0)
                for key in total_grad['clf'][0].keys()},
    }

    # register a hook on each parameter in the model that replaces
    # the current dummy grad with the meta gradients
    hooks = []
    for model_name in avg_grad.keys():
        for key, value in model[model_name].named_parameters():
            if not value.requires_grad:
                continue

            def get_closure():
                k = key
                n = model_name

                def replace_grad(grad):
                    return avg_grad[n][k] / maml_batchsize

                return replace_grad

            hooks.append(value.register_hook(get_closure()))

    opt.zero_grad()
    loss.backward()

    ebd_grad = get_norm(model['ebd'])
    clf_grad = get_norm(model['clf'])

    if clip_grad is not None:
        nn.utils.clip_grad_value_(grad_param(model, ['ebd', 'clf']), clip_grad)

    opt.step()

    for h in hooks:
        # remove the hooks before the next training phase
        h.remove()

    total_grad['ebd'] = []
    total_grad['clf'] = []

    return ebd_grad, clf_grad
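# grad_param() is used with nn.utils.clip_grad_value_ above but is not defined
# in this section. A minimal sketch, assuming it collects the trainable
# parameters of the listed sub-modules; the actual utility in the project may
# be implemented differently.
def grad_param(model, keys):
    '''
        Return the parameters of model[key] that require gradients,
        for every key in keys (sketch).
    '''
    params = []
    for key in keys:
        params.extend(p for p in model[key].parameters() if p.requires_grad)
    return params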
def train_one(task, model, optG, optD, args, grad):
    '''
        Train the model on one sampled task.
    '''
    model['G'].train()
    model['D'].train()
    model['clf'].train()

    support, query, source = task

    for _ in range(args.k):
        # ***************update D**************
        optD.zero_grad()

        # Embedding the document
        XS, XS_inputD, _ = model['G'](support, flag='support')
        YS = support['label']
        # print('YS', YS)

        XQ, XQ_inputD, _ = model['G'](query, flag='query')
        YQ = query['label']
        YQ_d = torch.ones(query['label'].shape,
                          dtype=torch.long).to(query['label'].device)
        # print('YQ', set(YQ.numpy()))

        XSource, XSource_inputD, _ = model['G'](source, flag='query')
        YSource_d = torch.zeros(source['label'].shape,
                                dtype=torch.long).to(source['label'].device)

        XQ_logitsD = model['D'](XQ_inputD)
        XSource_logitsD = model['D'](XSource_inputD)

        d_loss = F.cross_entropy(XQ_logitsD, YQ_d) + \
            F.cross_entropy(XSource_logitsD, YSource_d)
        d_loss.backward(retain_graph=True)
        grad['D'].append(get_norm(model['D']))
        optD.step()

        # *****************update G****************
        optG.zero_grad()

        XQ_logitsD = model['D'](XQ_inputD)
        XSource_logitsD = model['D'](XSource_inputD)
        d_loss = F.cross_entropy(XQ_logitsD, YQ_d) + \
            F.cross_entropy(XSource_logitsD, YSource_d)

        acc, d_acc, loss, _ = model['clf'](XS, YS, XQ, YQ, XQ_logitsD,
                                           XSource_logitsD, YQ_d, YSource_d)

        g_loss = loss - d_loss
        if args.ablation == "-DAN":
            g_loss = loss
            print("%%%%%%%%%%%%%%%%%%%This is ablation mode: -DAN%%%%%%%%%%%%%%%%%%%%%%%%%%")

        g_loss.backward(retain_graph=True)
        grad['G'].append(get_norm(model['G']))
        grad['clf'].append(get_norm(model['clf']))
        optG.step()

    return d_acc
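# A minimal usage sketch for the adversarial variant above: each sampled task
# provides (support, query, source), and the generator and discriminator are
# updated with separate optimizers. The sampler name, the grad-norm log
# structure, and the averaging are illustrative assumptions; only the
# train_one signature comes from the code above.
def train_epoch_sketch(task_sampler, model, optG, optD, args):
    grad = {'G': [], 'D': [], 'clf': []}
    d_accs = []
    for task in task_sampler:  # task = (support, query, source)
        d_acc = train_one(task, model, optG, optD, args, grad)
        d_accs.append(d_acc)
    # average discriminator accuracy over the sampled tasks
    return sum(d_accs) / max(len(d_accs), 1), grad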
def train_one(task, model, opt, args, grad):
    '''
        Train the model on one sampled task.
    '''
    model['ebd'].train()
    if args.classifier != 'nn':
        model['clf'].train()
    opt.zero_grad()

    support, query = task

    # Embedding the document
    XS = model['ebd'](support)
    YS = support['label']

    XQ = model['ebd'](query)
    YQ = query['label']

    # Apply the classifier
    _, loss = model['clf'](XS, YS, XQ, YQ)
    print('loss: ', loss)

    if loss is not None:
        loss.backward()

    if torch.isnan(loss):
        # do not update the parameters if the gradient is nan
        print("NAN detected")
        print(model['clf'].lam, model['clf'].alpha, model['clf'].beta)
        return

    if args.clip_grad is not None:
        nn.utils.clip_grad_value_(grad_param(model, ['ebd', 'clf']),
                                  args.clip_grad)

    if args.classifier != 'nn':
        grad['clf'].append(get_norm(model['clf']))
    grad['ebd'].append(get_norm(model['ebd']))

    if args.classifier != 'nn':
        opt.step()
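# A minimal usage sketch for the standard variant above, assuming a task
# iterator and a single optimizer over both modules. The function name and the
# summary of the grad dict are illustrative; they assume get_norm returns a
# scalar norm per update, as in the sketch earlier in this section.
def run_episodes_sketch(tasks, model, opt, args):
    grad = {'clf': [], 'ebd': []}
    for task in tasks:  # task = (support, query)
        train_one(task, model, opt, args, grad)
    # summarize the collected gradient norms for monitoring
    return {k: (sum(v) / len(v) if v else 0.0) for k, v in grad.items()}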