def train(model, train_loader, epoch):
    """Run one training pass over ``train_loader`` with a progress bar.

    NOTE(review): relies on a module-level tensorboard ``writer`` and on
    ``model.iters`` being maintained by the model; ``epoch`` is accepted but
    not used inside this function — confirm whether callers expect it to be.
    ``model.train`` here is a custom step method taking inputs, not the
    torch ``nn.Module.train`` mode switch (the mode switch is
    ``model.switch_to_train``).
    """
    # average meters to record the training statistics
    batch_time = util.AverageMeter()
    data_time = util.AverageMeter()
    # switch to train mode
    model.switch_to_train()
    progbar = Progbar(len(train_loader.dataset))
    end = time.time()
    for i, train_data in enumerate(train_loader):
        data_time.update(time.time() - end)
        vis_input, txt_input, _, _, _ = train_data
        # One optimization step on the (visual, text) pair; returns scalar loss.
        loss = model.train(vis_input, txt_input)
        # batch_time.val is 0 on the first iteration (updated below).
        progbar.add(vis_input.size(0), values=[('data_time', data_time.val), ('batch_time', batch_time.val), ('loss', loss)])
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # Record logs in tensorboard
        writer.add_scalar('train/Loss', loss, model.iters)
def main(n_aggregation, dim_feature, n_epochs, batch_size, eps):
    """Train a GraphNeuralNetwork with numerically estimated gradients.

    Args:
        n_aggregation: number of message-passing/aggregation steps.
        dim_feature: node feature dimensionality.
        n_epochs: number of training epochs.
        batch_size: mini-batch size used for gradient averaging.
        eps: finite-difference step used by ``calc_grads``.
    """
    # Randomly initialize model parameters.
    W = np.random.normal(0, 0.4, [dim_feature, dim_feature])
    A = np.random.normal(0, 0.4, dim_feature)
    b = np.array([0.])
    model = GraphNeuralNetwork(W, A, b, n_aggregation=n_aggregation)
    optimizer = Adam(model)

    dataset = util.get_train_data('../../datasets')
    train_data, valid_data = util.random_split(dataset, train_ratio=0.5)
    print('train_size: %d, valid_size: %d' % (len(train_data), len(valid_data)))

    for epoch in range(n_epochs):
        train_loss = util.AverageMeter()
        train_acc = util.AverageMeter()
        for graphs, labels in util.get_shuffled_batches(train_data, batch_size):
            # Average numerically-estimated gradients over the mini-batch.
            # NOTE(review): divides by batch_size even if the final batch is
            # smaller — confirm get_shuffled_batches yields full batches.
            grads_flat = 0
            for graph, label in zip(graphs, labels):
                # One-hot-ish initial node features: first channel set to 1.
                x = np.zeros([len(graph), dim_feature])
                x[:, 0] = 1
                grads_flat += calc_grads(model, graph, x, label, bce_with_logit, eps) / batch_size
                outputs = model(graph, x)
                train_loss.update(bce_with_logit(outputs, label), 1)
                train_acc.update((sigmoid(outputs) > 0.5) == label, 1)
            optimizer.update(grads_flat)
        valid_loss, valid_acc = test(model, valid_data, dim_feature)
        # Fixed typo in the log message: 'vald_acc' -> 'valid_acc'.
        print(
            'epoch: %d, train_loss: %f, train_acc: %f, valid_loss: %f, valid_acc: %f'
            % (epoch, train_loss.avg, train_acc.avg, valid_loss, valid_acc))
def eval_ensemble(split, ens, dataloaders, args, eval_batch_size=4):
    """Evaluate ensemble caption scoring on one split; returns avg acc/loss.

    NOTE(review): assumes every caption produced by ``data.to_text`` appears
    in ``caps_to_score`` (otherwise ``list.index`` raises ValueError) — confirm.
    """
    ens.eval()
    # Re-wrap the split's dataset with a (possibly smaller) eval batch size.
    eval_dataloader = torch.utils.data.DataLoader(dataloaders[split].dataset, batch_size=eval_batch_size)
    loss = nn.NLLLoss()
    caps_to_score = data.get_responses(args.dataset, args.data_folder)
    loss_meter = util.AverageMeter()
    acc_meter = util.AverageMeter()
    for (imgs, caps, caplens, *idx) in tqdm(eval_dataloader, desc="Eval ensemble"):
        batch_size = imgs.shape[0]
        if args.cuda:
            imgs = imgs.cuda()
        # caps -> class indices
        cidx = data.to_text(caps, ens.vocab)
        cidx = torch.tensor([caps_to_score.index(i) for i in cidx], dtype=torch.int64)
        cidx = cidx.to(imgs.device)
        with torch.no_grad():
            ens_scores = ens.score_captions_for_imgs(imgs, caps_to_score)
        # Scores are probabilities; take log for NLLLoss.
        ens_loss = loss(ens_scores.log(), cidx)
        loss_meter.update(ens_loss.item(), batch_size)
        acc = (ens_scores.argmax(1) == cidx).float().mean().item()
        acc_meter.update(acc, batch_size)
    return {"acc": acc_meter.avg, "loss": loss_meter.avg}
def test(val_loader, model, criterion, cuda, print_freq):
    """Validation loop: accuracy + precision/recall/F1; returns top-1 avg.

    NOTE(review): no ``torch.no_grad()`` guard — gradients are still tracked
    during evaluation; confirm whether that is intended. ``input`` shadows
    the builtin of the same name.
    """
    batch_time = util.AverageMeter()
    losses = util.AverageMeter()
    top1 = util.AverageMeter()
    prfa = util.AverageMeterPRFA()
    # switch to evaluate mode
    model.eval()
    end = time.time()
    for i, (input, target, seq_lengths) in enumerate(val_loader):
        if cuda:
            input = input.cuda()
            target = target.cuda()
        # compute output
        output = model(input, seq_lengths)
        loss = criterion(output, target)
        # measure accuracy and record loss
        prfa_all = util.prf_multi_classify(output.data, target, topk=(1, ))
        prfa.update(prfa_all, seq_lengths.size(0))
        prec1 = util.accuracy(output.data, target, topk=(1, ))
        losses.update(loss.data, input.size(0))
        top1.update(prec1[0][0], input.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        four_classify_loop_print(i, print_freq, val_loader, batch_time, losses, top1, prfa)
    four_classify_last_print(top1, prfa)
    return top1.avg
def samplewise_perturbation_eval(random_noise, data_loader, model,
                                 eval_target='train_dataset',
                                 mask_cord_list=None):
    """Evaluate ``model`` on data with a per-sample additive noise patch.

    Args:
        random_noise: per-sample noise (tensor or array-like), indexed in
            dataset order; ``None`` evaluates on clean data.
        data_loader: dict of loaders keyed by split name.
        model: classifier to evaluate.
        eval_target: which split of ``data_loader`` to iterate.
        mask_cord_list: per-sample (x1, x2, y1, y2) patch coordinates.
            Defaults to an empty list (was a mutable default argument).

    Returns:
        (avg cross-entropy loss, avg error rate).
    """
    if mask_cord_list is None:
        mask_cord_list = []
    loss_meter = util.AverageMeter()
    err_meter = util.AverageMeter()
    model = model.to(device)
    idx = 0  # running dataset-order index into random_noise / mask_cord_list
    for batch_idx, (images, labels) in enumerate(data_loader[eval_target]):
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        if random_noise is not None:
            # Fixed: the inner loop previously reused `i`, shadowing the
            # outer enumerate index.
            for j, (image, label) in enumerate(zip(images, labels)):
                if not torch.is_tensor(random_noise):
                    sample_noise = torch.tensor(random_noise[idx]).to(device)
                else:
                    sample_noise = random_noise[idx].to(device)
                # Paste the noise into a zero mask at this sample's patch.
                c, h, w = image.shape[0], image.shape[1], image.shape[2]
                mask = np.zeros((c, h, w), np.float32)
                x1, x2, y1, y2 = mask_cord_list[idx]
                mask[:, x1: x2, y1: y2] = sample_noise.cpu().numpy()
                sample_noise = torch.from_numpy(mask).to(device)
                images[j] = images[j] + sample_noise
                idx += 1
        pred = model(images)
        err = (pred.data.max(1)[1] != labels.data).float().sum()
        loss = torch.nn.CrossEntropyLoss()(pred, labels)
        loss_meter.update(loss.item(), len(labels))
        err_meter.update(err / len(labels))
    return loss_meter.avg, err_meter.avg
def train_fn(self, train_data_loader, model, optimizer, device, scheduler):
    """One training epoch for span extraction; tracks loss and Jaccard.

    Args:
        train_data_loader: yields dict batches with token ids, masks,
            start/end targets, sentiment and original-text fields.
        model: returns (start_logits, end_logits).
        optimizer / scheduler: stepped once per batch.
        device: target device for tensors.
    """
    model.train()
    losses = util.AverageMeter()
    jaccards = util.AverageMeter()
    tk0 = tqdm(train_data_loader, total=len(train_data_loader))
    for bi, d in enumerate(tk0):
        ids = d["ids"]
        token_type_ids = d["token_type_ids"]
        mask = d["mask"]
        sentiment = d["sentiment"]
        orig_selected = d["orig_selected"]
        orig_tweet = d["orig_tweet"]
        # Fixed: targets_start/targets_end were fetched twice from `d`.
        targets_start = d["targets_start"]
        targets_end = d["targets_end"]
        offsets = d["offsets"]

        ids = ids.to(device, dtype=torch.long)
        token_type_ids = token_type_ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        targets_start = targets_start.to(device, dtype=torch.long)
        targets_end = targets_end.to(device, dtype=torch.long)

        model.zero_grad()
        # NOTE(review): the attention mask is passed as
        # `mask_token_type_ids` while `token_type_ids` is passed separately
        # — confirm this matches the model's signature.
        outputs_start, outputs_end = model(
            ids=ids,
            mask_token_type_ids=mask,
            token_type_ids=token_type_ids,
        )
        loss = self.loss_fn(outputs_start, outputs_end, targets_start, targets_end)
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Per-example Jaccard between predicted span and the selected text.
        outputs_start = torch.softmax(outputs_start, dim=1).cpu().detach().numpy()
        outputs_end = torch.softmax(outputs_end, dim=1).cpu().detach().numpy()
        jaccard_scores = []
        for px, tweet in enumerate(orig_tweet):
            selected_tweet = orig_selected[px]
            tweet_sentiment = sentiment[px]
            jaccard_score, _ = util.calculate_jaccard_score(
                original_tweet=tweet,
                target_string=selected_tweet,
                sentiment_val=tweet_sentiment,
                idx_start=np.argmax(outputs_start[px, :]),
                idx_end=np.argmax(outputs_end[px, :]),
                offsets=offsets[px])
            jaccard_scores.append(jaccard_score)

        jaccards.update(np.mean(jaccard_scores), ids.size(0))
        losses.update(loss.item(), ids.size(0))
        tk0.set_postfix(loss=losses.avg, jaccard=jaccards.avg)
def test_keann_kg(val_loader, model, criterion, cuda, print_freq, pdtb_category=''):
    """Evaluate the KEANN-KG model on ``val_loader``; returns top-1 avg.

    A non-empty ``pdtb_category`` switches to binary P/R/F metering and
    binary-style printing; otherwise four-way classification printing is used.
    NOTE(review): no ``torch.no_grad()`` guard during evaluation — confirm
    whether gradient tracking here is intended.
    """
    batch_time = util.AverageMeter()
    losses = util.AverageMeter()
    top1 = util.AverageMeter()
    prfa = util.AverageMeterPRFA()
    if pdtb_category != '':
        prfa = util.AverageBinaryMeterPRFA()
    # switch to evaluate mode
    model.eval()
    end = time.time()
    for i, (arg1, arg2, target, seq_lengths, transE_tensor_arg1, transE_tensor_arg2, batch_original) in enumerate(val_loader):
        arg1 = Variable(arg1, requires_grad=False)
        arg2 = Variable(arg2, requires_grad=False)
        target = Variable(target, requires_grad=False)
        if cuda:
            arg1 = arg1.cuda()
            arg2 = arg2.cuda()
            target = target.cuda()
        # compute output (arguments plus TransE knowledge-graph embeddings)
        output = model((arg1, arg2), seq_lengths, (transE_tensor_arg1, transE_tensor_arg2), cuda)
        # hot_image(model.encoder(arg1), model.encoder(arg2), batch_original, model.rand_matrix, model.kg_relation)
        loss = criterion(output, target)
        # measure accuracy and record loss
        prfa_all = util.prf_multi_classify(output.data, target, topk=(1, ))
        prfa.update(prfa_all, seq_lengths.size(0))
        prec1 = util.accuracy(output.data, target, topk=(1, ))
        losses.update(loss.data.cpu().numpy(), seq_lengths.size(0))
        top1.update(prec1[0][0], seq_lengths.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if pdtb_category != '':
            binary_classify_loop_print(i, print_freq, val_loader, batch_time, losses, top1, prfa, pdtb_category)
        else:
            four_classify_loop_print(i, print_freq, val_loader, batch_time, losses, top1, prfa)
    if pdtb_category != '':
        binary_classify_last_print(top1, prfa)
    else:
        four_classify_last_print(top1, prfa)
    return top1.avg
def test(model, dataset, dim_feature):
    """Evaluate ``model`` over ``dataset``.

    Each graph gets initial node features that are zero except for a 1 in
    the first channel. Returns ``(average BCE loss, average accuracy)``.
    """
    loss_meter = util.AverageMeter()
    acc_meter = util.AverageMeter()
    for graph, label in dataset:
        features = np.zeros([len(graph), dim_feature])
        features[:, 0] = 1
        outputs = model(graph, features)
        loss_meter.update(bce_with_logit(outputs, label), 1)
        acc_meter.update((sigmoid(outputs) > 0.5) == label, 1)
    return loss_meter.avg, acc_meter.avg
def train(train_loader, model, criterion, optimizer, epoch, args, logger, time_logger):
    """Distributed one-epoch training loop with cross-GPU metric reduction.

    Fixed: the original used bare (Korean) string literals as section
    markers — those are executable no-op statements, replaced here with
    real comments translated to English. Behavior is unchanged.
    """
    # --- declare AverageMeters ---
    batch_time = util.AverageMeter('Time', ':6.3f')
    data_time = util.AverageMeter('Data', ':6.3f')
    losses = util.AverageMeter('Loss', ':.4f')
    top1 = util.AverageMeter('Acc@1', ':6.2f')
    top5 = util.AverageMeter('Acc@5', ':6.2f')
    # --- declare progress printer ---
    progress = util.ProgressMeter(len(train_loader),
                                  [batch_time, data_time, losses, top1, top5],
                                  prefix="Epoch: [{}]".format(epoch))
    # --- start training ---
    model.train()
    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)
        output = model(images)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Gradient averaging across workers.
        average_gradients(model)
        # --- ImageNet top-1 / top-5 accuracy ---
        acc1, acc5, correct = util.accuracy(output, target, topk=(1, 5))
        # --- merge logs from all GPUs ---
        reduced_loss = reduce_tensor(loss.data)
        reduced_top1 = reduce_tensor(acc1[0].data)
        reduced_top5 = reduce_tensor(acc5[0].data)
        # --- update AverageMeters ---
        losses.update(reduced_loss.item(), images.size(0))
        top1.update(reduced_top1.item(), images.size(0))
        top5.update(reduced_top5.item(), images.size(0))
        batch_time.update(time.time() - end)
        end = time.time()
        # --- print only from one GPU (rank == 0) ---
        if dist.get_rank() == 0:
            if i % args.print_freq == 0:
                progress.display(i)
    # --- update the loggers (rank 0 only) ---
    if dist.get_rank() == 0:
        logger.write([epoch, losses.avg, top1.avg, top5.avg])
        time_logger.write([epoch, batch_time.avg, data_time.avg])
def train_keann(train_loader, model, criterion, optimizer, epoch, cuda, clip, print_freq):
    """One training epoch for the KEANN model with gradient clipping."""
    batch_time = util.AverageMeter()
    data_time = util.AverageMeter()
    losses = util.AverageMeter()
    top1 = util.AverageMeter()
    # switch to train mode
    model.train()
    end = time.time()
    for i, (arg1, arg2, target, seq_lengths) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if cuda:
            arg1 = arg1.cuda()
            arg2 = arg2.cuda()
            target = target.cuda()
        # compute output
        output = model((arg1, arg2), seq_lengths)
        loss = criterion(output, target)
        # measure accuracy and record loss
        prec1 = util.accuracy(output.data, target, topk=(1, ))
        losses.update(loss.data, seq_lengths.size(0))
        top1.update(prec1[0][0], seq_lengths.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        # Clip gradients to `clip` to stabilize RNN training.
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i != 0 and i % print_freq == 0:
            print(
                'Epoch: [{0}][{1}/{2}] Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                'Data {data_time.val:.3f} ({data_time.avg:.3f}) Loss {loss.val:.4f} ({loss.avg:.4f}) '
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                    epoch, i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, loss=losses, top1=top1))
            # Force a garbage-collection pass after each print interval.
            gc.collect()
def __init__(self, data_loader, logger, config):
    """Set up evaluation state: meters, criterion, and a confusion matrix."""
    self.data_loader = data_loader
    self.logger = logger
    self.config = config
    # Fall back to printing every 100 steps when not configured.
    if config.log_frequency is not None:
        self.log_frequency = config.log_frequency
    else:
        self.log_frequency = 100
    self.criterion = torch.nn.CrossEntropyLoss()
    # Running averages for loss and top-1/top-5 accuracy.
    self.loss_meters = util.AverageMeter()
    self.acc_meters = util.AverageMeter()
    self.acc5_meters = util.AverageMeter()
    self.current_acc = 0
    self.current_acc_top5 = 0
    self.confusion_matrix = torch.zeros(config.num_classes, config.num_classes)
def __init__(self, args):
    """Set up meters, a progress bar, and timing for test-time logging."""
    super(TestLogger, self).__init__(args)
    # Copy configuration values this logger needs.
    self.experiment_name = args.name
    self.num_epochs = args.num_epochs
    self.steps_per_print = args.steps_per_print
    self.steps_per_visual = args.steps_per_visual
    # One running average per tracked loss flavour.
    self.masked_loss_meter = util.AverageMeter()
    self.full_loss_meter = util.AverageMeter()
    self.obscured_loss_meter = util.AverageMeter()
    # Progress bar length = number of expected print events.
    total_prints = int(self.num_epochs / self.steps_per_print)
    self.pbar = tqdm(total=total_prints)
    self.train_start_time = time()
def test_keann(val_loader, model, criterion, cuda, print_freq, pdtb_category=''):
    """Evaluate the KEANN model on ``val_loader``; returns top-1 avg.

    A non-empty ``pdtb_category`` switches to binary P/R/F metering and
    binary-style printing; otherwise four-way classification printing is used.
    NOTE(review): no ``torch.no_grad()`` guard during evaluation — confirm
    whether gradient tracking here is intended.
    """
    batch_time = util.AverageMeter()
    losses = util.AverageMeter()
    top1 = util.AverageMeter()
    prfa = util.AverageMeterPRFA()
    if pdtb_category != '':
        prfa = util.AverageBinaryMeterPRFA()
    # switch to evaluate mode
    model.eval()
    end = time.time()
    for i, (arg1, arg2, target, seq_lengths) in enumerate(val_loader):
        if cuda:
            arg1 = arg1.cuda()
            arg2 = arg2.cuda()
            target = target.cuda()
        # compute output
        output = model((arg1, arg2), seq_lengths)
        loss = criterion(output, target)
        # measure accuracy and record loss
        prfa_all = util.prf_multi_classify(output.data, target, topk=(1, ))
        prfa.update(prfa_all, seq_lengths.size(0))
        prec1 = util.accuracy(output.data, target, topk=(1, ))
        losses.update(loss.data, seq_lengths.size(0))
        top1.update(prec1[0][0], seq_lengths.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if pdtb_category != '':
            binary_classify_loop_print(i, print_freq, val_loader, batch_time, losses, top1, prfa, pdtb_category)
        else:
            four_classify_loop_print(i, print_freq, val_loader, batch_time, losses, top1, prfa)
    if pdtb_category != '':
        binary_classify_last_print(top1, prfa)
    else:
        four_classify_last_print(top1, prfa)
    return top1.avg
def validate(val_loader, model, criterion, epoch, args, logger, time_logger):
    """Distributed validation loop with cross-GPU metric reduction.

    Returns the average reduced top-1 accuracy. Printing/logging is done
    only on rank 0.
    """
    batch_time = util.AverageMeter('Time', ':6.3f')
    data_time = util.AverageMeter('Data', ':6.3f')
    losses = util.AverageMeter('Loss', ':.4f')
    top1 = util.AverageMeter('Acc@1', ':6.2f')
    top5 = util.AverageMeter('Acc@5', ':6.2f')
    progress = util.ProgressMeter(len(val_loader),
                                  [batch_time, data_time, losses, top1, top5],
                                  prefix='Test: ')
    model.eval()
    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            data_time.update(time.time() - end)
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)
            output = model(images)
            loss = criterion(output, target)
            acc1, acc5, correct = util.accuracy(output, target, topk=(1, 5))
            # Reduce metrics across distributed workers.
            reduced_loss = reduce_tensor(loss.data)
            reduced_top1 = reduce_tensor(acc1[0].data)
            reduced_top5 = reduce_tensor(acc5[0].data)
            losses.update(reduced_loss.item(), images.size(0))
            top1.update(reduced_top1.item(), images.size(0))
            top5.update(reduced_top5.item(), images.size(0))
            batch_time.update(time.time() - end)
            end = time.time()
            if dist.get_rank() == 0:
                if i % args.print_freq == 0:
                    progress.display(i)
    if dist.get_rank() == 0:
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(
            top1=top1, top5=top5))
    if dist.get_rank() == 0:
        logger.write([epoch, losses.avg, top1.avg, top5.avg])
        time_logger.write([epoch, batch_time.avg, data_time.avg])
    return top1.avg
def evaluate(model, data_loader, device):
    """Compute the average NLL of ``model`` over ``data_loader``.

    Relies on the module-level ``loss_fn``. Restores training mode before
    returning an ``OrderedDict`` with a single ``'NLL'`` entry.
    """
    nll_meter = util.AverageMeter()
    model.eval()
    with torch.no_grad(), \
            tqdm(total=len(data_loader.dataset)) as progress_bar:
        for features, y in data_loader:
            num_examples = y.shape[0]
            outputs = model(features)
            y = y.to(device)
            loss = loss_fn(outputs, y)
            nll_meter.update(loss.item(), num_examples)
            # Log info
            progress_bar.update(num_examples)
            progress_bar.set_postfix(NLL=nll_meter.avg)
    model.train()
    return OrderedDict([('NLL', nll_meter.avg)])
def train_on_epoch(self, optimizer, loader, epoch, validation=False):
    """Train ``self.model`` for one epoch with periodic label optimization.

    Mirrors the sibling ``optimize_epoch``: applies the LR schedule,
    re-optimizes pseudo-labels when the step budget is reached, trains the
    CNN head(s) with cross-entropy, and logs/checkpoints.

    Returns:
        dict with the epoch's average loss under ``'loss'``.
    """
    print(f"Starting epoch {epoch}, validation: {validation} " + "=" * 30, flush=True)
    loss_value = util.AverageMeter()
    # house keeping: switch to training mode.
    # Fixed: the original called self.model.run(); the sibling
    # optimize_epoch uses self.model.train() at this point.
    self.model.train()
    # Checkpoint just before a scheduled LR drop.
    if self.lr_schedule(epoch + 1) != self.lr_schedule(epoch):
        files.save_checkpoint_all(
            self.checkpoint_dir, self.model, args.arch,
            optimizer, self.L, epoch, lowest=False, save_str='pre-lr-drop')
    lr = self.lr_schedule(epoch)
    for pg in optimizer.param_groups:
        pg['lr'] = lr
    criterion_fn = torch.nn.CrossEntropyLoss()
    for index, (data, label, selected) in enumerate(loader):
        start_tm = time.time()
        global_step = epoch * len(loader) + index
        if global_step * args.batch_size >= self.optimize_times[-1]:
            # optimize labels #########################################
            self.model.headcount = 1
            print('Optimizaton starting', flush=True)
            with torch.no_grad():
                _ = self.optimize_times.pop()
                self.update_assignment(global_step)
        data = data.to(self.device)
        mass = data.size(0)
        outputs = self.model(data)
        # train CNN ####################################################
        if self.num_heads == 1:
            loss = criterion_fn(outputs, self.L[0, selected])
        else:
            # Average the loss over all classification heads.
            loss = torch.mean(torch.stack([
                criterion_fn(outputs[head_index], self.L[head_index, selected])
                for head_index in range(self.num_heads)]
            ))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_value.update(loss.item(), mass)
        data = 0  # drop the batch reference early
        # some logging stuff ##############################################################
        if index % args.log_iter == 0 and self.writer:
            self.writer.add_scalar('lr', self.lr_schedule(epoch), global_step)
            print(global_step, f" Loss: {loss.item():.3f}", flush=True)
            print(global_step, f" Freq: {mass / (time.time() - start_tm):.2f}", flush=True)
            # Fixed: the original tested the module-level name `writer`
            # here while writing through self.writer.
            if self.writer:
                self.writer.add_scalar('Loss', loss.item(), global_step)
                if index > 0:
                    self.writer.add_scalar('Freq(Hz)', mass / (time.time() - start_tm), global_step)
    # end of epoch logging ################################################################
    if self.writer and (epoch % args.log_intv == 0):
        util.write_conv(self.writer, self.model, epoch=epoch)
    files.save_checkpoint_all(self.checkpoint_dir, self.model, args.arch,
                              optimizer, self.L, epoch, lowest=False)
    return {'loss': loss_value.avg}
def optimize_epoch(self, optimizer, loader, epoch, validation=False):
    """Train ``self.model`` for one epoch with periodic label optimization.

    Applies the LR schedule, re-optimizes pseudo-labels once the step
    budget in ``self.optimize_times`` is reached, trains the CNN head(s)
    with cross-entropy, and logs/checkpoints.

    Returns:
        dict with the epoch's average loss under ``'loss'``.
    """
    print(f"Starting epoch {epoch}, validation: {validation} " + "=" * 30, flush=True)
    loss_value = util.AverageMeter()
    # house keeping
    self.model.train()
    # Checkpoint just before a scheduled LR drop.
    if self.lr_schedule(epoch + 1) != self.lr_schedule(epoch):
        files.save_checkpoint_all(self.checkpoint_dir, self.model, args.arch,
                                  optimizer, self.L, epoch, lowest=False,
                                  save_str='pre-lr-drop')
    lr = self.lr_schedule(epoch)
    for pg in optimizer.param_groups:
        pg['lr'] = lr
    XE = torch.nn.CrossEntropyLoss()
    for iter, (data, label, selected) in enumerate(loader):
        now = time.time()
        niter = epoch * len(loader) + iter
        if niter * args.batch_size >= self.optimize_times[-1]:
            ############ optimize labels #########################################
            self.model.headcount = 1
            print('Optimizaton starting', flush=True)
            with torch.no_grad():
                _ = self.optimize_times.pop()
                self.optimize_labels(niter)
        data = data.to(self.dev)
        mass = data.size(0)
        final = self.model(data)
        #################### train CNN ####################################################
        if self.hc == 1:
            loss = XE(final, self.L[0, selected])
        else:
            # Average the loss over all classification heads.
            loss = torch.mean(torch.stack(
                [XE(final[h], self.L[h, selected]) for h in range(self.hc)]))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_value.update(loss.item(), mass)
        data = 0  # drop the batch reference early
        # some logging stuff ##############################################################
        if iter % args.log_iter == 0:
            if self.writer:
                self.writer.add_scalar('lr', self.lr_schedule(epoch), niter)
            print(niter, " Loss: {0:.3f}".format(loss.item()), flush=True)
            print(niter, " Freq: {0:.2f}".format(mass / (time.time() - now)), flush=True)
            # Fixed: the original tested the module-level name `writer`
            # here while writing through self.writer.
            if self.writer:
                self.writer.add_scalar('Loss', loss.item(), niter)
                if iter > 0:
                    self.writer.add_scalar('Freq(Hz)', mass / (time.time() - now), niter)
    # end of epoch logging ################################################################
    if self.writer and (epoch % args.log_intv == 0):
        util.write_conv(self.writer, self.model, epoch=epoch)
    files.save_checkpoint_all(self.checkpoint_dir, self.model, args.arch,
                              optimizer, self.L, epoch, lowest=False)
    return {'loss': loss_value.avg}
def train(epoch, net, trainloader, device, optimizer, scheduler, loss_fn, max_grad_norm):
    """One training epoch for the conditional flow; checkpoints at the end.

    Tracks NLL/bits-per-dim via a progress bar and advances the module-level
    ``global_step`` by the number of samples seen.
    """
    global global_step
    print('\nEpoch: %d' % epoch)
    net.train()
    loss_meter = util.AverageMeter()
    with tqdm(total=len(trainloader.dataset)) as progress_bar:
        for x, cond_x in trainloader:
            x, cond_x = x.to(device), cond_x.to(device)
            optimizer.zero_grad()
            # Forward pass: latent z and sum of log-determinants of Jacobians.
            z, sldj = net(x, cond_x, reverse=False)
            loss = loss_fn(z, sldj)
            loss_meter.update(loss.item(), x.size(0))
            loss.backward()
            if max_grad_norm > 0:
                util.clip_grad_norm(optimizer, max_grad_norm)
            optimizer.step()
            scheduler.step(global_step)
            progress_bar.set_postfix(nll=loss_meter.avg,
                                     bpd=util.bits_per_dim(x, loss_meter.avg),
                                     lr=optimizer.param_groups[0]['lr'])
            progress_bar.update(x.size(0))
            global_step += x.size(0)
    # Save a checkpoint every epoch.
    print('Saving...')
    state = {
        'net': net.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epoch,
    }
    torch.save(state, 'savemodel/cINN/checkpoint_' + str(epoch) + '.tar')
def _init_loss_meters(self):
    """Build one AverageMeter per enabled loss, keyed by loss name."""
    meters = {}
    if self.do_classify:
        meters['cls_loss'] = util.AverageMeter()
    return meters
def evaluate(test_loader, model, criterion, n_iter=-1, verbose=False, device='cuda'):
    """
    Standard evaluation loop.

    Runs at most 100 batches and returns bits-per-dim computed from the
    running average loss. Fixed two defects: the model is now put in
    evaluate mode (the original called ``model.train()`` despite the
    comment), and inputs are moved to the ``device`` argument instead of a
    hard-coded ``'cuda'``.

    NOTE(review): ``n_iter`` and ``verbose`` are accepted but unused (the
    100-batch cap is hard-coded) — confirm whether callers rely on them.
    """
    loss_meter = util.AverageMeter()
    # switch to evaluate mode
    model.eval()
    bpd = 0  # defined up-front so an empty loader cannot raise NameError
    with torch.no_grad():
        for i, (x, target) in enumerate(test_loader):
            # early stop
            if i >= 100:
                break
            x = x.to(device)
            z, sldj = model(x, reverse=False)
            loss = criterion(z, sldj)
            loss_meter.update(loss.item(), x.size(0))
            bpd = util.bits_per_dim(x, loss_meter.avg)
    return bpd
def test(epoch, net, testloader, device, loss_fn, num_samples, save_dir):
    """Evaluate the flow, checkpoint on improvement, and save sample grids.

    Updates the module-level ``best_loss``.
    NOTE(review): no ``torch.no_grad()`` around the eval loop — gradients
    are still tracked; confirm whether that is intended.
    """
    global best_loss
    net.eval()
    loss_meter = util.AverageMeter()
    with tqdm(total=len(testloader.dataset)) as progress_bar:
        for x, _ in testloader:
            x = x.to(device)
            z, sldj = net(x, reverse=False)
            loss = loss_fn(z, sldj)
            loss_meter.update(loss.item(), x.size(0))
            progress_bar.set_postfix(nll=loss_meter.avg,
                                     bpd=util.bits_per_dim(x, loss_meter.avg))
            progress_bar.update(x.size(0))
    # Save checkpoint
    if loss_meter.avg < best_loss:
        print('Saving...')
        state = {
            'net': net.state_dict(),
            'test_loss': loss_meter.avg,
            'epoch': epoch,
        }
        os.makedirs('save', exist_ok=True)
        torch.save(state, 'save/best.pth.tar')
        best_loss = loss_meter.avg
    # Save samples and data
    images = sample(net, num_samples, device)
    os.makedirs(save_dir, exist_ok=True)
    images_concat = torchvision.utils.make_grid(images, nrow=int(num_samples ** 0.5), padding=2, pad_value=255)
    torchvision.utils.save_image(images_concat, os.path.join(save_dir, 'epoch_{}.png'.format(epoch)))
def train(epoch, net, trainloader, device, optimizer, scheduler, loss_fn, max_grad_norm):
    """One training epoch for the (unconditional) flow model.

    Tracks NLL/bits-per-dim via a progress bar and advances the module-level
    ``global_step`` by the number of samples seen.
    """
    global global_step
    print('\nEpoch: %d' % epoch)
    net.train()
    loss_meter = util.AverageMeter()
    with tqdm(total=len(trainloader.dataset)) as progress_bar:
        for x, _ in trainloader:
            x = x.to(device)
            optimizer.zero_grad()
            # Forward pass: latent z and sum of log-determinants of Jacobians.
            z, sldj = net(x, reverse=False)
            loss = loss_fn(z, sldj)
            loss_meter.update(loss.item(), x.size(0))
            loss.backward()
            if max_grad_norm > 0:
                util.clip_grad_norm(optimizer, max_grad_norm)
            optimizer.step()
            progress_bar.set_postfix(nll=loss_meter.avg,
                                     bpd=util.bits_per_dim(x, loss_meter.avg),
                                     lr=optimizer.param_groups[0]['lr'])
            progress_bar.update(x.size(0))
            scheduler.step(global_step)
            global_step += x.size(0)
def test(epoch, net, testloader, device, num_samples, best_loss):
    """Evaluate the energy-based model by contrastive loss and save samples.

    Loss per batch is E[model(sampled)] - E[model(real)]. Returns the
    (possibly updated) ``best_loss``.
    NOTE(review): samples are drawn with m=64 but the grid uses
    ``int(num_samples ** 0.5)`` rows — confirm these are meant to agree.
    No ``torch.no_grad()`` guard around the loop — confirm intended.
    """
    net.eval()
    loss_meter = util.AverageMeter()
    with tqdm(total=len(testloader.dataset)) as progress_bar:
        for x, _ in testloader:
            x = x.to(device)
            # Draw negative samples via the model's sampler (K MCMC steps).
            x_q = sample(net, m=64, n_ch=3, im_w=32, im_h=32, K=100, device=device)
            loss = net(x_q).mean() - net(x).mean()
            loss_meter.update(loss.item(), x.size(0))
            progress_bar.set_postfix(nll=loss_meter.avg,
                                     bpd=util.bits_per_dim(x, loss_meter.avg))
            progress_bar.update(x.size(0))
    # Save checkpoint
    if loss_meter.avg < best_loss:
        print('Saving...')
        state = {
            'net': net.state_dict(),
            'test_loss': loss_meter.avg,
            'epoch': epoch,
        }
        os.makedirs('ckpts', exist_ok=True)
        torch.save(state, 'ckpts/best_ebm.pth.tar')
        best_loss = loss_meter.avg
    # Save samples and data
    images = sample(net, m=64, n_ch=3, im_w=32, im_h=32, K=100, device=device)
    os.makedirs('ebm_samples', exist_ok=True)
    images_concat = torchvision.utils.make_grid(images, nrow=int(num_samples ** 0.5), padding=2, pad_value=255)
    torchvision.utils.save_image(images_concat, 'ebm_samples/epoch_{}.png'.format(epoch))
    return best_loss
def evaluate(model, data_loader, device, eval_file, max_len, use_squad_v2):
    """Evaluate a QA model that predicts a start position and an answer length.

    Computes a length-bucketed NLL and converts (start, length) predictions
    to spans for F1/EM scoring against ``eval_file``.
    NOTE(review): uses ``torch.cuda.LongTensor`` casts, so this path requires
    CUDA regardless of ``device`` — confirm intended.
    """
    nll_meter = util.AverageMeter()
    model.eval()
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(data_loader.dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            cc_idxs = cc_idxs.to(device)
            qc_idxs = qc_idxs.to(device)
            batch_size = cw_idxs.size(0)
            # Forward
            log_p = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            # Gold answer lengths; loss is summed over each length bucket i,
            # masking examples whose answer length equals i.
            ans_lens = y2 - y1
            loss = 0
            for i in range(max_len):
                mask = ((torch.ones_like(y1) * i) == ans_lens).type(
                    torch.cuda.LongTensor)
                y = y1 * mask
                loss += F.nll_loss(log_p[:, :, i], y)
            nll_meter.update(loss.item(), batch_size)
            # Get F1 and EM scores
            # Pick the best length per position, then the best start position.
            log_p, ans_len = torch.max(log_p, dim=-1)
            starts = torch.max(log_p, dim=-1)[1]
            ends = starts
            for i in range(starts.size(0)):
                ends[i] += ans_len.type(torch.cuda.LongTensor)[i, starts[i]]
            # print("starts and ends:", starts, ends, starts.size(), ends.size())
            # starts, ends = util.discretize(p, p + ans_lens, max_len, use_squad_v2)
            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)
            preds, _ = util.convert_tokens(gold_dict,
                                           ids.tolist(),
                                           starts.tolist(),
                                           ends.tolist(),
                                           use_squad_v2)
            pred_dict.update(preds)
    model.train()
    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results_list = [('NLL', nll_meter.avg),
                    ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)
    return results, pred_dict
def evaluate(model, data_loader, device, eval_file, max_len, use_squad_v2):
    """Evaluate a classification-style QA model (answer-class logits).

    Uses cross-entropy against the ``ans`` labels instead of span NLL, and
    scores via ``util.convert_class`` / ``util.eval_class``.
    NOTE(review): ``max_len`` and ``use_squad_v2`` are accepted but unused
    in this variant — confirm the signature is kept for interface parity.
    """
    nll_meter = util.AverageMeter()
    model.eval()
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(data_loader.dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ans, ids in data_loader:
            # Setup for forward
            cc_idxs = cc_idxs.to(device)
            qc_idxs = qc_idxs.to(device)
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            ans = ans.to(device)
            batch_size = cw_idxs.size(0)
            # Forward
            logits = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
            # y1, y2 = y1.to(device), y2.to(device)
            # loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            loss = nn.CrossEntropyLoss()(logits, ans.long())
            nll_meter.update(loss.item(), batch_size)
            # Get F1 and EM scores
            # p1, p2 = log_p1.exp(), log_p2.exp()
            # starts, ends = util.discretize(p1, p2, max_len, use_squad_v2)
            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)
            # preds, _ = util.convert_tokens(gold_dict,
            #                                ids.tolist(),
            #                                starts.tolist(),
            #                                ends.tolist(),
            #                                use_squad_v2)
            preds, _ = util.convert_class(gold_dict, ids.tolist(), logits.tolist())
            pred_dict.update(preds)
    model.train()
    results = util.eval_class(gold_dict, pred_dict)
    results_list = [('NLL', nll_meter.avg),
                    ('F1', results['F1']),
                    ('EM', results['EM'])]
    # if use_squad_v2:
    #     results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)
    return results, pred_dict
def __init__(self, args, epoch, dataset_len):
    """Set up training-time logging: print cadence, eval cap, loss meter."""
    super(TrainLogger, self).__init__(args, epoch, dataset_len)
    # Cadence and limits copied from the run configuration.
    self.experiment_name = args.name
    self.iters_per_print = args.iters_per_print
    self.max_eval = args.max_eval
    self.num_epochs = args.num_epochs
    # Running average of the training loss.
    self.loss_meter = util.AverageMeter()
def test(epoch, net, testloader, device, loss_fn, num_samples, in_channels, base_path, args):
    """Evaluate a VAE, checkpoint on improvement, and save sample grids.

    Tracks total / reconstruction / KL losses in three meters and appends a
    pixel-value histogram of generated samples to the module-level ``hists``.
    Updates the module-level ``best_loss``.
    """
    global best_loss
    global hists
    net.eval()
    # Meters: [0] total loss, [1] reconstruction loss, [2] KL loss.
    loss_meters = [util.AverageMeter() for _ in range(3)]
    with tqdm(total=len(testloader.dataset)) as progress_bar:
        for x in testloader:
            # Some loaders yield (inputs, targets); keep only the inputs.
            if len(x) == 2 and type(x) is list:
                x = x[0]
            x = x.to(device)
            with torch.no_grad():
                x_hat, mu, logvar, output_var = net(x)
                loss, reconstruction_loss, kl_loss = loss_fn(
                    x, x_hat, mu, logvar, output_var)
            loss_meters[0].update(loss.item(), x.size(0))
            loss_meters[1].update(reconstruction_loss.item(), x.size(0))
            loss_meters[2].update(kl_loss.item(), x.size(0))
            progress_bar.set_postfix(loss=loss_meters[0].avg,
                                     rc_loss=loss_meters[1].avg,
                                     kl_loss=loss_meters[2].avg)
            progress_bar.update(x.size(0))
    # Save checkpoint
    if loss_meters[0].avg < best_loss:
        print('Saving...')
        state = {
            'net': net.state_dict(),
            'test_loss': loss_meters[0].avg,
            'epoch': epoch,
        }
        ckpt_path = base_path / 'ckpts'
        ckpt_path.mkdir(exist_ok=True)
        best_path_ckpt = ckpt_path / 'best.pth.tar'
        torch.save(state, best_path_ckpt)
        best_loss = loss_meters[0].avg
    # Save samples and data
    sample_latent = args.latent_dim if args.model == 'VAE' else None
    images = sample(net, num_samples, device, in_channels, sample_latent)
    # Keep only the leading image channels when variance channels are
    # concatenated (2 -> 1 grayscale, 6 -> 3 RGB).
    if images.shape[1] == 2:
        images = images[:, :1, :, :]
    if images.shape[1] == 6:
        images = images[:, :3, :, :]
    image_vals = images.detach().cpu().numpy().flatten()
    hist = np.histogram(image_vals, bins=100)
    hists.append(hist)
    hists_path = base_path / 'hists.pkl'
    with hists_path.open('wb') as f:
        pickle.dump(hists, f)
    samples_path = base_path / 'samples'
    samples_path.mkdir(exist_ok=True)
    epoch_path = samples_path / f'epoch_{epoch}.png'
    images_concat = torchvision.utils.make_grid(images, nrow=int(num_samples**0.5), padding=2, pad_value=255)
    torchvision.utils.save_image(images_concat, epoch_path)
def __init__(self, criterion, data_loader, logger, config, global_step=0,
             target='train_dataset'):
    """Hold evaluation state: criterion, loaders, meters, and step counter."""
    self.criterion = criterion
    self.data_loader = data_loader
    self.logger = logger
    self.config = config
    self.global_step = global_step
    self.target = target
    # Fall back to logging every 100 steps when not configured.
    freq = config.log_frequency
    self.log_frequency = 100 if freq is None else freq
    # Running averages for loss and top-1/top-5 accuracy.
    self.loss_meters = util.AverageMeter()
    self.acc_meters = util.AverageMeter()
    self.acc5_meters = util.AverageMeter()
    print(self.target)
def run(split, epoch, model, optimizer, criterion, dataloaders, args):
    """Run one epoch over ``dataloaders[split]``.

    Trains when ``split == "train"`` (gradients enabled, optimizer stepped);
    otherwise evaluates under ``torch.no_grad``. Returns a dict with the
    epoch-average ``loss`` and ``acc``.
    """
    training = split == "train"
    if training:
        model.train()
        ctx = nullcontext
    else:
        model.eval()
        ctx = torch.no_grad

    loss_meter = util.AverageMeter()
    acc_meter = util.AverageMeter()
    ranger = tqdm(dataloaders[split], desc=f"{split} epoch {epoch}")
    for batch in ranger:
        s1, s1len, s2, s2len, targets = batch
        if args.cuda:
            s1, s1len = s1.cuda(), s1len.cuda()
            s2, s2len = s2.cuda(), s2len.cuda()
            targets = targets.cuda()
        batch_size = targets.shape[0]
        with ctx():
            logits = model(s1, s1len, s2, s2len)
            loss = criterion(logits, targets)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            preds = logits.argmax(1)
            acc = (preds == targets).float().mean()
        loss_meter.update(loss.item(), batch_size)
        acc_meter.update(acc.item(), batch_size)
        ranger.set_description(
            f"{split} epoch {epoch} loss {loss_meter.avg:.3f} acc {acc_meter.avg:.3f}"
        )
    return {"loss": loss_meter.avg, "acc": acc_meter.avg}
def evaluate(model, data_loader, device, eval_file, max_len, use_squad_v2, args):
    """Evaluate a span-prediction QA model (optionally BERT-augmented).

    Computes start+end NLL, discretizes probabilities into spans, and scores
    F1/EM (and AvNA for SQuAD v2) against ``eval_file``.
    """
    nll_meter = util.AverageMeter()
    model.eval()
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(data_loader.dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)
            ## Additions for BERT ##
            max_context_len, max_question_len = args.para_limit, args.ques_limit
            # Pre-computed BERT embeddings are only loaded for BERT variants.
            if "bert" in args.model_type:
                bert_dev_embeddings = get_embeddings("dev", ids, args.para_limit, args.ques_limit)
            else:
                bert_dev_embeddings = None
            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs, bert_dev_embeddings, \
                                   max_context_len, max_question_len, device)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)
            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, max_len, use_squad_v2)
            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)
            preds, _ = util.convert_tokens(gold_dict,
                                           ids.tolist(),
                                           starts.tolist(),
                                           ends.tolist(),
                                           use_squad_v2)
            pred_dict.update(preds)
    model.train()
    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results_list = [('NLL', nll_meter.avg),
                    ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)
    return results, pred_dict