def evaluate(args, model, processor):
    """Run one pass over the dev split; return (metrics dict, per-class info).

    The metrics dict holds 'eval_loss' plus every SeqEntityScore metric
    prefixed with 'eval_'.
    """
    eval_dataset = load_and_cache_examples(args, processor, data_type='dev')
    eval_dataloader = DatasetLoader(data=eval_dataset, batch_size=args.batch_size,
                                    shuffle=False, seed=args.seed, sort=False,
                                    vocab=processor.vocab, label2id=args.label2id)
    pbar = ProgressBar(n_total=len(eval_dataloader), desc="Evaluating")
    metric = SeqEntityScore(args.id2label, markup=args.markup)
    eval_loss = AverageMeter()
    model.eval()
    with torch.no_grad():
        for batch_idx, batch in enumerate(eval_dataloader):
            ids, mask, gold_tags, lens = batch
            ids = ids.to(args.device)
            mask = mask.to(args.device)
            gold_tags = gold_tags.to(args.device)
            features, loss = model.forward_loss(ids, mask, lens, gold_tags)
            eval_loss.update(val=loss.item(), n=ids.size(0))
            # Viterbi-decode the predicted label paths from the CRF features.
            pred_paths, _ = model.crf._obtain_labels(features, args.id2label, lens)
            gold_np = gold_tags.cpu().numpy()
            # Trim padding: keep only the real tokens of each sequence.
            gold_paths = [seq[:seq_len] for seq, seq_len in zip(gold_np, lens)]
            metric.update(pred_paths=pred_paths, label_paths=gold_paths)
            pbar(step=batch_idx)
    print(" ")
    eval_info, class_info = metric.result()
    result = {'eval_loss': eval_loss.avg}
    for key, value in eval_info.items():
        result[f'eval_{key}'] = value
    return result, class_info
def train():
    """Train the colorization model for cfg.epochs epochs.

    Relies on module-level globals: model, cfg, device, train_loader,
    criterion, optimizer. Saves the weights to 'Weights/weights.pkl'
    every cfg.save_period batches and once more after training finishes.
    """
    model.train()
    print("Train: Begin!")
    losses = AverageMeter()
    for epoch in range(cfg.epochs):
        for batch_idx, (data, label) in enumerate(tqdm(train_loader)):
            l, ab = data.to(device), label.to(device)
            output = model(l)
            if cfg.is_classification:
                # BUGFIX: removed leftover debug shell
                # (`from IPython import embed; embed()`), which dropped into
                # an interactive prompt on every classification batch and
                # blocked training.
                # Quantize each ab channel into cfg.bins discrete classes.
                a = (ab[:, 0, :, :] / (1. / cfg.bins)).floor().long()
                b = (ab[:, 1, :, :] / (1. / cfg.bins)).floor().long()
                loss = 0.5 * criterion(output[0], a) + \
                       0.5 * criterion(output[1], b)
            else:
                loss = criterion(output, ab)
            losses.update(loss.item(), l.size(0))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if batch_idx % cfg.save_period == 0:
                print('Train Epoch:%d\tPercent:[%d/%d (%.0f%%)]\tLoss:%.9f\n'
                      % (epoch, batch_idx * data.size(0),
                         len(train_loader.dataset),
                         100. * batch_idx / len(train_loader), losses.avg))
                torch.save(model.state_dict(), 'Weights/weights.pkl')
    print("Saving the model!")
    torch.save(model.state_dict(), 'Weights/weights.pkl')
    print("Done")
def predict(self, dataloader, checkpoint=None):
    """Run inference over `dataloader`; return (predictions, probabilities).

    Args:
        dataloader: yields batches consumed by self._step(..., phase="predict").
        checkpoint: optional checkpoint loaded via self._load_checkpoint first.

    Returns:
        Tuple of numpy arrays: predicted labels of shape (num_elements,)
        and class probabilities of shape (num_elements, 2).
    """
    predict_time = AverageMeter()
    if checkpoint is not None:
        self._load_checkpoint(checkpoint)
    num_batches = len(dataloader)
    num_elements = len(dataloader.dataset)
    batch_size = dataloader.batch_size
    pred_array = torch.zeros(num_elements)
    prob_array = torch.zeros(num_elements, 2)
    self._model.train(False)  # evaluation mode
    start_event = torch.cuda.Event(enable_timing=True)
    end_event = torch.cuda.Event(enable_timing=True)
    for i, data in enumerate(dataloader):
        start_event.record()
        start = i * batch_size
        end = start + batch_size
        if i == num_batches - 1:
            # the last batch may be smaller than batch_size
            end = num_elements
        pred_array[start:end], prob_array[start:end] = self._step(
            data, phase="predict")
        end_event.record()
        # BUGFIX: CUDA kernels run asynchronously, and elapsed_time()
        # requires both events to have completed. Synchronize on the end
        # event before reading the timing, otherwise the measurement is
        # invalid (or raises if the event has not completed).
        end_event.synchronize()
        predict_time.update(start_event.elapsed_time(end_event))
    throughput = batch_size * 1000.0 / predict_time.avg
    print("Prediction Throughput (images/sec) : {}".format(throughput))
    return pred_array.numpy(), prob_array.numpy()
def train(dataset, loader, model, criterion, optimizer, device):
    """One training pass over `loader`, updating `model` with `optimizer`.

    `dataset` is used only for the progress-bar total.
    """
    loss_meter = AverageMeter()
    model.train()
    with tqdm(total=len(dataset)) as progress:
        progress.set_description("train")
        for batch in loader:
            lr, hr, sigma = batch
            lr, hr = lr.to(device), hr.to(device)
            # sigma = sigma.to(device)
            sr = model(lr)
            batch_loss = criterion(sr, hr)
            loss_meter.update(batch_loss.item(), lr.shape[0])
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
            progress.set_postfix(loss="{:.4f}".format(loss_meter.avg))
            progress.update(lr.shape[0])
def eval_fn(model, dataloader):
    """Evaluate `model` on `dataloader`; return the average top-1 accuracy."""
    top1 = AverageMeter('Acc@1', ':6.2f')
    model.eval()
    with torch.no_grad():
        for images, targets in dataloader:
            images, targets = images.cuda(), targets.cuda()
            logits = model(images)
            # top-5 accuracy is computed but discarded
            acc1, _ = accuracy(logits, targets, topk=(1, 5))
            top1.update(acc1[0], images.size(0))
    return top1.avg
def train(args, model, processor):
    """Train the tagger, evaluating after every epoch and saving the best
    checkpoint (by eval_f1) to `args.output_dir / 'best-model.bin'`.

    Args:
        args: namespace providing batch_size, seed, label2id/id2label,
              learning_rate, epochs, grad_norm, device, markup, arch,
              and output_dir (a pathlib.Path).
        model: model exposing forward_loss(ids, mask, lens, tags) and a CRF;
               may be wrapped in nn.DataParallel.
        processor: data processor providing the vocab.
    """
    train_dataset = load_and_cache_examples(args, processor, data_type='train')
    train_loader = DatasetLoader(data=train_dataset, batch_size=args.batch_size,
                                 shuffle=False, seed=args.seed, sort=True,
                                 vocab=processor.vocab, label2id=args.label2id)
    # Optimize only parameters that require gradients.
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    # NOTE(review): this accepts `epsilon` and exposes `epoch_step`, so it
    # looks like a project-local ReduceLROnPlateau rather than
    # torch.optim.lr_scheduler's — confirm which class is imported.
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3,
                                  verbose=1, epsilon=1e-4, cooldown=0,
                                  min_lr=0, eps=1e-8)
    best_f1 = 0
    for epoch in range(1, 1 + args.epochs):
        print(f"Epoch {epoch}/{args.epochs}")
        pbar = ProgressBar(n_total=len(train_loader), desc='Training')
        train_loss = AverageMeter()
        model.train()
        assert model.training
        for step, batch in enumerate(train_loader):
            input_ids, input_mask, input_tags, input_lens = batch
            input_ids = input_ids.to(args.device)
            input_mask = input_mask.to(args.device)
            input_tags = input_tags.to(args.device)
            features, loss = model.forward_loss(input_ids, input_mask,
                                                input_lens, input_tags)
            loss.backward()
            # Clip gradients to stabilize training.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_norm)
            optimizer.step()
            optimizer.zero_grad()
            pbar(step=step, info={'loss': loss.item()})
            train_loss.update(loss.item(), n=1)
        print(" ")
        train_log = {'loss': train_loss.avg}
        if 'cuda' in str(args.device):
            # Release cached GPU memory before evaluation.
            torch.cuda.empty_cache()
        eval_log, class_info = evaluate(args, model, processor)
        logs = dict(train_log, **eval_log)
        show_info = f'\nEpoch: {epoch} - ' + "-".join(
            [f' {key}: {value:.4f} ' for key, value in logs.items()])
        logger.info(show_info)
        # Step the LR scheduler on the monitored metric (mode='max').
        scheduler.epoch_step(logs['eval_f1'], epoch)
        if logs['eval_f1'] > best_f1:
            logger.info(f"\nEpoch {epoch}: eval_f1 improved from {best_f1} to {logs['eval_f1']}")
            logger.info("save model to disk.")
            best_f1 = logs['eval_f1']
            # Unwrap DataParallel so the checkpoint loads without the wrapper.
            if isinstance(model, nn.DataParallel):
                model_stat_dict = model.module.state_dict()
            else:
                model_stat_dict = model.state_dict()
            state = {'epoch': epoch, 'arch': args.arch,
                     'state_dict': model_stat_dict}
            model_path = args.output_dir / 'best-model.bin'
            torch.save(state, str(model_path))
        print("Eval Entity Score: ")
        # Log per-entity-class scores returned by evaluate().
        for key, value in class_info.items():
            info = f"Subject: {key} - Acc: {value['acc']} - Recall: {value['recall']} - F1: {value['f1']}"
            logger.info(info)
def train(epoch):
    """One distributed training epoch; appends the epoch's top-1 average to
    the global acclist_train. Uses module-level globals: train_sampler,
    model, train_loader, scheduler, optimizer, criterion, args.
    """
    train_sampler.set_epoch(epoch)  # reshuffle shards per epoch
    model.train()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    global best_pred, acclist_train
    for step, (inputs, labels) in enumerate(train_loader):
        scheduler(optimizer, step, epoch, best_pred)
        if not args.mixup:
            # NOTE(review): in the mixup path the batch is never moved to the
            # GPU here — presumably the loader/collate handles it; confirm.
            inputs, labels = inputs.cuda(args.gpu), labels.cuda(args.gpu)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        if not args.mixup:
            # accuracy is undefined for mixed targets
            acc1 = accuracy(outputs, labels, topk=(1,))
            acc_meter.update(acc1[0], inputs.size(0))
        loss_meter.update(loss.item(), inputs.size(0))
        if step % 100 == 0 and args.gpu == 0:
            if args.mixup:
                print('Batch: %d| Loss: %.3f'%(step, loss_meter.avg))
            else:
                print('Batch: %d| Loss: %.3f | Top1: %.3f'%(step, loss_meter.avg, acc_meter.avg))
    acclist_train += [acc_meter.avg]
def valid(dataset, loader, model, criterion, device):
    """Validation pass over `loader`; returns the average PSNR.

    `dataset` is used only for the progress-bar total.
    """
    loss_meter = AverageMeter()
    psnr_meter = AverageMeter()
    model.eval()
    with tqdm(total=len(dataset)) as progress:
        progress.set_description("valid")
        for batch in loader:
            lr, hr, sigma = batch
            lr, hr = lr.to(device), hr.to(device)
            with torch.no_grad():
                sr = model(lr)
                batch_loss = criterion(sr, hr)
            loss_meter.update(batch_loss.item(), lr.shape[0])
            # Clamp/round to valid pixel range before measuring PSNR.
            sr = quantize(sr, [0, 255])
            psnr = calc_psnr(sr, hr)
            psnr_meter.update(psnr.item(), lr.shape[0])
            progress.set_postfix(loss='{:.4f}'.format(loss_meter.avg))
            progress.update(lr.shape[0])
    return psnr_meter.avg
def evaluate(args, model, processor):
    """Evaluate on the dev split, dumping per-token predictions to
    'bilstm+crf.result.txt' under args.output_dir.

    Returns:
        (result, class_info): overall metrics dict (keys prefixed 'eval_',
        plus 'eval_loss') and per-entity-class scores.
    """
    eval_dataset = load_and_cache_examples(args, processor, data_type='dev')
    eval_dataloader = DatasetLoader(data=eval_dataset, batch_size=args.batch_size,
                                    shuffle=False, seed=args.seed, sort=False,
                                    vocab=processor.vocab, label2id=args.label2id)
    pbar = ProgressBar(n_total=len(eval_dataloader), desc="Evaluating")
    metric = SeqEntityScore(args.id2label, markup=args.markup)
    eval_loss = AverageMeter()
    model.eval()
    # BUGFIX: open the result file with a context manager so the handle is
    # closed even if evaluation raises (the original leaked it on error).
    with open(args.output_dir / 'bilstm+crf.result.txt', 'w') as fout, \
            torch.no_grad():
        for step, batch in enumerate(eval_dataloader):
            input_chars, input_ids, input_mask, input_tags, input_lens = batch
            input_ids = input_ids.to(args.device)
            input_mask = input_mask.to(args.device)
            input_tags = input_tags.to(args.device)
            features, loss = model.forward_loss(input_ids, input_mask,
                                                input_lens, input_tags)
            eval_loss.update(val=loss.item(), n=input_ids.size(0))
            tags, _ = model.crf._obtain_labels(features, args.id2label,
                                               input_lens)
            input_tags = input_tags.cpu().numpy()
            # Trim padding from the gold label paths.
            target = [input_[:len_] for input_, len_ in zip(input_tags, input_lens)]
            # NOTE: this per-token dump only works for batch_size == 1 —
            # it indexes tags[0] / input_tags[0] only.
            assert (len(tags[0]) == len(input_tags[0]))
            for i in range(len(tags[0])):
                fout.write(input_chars[i] + ' ' + args.id2label[input_tags[0][i]]
                           + ' ' + tags[0][i] + '\n')
                print(input_chars[i], tags[0][i], args.id2label[input_tags[0][i]])
                # print(processor.vocab.to_word(input_chars[0][i]), tags[0][i], args.id2label[input_tags[0][i]])
            fout.write("\n")
            metric.update(pred_paths=tags, label_paths=target)
            pbar(step=step)
    print(" ")
    eval_info, class_info = metric.result()
    eval_info = {f'eval_{key}': value for key, value in eval_info.items()}
    result = {'eval_loss': eval_loss.avg}
    result = dict(result, **eval_info)
    return result, class_info
def validate(epoch):
    """Distributed validation: all ranks compute local top-1/top-5 sums,
    reduce them across GPUs, and rank 0 prints the result and (unless
    args.eval) saves a checkpoint when top-1 improves.

    Uses module-level globals: model, val_loader, args, optimizer,
    best_pred, acclist_train, acclist_val.
    """
    model.eval()
    top1 = AverageMeter()
    top5 = AverageMeter()
    global best_pred, acclist_train, acclist_val
    is_best = False
    for batch_idx, (data, target) in enumerate(val_loader):
        data, target = data.cuda(args.gpu), target.cuda(args.gpu)
        with torch.no_grad():
            output = model(data)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            top1.update(acc1[0], data.size(0))
            top5.update(acc5[0], data.size(0))
    # sum all: reduce the per-rank sums/counts across all GPUs
    sum1, cnt1, sum5, cnt5 = torch_dist_sum(args.gpu, top1.sum, top1.count,
                                            top5.sum, top5.count)
    if args.eval:
        # Evaluation-only mode: print on rank 0, skip checkpointing.
        if args.gpu == 0:
            top1_acc = sum(sum1) / sum(cnt1)
            top5_acc = sum(sum5) / sum(cnt5)
            print('Validation: Top1: %.3f | Top5: %.3f'%(top1_acc, top5_acc))
        return
    if args.gpu == 0:
        top1_acc = sum(sum1) / sum(cnt1)
        top5_acc = sum(sum5) / sum(cnt5)
        print('Validation: Top1: %.3f | Top5: %.3f'%(top1_acc, top5_acc))
        # save checkpoint
        acclist_val += [top1_acc]
        if top1_acc > best_pred:
            best_pred = top1_acc
            is_best = True
        save_checkpoint({
            'epoch': epoch,
            'state_dict': model.module.state_dict(),
            'optimizer': optimizer.state_dict(),
            'best_pred': best_pred,
            'acclist_train': acclist_train,
            'acclist_val': acclist_val,
        }, args=args, is_best=is_best)
num_train_optimization_steps = len(data_iter) * Config.epochs warmup_steps = int(num_train_optimization_steps * Config.warmup_proportion) optimizer = AdamW(params=optimizer_grouped_parameters, lr=Config.learning_rate, eps=Config.adam_epsilon) scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=warmup_steps, num_training_steps=num_train_optimization_steps) loss_fct = CrossEntropyLoss(ignore_index=-1) mask_metric = LMAccuracy() sop_metric = LMAccuracy() tr_mask_acc = AverageMeter() tr_sop_acc = AverageMeter() tr_loss = AverageMeter() tr_mask_loss = AverageMeter() tr_sop_loss = AverageMeter() train_logs = {} nb_tr_steps = 0 global_step = 0 start_time = time.time() for epc in range(Config.epochs): for step, batch in enumerate(data_iter): batch = tuple(t.to(Config.device) for t in batch) input_ids, input_mask, segment_ids, lm_label_ids, is_next = batch outputs = model(input_ids=input_ids, token_type_ids=segment_ids,
def main():
    """Test entry point: load a GRAD checkpoint and report average PSNR over
    100 DIV2K test pairs stored in an HDF5 file; optionally save outputs
    as PNGs under args.test_result.
    """
    global args, model
    args = parser.parse_args()
    print(args)
    if args.gpu and not torch.cuda.is_available():
        raise Exception("No GPU found!")
    if not os.path.exists(args.test_result):
        os.makedirs(args.test_result)
    if not is_ready(args):
        # Build the .h5 test data if it does not exist yet.
        prepare_data(args)
    cudnn.benchmark = True
    device = torch.device(('cuda:' + args.gpu_id) if args.gpu else 'cpu')
    model = Grad_none.GRAD(feats=args.feats, basic_conv=args.basic_conv,
                           tail_conv=args.tail_conv)
    checkpoint_file = torch.load(args.test_checkpoint)
    model.load_state_dict(checkpoint_file['model'])
    model.eval()
    model = model.to(device)
    psnrs = AverageMeter()
    with tqdm(total=100) as t:
        t.set_description("test")
        # The test set is stored as 100 (low, high) image pairs keyed by index.
        for idx in range(0, 100):
            with h5py.File(
                    "{}/DIV2K_np_test_{}.h5".format(args.h5file_dir,
                                                    args.test_sigma),
                    'r') as h5:
                l_image, h_image = h5['l'][str(idx)][()], h5['h'][str(idx)][()]
            l_image = np2tensor(l_image)
            h_image = np2tensor(h_image)
            # Add a batch dimension for the model.
            l_image = l_image.unsqueeze(0)
            h_image = h_image.unsqueeze(0)
            l_image = l_image.to(device)
            h_image = h_image.to(device)
            with torch.no_grad():
                output = model(l_image)
            # Clamp/round to valid pixel range before measuring PSNR.
            output = quantize(output, [0, 255])
            psnr = calc_psnr(output, h_image)
            psnrs.update(psnr.item(), 1)
            if args.test_save:
                save_image_path = "{}/{:04d}.png".format(args.test_result, idx)
                output = output.squeeze(0)
                # CHW -> HWC layout for PIL.
                output = output.data.permute(1, 2, 0)
                save_image = pil_image.fromarray(
                    output.byte().cpu().numpy())
                save_image.save(save_image_path)
            t.update(1)
    print("PSNR: {:.4f}".format(psnrs.avg))
def evaluate(dataloader, model, criterion):
    """Standard evaluation loop; returns (avg top-1, avg top-5) accuracy."""
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(dataloader),
                             [batch_time, losses, top1, top5],
                             prefix='Test: ')
    model.eval()  # switch to evaluate mode
    with torch.no_grad():
        tic = time.time()
        for step, (images, target) in enumerate(dataloader):
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            # forward pass and loss
            output = model(images)
            loss = criterion(output, target)
            # accuracy and loss bookkeeping
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            n = images.size(0)
            losses.update(loss.item(), n)
            top1.update(acc1[0], n)
            top5.update(acc5[0], n)
            # elapsed time
            batch_time.update(time.time() - tic)
            tic = time.time()
            if step % 50 == 0:
                progress.display(step)
    # TODO: this should also be done with the ProgressMeter
    print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
    return top1.avg, top5.avg
def train(train_loader, model, criterion, optimizer, epoch):
    """One epoch of the standard ImageNet-style training loop."""
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))
    model.train()  # switch to train mode
    tic = time.time()
    for step, (images, target) in enumerate(train_loader):
        # data loading time
        data_time.update(time.time() - tic)
        images = images.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        # forward pass and loss
        output = model(images)
        loss = criterion(output, target)
        # accuracy and loss bookkeeping
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        n = images.size(0)
        losses.update(loss.item(), n)
        top1.update(acc1[0], n)
        top5.update(acc5[0], n)
        # gradient + SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()
        if step % 10 == 0:
            progress.display(step)
def train(args, model, processor):
    """Train loop (BERT-tokenized variant): logs per-batch loss/precision,
    evaluates each epoch, and saves the best model by eval_f1 to
    `args.output_dir / 'best-model.bin'`.
    """
    tokenizer = BertTokenizer.from_pretrained(
        './BERT_model/bert_pretrain/vocab.txt')
    train_dataset = load_and_cache_examples(args, processor, data_type='train')
    train_loader = DatasetLoader(data=train_dataset, batch_size=args.batch_size,
                                 shuffle=False, seed=args.seed, sort=True,
                                 vocab=processor.vocab, label2id=args.label2id,
                                 tokenizer=tokenizer)
    # train_loader = DatasetLoader(data=train_dataset, batch_size=args.batch_size,
    #                              shuffle=False, seed=args.seed, sort=True,
    #                              vocab=processor.vocab, label2id=args.label2id)
    # Optimize only parameters that require gradients.
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(parameters, lr=args.learning_rate)
    # NOTE(review): takes `epsilon` and exposes `epoch_step`, so this looks
    # like a project-local ReduceLROnPlateau rather than torch's — confirm.
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3,
                                  verbose=1, epsilon=1e-4, cooldown=0,
                                  min_lr=0, eps=1e-8)
    train_metric = SeqEntityScore(args.id2label, markup=args.markup)
    best_f1 = 0
    for epoch in range(1, 1 + args.epochs):
        strat_epoch_time = time.time()
        logger.info(f"Epoch {epoch}/{args.epochs}")
        #pbar = ProgressBar(n_total=len(train_loader), desc='Training')  # progress-bar style
        train_loss = AverageMeter()
        model.train()
        assert model.training
        for step, batch in enumerate(train_loader):
            strat_batch_time = time.time()
            input_ids, input_mask, input_tags, input_lens = batch
            input_ids = input_ids.to(args.device)
            input_mask = input_mask.to(args.device)
            input_tags = input_tags.to(args.device)
            features, loss = model.forward_loss(input_ids, input_mask,
                                                input_lens, input_tags)
            loss.backward()
            # Clip gradients to stabilize training.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_norm)
            optimizer.step()
            optimizer.zero_grad()
            # pbar(step=step, info={'loss': loss.item()})
            train_loss.update(loss.item(), n=1)
            # Decode predicted paths and compute training precision vs gold.
            tags, _ = model.crf._obtain_labels(features, args.id2label,
                                               input_lens)
            input_tags = input_tags.cpu().numpy()
            target = [
                input_[:len_] for input_, len_ in zip(input_tags, input_lens)
            ]
            pre_train = train_metric.compute_train_pre(label_paths=target,
                                                       pred_paths=tags)
            logger.info(
                f'time: {time.time() - strat_batch_time:.1f} '
                f'train_loss: {loss.item():.4f} train_pre: {pre_train:.4f}'
            )
        print(" ")
        logger.info(f'train_total_time: {time.time() - strat_epoch_time}')
        if 'cuda' in str(args.device):
            # Release cached GPU memory before evaluation.
            torch.cuda.empty_cache()
        strat_eval_time = time.time()
        eval_f1 = evaluate(args, model, processor)
        show_info = f'eval_time: {time.time() - strat_eval_time:.1f} train_avg_loss: {train_loss.avg:.4f} eval_f1: {eval_f1:.4f} '
        logger.info(show_info)
        # Step the LR scheduler on the monitored metric (mode='max').
        scheduler.epoch_step(eval_f1, epoch)
        if eval_f1 > best_f1:
            # e.g. "Epoch 1: eval_f1 improved from 0 to 0.4023105674481821"
            logger.info(
                f"\nEpoch {epoch}: eval_f1 improved from {best_f1} to {eval_f1}"
            )
            best_f1 = eval_f1
            model_stat_dict = model.state_dict()
            state = {
                'epoch': epoch,
                'arch': args.arch,
                'state_dict': model_stat_dict
            }
            model_path = args.output_dir / 'best-model.bin'
            torch.save(state, str(model_path))
def _run_epoch(self, dataloader, phase="train"):
    """Run one epoch of `phase` over `dataloader` via self._step.

    Returns:
        (average loss, average top-1 accuracy) for the epoch.
    """
    print("Phase : {}".format(phase))
    print("-" * 10)
    batch_time, data_time = AverageMeter(), AverageMeter()
    losses, top1 = AverageMeter(), AverageMeter()
    batch_size = dataloader.batch_size
    num_batches = len(dataloader)
    epoch_start = time.time()
    mark = epoch_start
    for idx, payload in enumerate(dataloader):
        data_time.update(time.time() - mark)
        loss, acc = self._step(payload, phase)
        batch_time.update(time.time() - mark)
        is_last = idx == num_batches - 1
        # the final batch may be smaller than batch_size
        num_elem = payload[0].size(0) if is_last else batch_size
        losses.update(loss, num_elem)
        top1.update((acc * 100.0 / num_elem), num_elem)
        mark = time.time()
        if idx % self._print_freq == 0 or is_last:
            print('Epoch: [{0}] ({1}/{2})\t'
                  'Batch {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'.format(
                      self._epoch + 1, idx + 1, num_batches,
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, top1=top1))
    epoch_time = mark - epoch_start
    print('Throughput : {}'.format(batch_size / batch_time.avg))
    print('Total time for Phase "{}"'
          'in Epoch {:d} : {:f}'.format(phase, self._epoch + 1, epoch_time))
    print("=" * 80)
    return losses.avg, top1.avg