def train_secondary_network():
    """Train the secondary (EM) network on a synthetic dataset and report
    test accuracy before and after training.

    Returns:
        The trained Keras model.
    """
    (x_train_em, y_train_em), (x_val, y_val), (x_test_em, y_test_em) = \
        create_synthetic_dataset_em()
    batch_size = 32
    variables = 2
    # Model starts from random initialisation, so the "before" accuracy is a
    # chance-level baseline.
    model_em, _ = u.get_model_em(batch_size, bidirectional=False,
                                 variables=variables)
    _report_em_predictions(model_em, x_test_em, y_test_em, 'Accuracy Before:')
    loss_em = u.get_loss_em(is_triplet=False)
    optimizer_em = u.get_optimiser_em()
    model_em.compile(optimizer=optimizer_em, loss=loss_em)
    model_em.fit(x_train_em, y_train_em, batch_size=batch_size, epochs=2,
                 validation_data=(x_val, y_val))
    _report_em_predictions(model_em, x_test_em, y_test_em, 'Accuracy After:')
    return model_em


def _report_em_predictions(model_em, x_test_em, y_test_em, label):
    """Predict on the test set and print probabilities plus labelled accuracy.

    Extracted to remove the duplicated before/after evaluation code.
    """
    prediction_probs = model_em.predict(x_test_em)
    # Class decision: round the probability of the positive class (index 1).
    predictions = [int(np.round(p[1])) for p in prediction_probs]
    print(prediction_probs)
    # print(predictions)
    acc = u.accuracy(predictions, y_test_em)
    print(label, acc)
def train_secondary_network(y_train):
    """Train the secondary network on a synthetic dataset derived from
    `y_train` and report test accuracy before and after training.

    NOTE(review): this redefines `train_secondary_network` from earlier in
    the file with a different signature — the later definition wins at import
    time; confirm whether both are intended.

    Returns:
        The trained Keras model.
    """
    (x_train_em, y_train_em), (x_test_em, y_test_em) = \
        create_synthetic_dataset_sec(y_train)
    batch_size = 32
    variables = 1
    # Removed unused local `maxlen = batch_size * 500` (never referenced).
    model_em, _ = u.get_model_em(batch_size, bidirectional=False,
                                 variables=variables)
    # Baseline accuracy with randomly initialised weights.
    prediction_probs = model_em.predict(x_test_em)
    predictions = [int(np.round(p[1])) for p in prediction_probs]
    print(prediction_probs)
    print(predictions)
    acc = u.accuracy(predictions, y_test_em)
    print('Accuracy Before:', acc)
    loss_em = u.get_loss_em()
    optimizer_em = u.get_optimiser_em()
    model_em.compile(optimizer=optimizer_em, loss=loss_em)
    # Use the shared batch_size constant instead of a second literal 32.
    model_em.fit(x_train_em, y_train_em, batch_size=batch_size, epochs=1)
    prediction_probs = model_em.predict(x_test_em)
    predictions = [int(np.round(p[1])) for p in prediction_probs]
    print(prediction_probs)
    print(predictions)
    acc = u.accuracy(predictions, y_test_em)
    print('Accuracy After:', acc)
    return model_em
def train_epoch(model, data_loader, criterion, optimizer, device, opt):
    """Run one training epoch over `data_loader`.

    Returns:
        (avg_loss, avg_acc): epoch-average loss and accuracy.
    """
    model.train()
    loss_meter = AverageMeter('Loss', ':.2f')
    acc_meter = AverageMeter('Acc', ':.2f')
    progress = ProgressMeter(
        len(data_loader), [loss_meter, acc_meter], prefix='Train: ')
    for step, (inputs, labels) in enumerate(data_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        # forward pass and metrics
        preds = model(inputs)
        batch_loss = criterion(preds, labels)
        batch_acc = accuracy(preds, labels)
        n = inputs.size(0)
        loss_meter.update(batch_loss.item(), n)
        acc_meter.update(batch_acc[0].item(), n)
        # backward pass and parameter update
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        # periodic progress display
        if step % opt.log_interval == 0:
            progress.display(step)
    print(f' * Train Loss {loss_meter.avg:.3f}, Train Acc {acc_meter.avg:.3f}')
    return loss_meter.avg, acc_meter.avg
def train(train_queue, model, criterion, optimizer, epoch_str):
    """Train `model` for one epoch over `train_queue` with gradient clipping.

    Returns:
        (top1.avg, loss_meter.avg): average top-1 accuracy and average loss.
    """
    loss_meter = util.AvgrageMeter()
    top1 = util.AvgrageMeter()
    top5 = util.AvgrageMeter()
    model.train()
    length = len(train_queue)  # idiomatic len() instead of __len__()
    for step, (input_data, target) in enumerate(train_queue):
        input_data = input_data.cuda()
        target = target.cuda()
        optimizer.zero_grad()
        res = model(input_data)
        loss = criterion(res, target)
        loss.backward()
        # BUG FIX: clip_grad_norm was deprecated and later removed from
        # PyTorch; the in-place clip_grad_norm_ is the supported API.
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        prec1, prec5 = util.accuracy(res, target, top_k=(1, 5))
        n = input_data.size(0)
        loss_meter.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
        if step % args.report_freq == 0:
            logging.info(
                'train - epoch:{:}\tbatch:[{:03d}/{:03d}]\tavg_loss:{:.6f}\ttop1_acc:{:.2f}%\ttop5_acc:{:.2f}%'
                .format(epoch_str, step, length, loss_meter.avg, top1.avg,
                        top5.avg))
    return top1.avg, loss_meter.avg
def test():
    """Evaluate the global `net` on `testloader`, tracking every loss term
    and top-k precision in running meters, printing stats per batch."""
    net.eval()
    topk = [1, 5]
    meters = collections.defaultdict(lambda: tnt.meter.AverageValueMeter())
    for batch_idx, batch in enumerate(testloader):
        batch = util.batch_cuda(batch)
        pred, loss_dict = net(batch)
        # keep only non-empty loss terms, reduced to scalars
        loss_dict = {name: val.mean()
                     for name, val in loss_dict.items() if val.numel() > 0}
        loss = sum(loss_dict.values())
        for name, val in loss_dict.items():
            meters[name].add(val.item())
        # top-k precision, weighted by batch size
        prec_scores = util.accuracy(pred, batch['label'], topk=topk)
        for k, prec in zip(topk, prec_scores):
            meters['P%s' % k].add(prec.item(), pred.shape[0])
        stats = ' | '.join(['%s: %.3f' % (name, meter.value()[0])
                            for name, meter in meters.items()])
        print('%d/%d.. %s' % (batch_idx, len(testloader), stats))
    print('(test) %s' % stats)
def train(model, train_loader, args):
    """Train `model` for args.epochs epochs with Adam and a MultiStepLR
    schedule, evaluating on the (module-level) `eva_loader` after each
    epoch, then save the final weights to args.model_dir.
    """
    optimizer = Adam(model.parameters(), lr=args.lr)
    exp_lr_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=args.milestones, gamma=args.gamma)
    criterion = nn.CrossEntropyLoss().cuda(device)
    for epoch in range(args.epochs):
        loss_record = AverageMeter()
        acc_record = AverageMeter()
        model.train()
        for batch_idx, (x, label, _) in enumerate(train_loader):
            x, target = x.to(device), label.to(device)
            optimizer.zero_grad()
            output = model(x)
            loss = criterion(output, target)
            acc = accuracy(output, target)
            loss.backward()
            optimizer.step()
            acc_record.update(acc[0].item(), x.size(0))
            loss_record.update(loss.item(), x.size(0))
        # BUG FIX: step the LR scheduler AFTER the epoch's optimizer updates
        # (PyTorch >= 1.1 ordering); stepping at the top of the epoch skipped
        # the initial learning rate entirely.
        exp_lr_scheduler.step()
        print('Train Epoch: {} Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
            epoch, loss_record.avg, acc_record.avg))
        test(model, eva_loader, args)
    torch.save(model.state_dict(), args.model_dir)
    print("model saved to {}.".format(args.model_dir))
def metric(**args):
    """
    Re-implement your metric here. This metric is used to save the best
    weights and to display progress. If multiple metrics are returned,
    the first one is used as the default.
    :param args: must contain 'out' (model output) and 'label' (targets).
    :return: a dictionary of metric name -> value. Note: you must return a
        dictionary, otherwise the metrics will not be shown.
    """
    out, label = args['out'], args['label']
    return {'acc': accuracy(out, label)}
def mode_val(valid_loader, model, criterion, epoch_str, print_freq):
    """Validate `model` over `valid_loader`, handling the auxiliary-logits
    head for CIFAR models and the plain head for 'uji'/'hapt'.

    Returns:
        (val_top1.avg, val_top5.avg, val_loss.avg)

    Raises:
        ValueError: if model.name is not one of the supported datasets.
    """
    import torch  # local import keeps this fix self-contained

    model.eval()
    val_top1 = AverageMeter()
    val_top5 = AverageMeter()
    val_loss = AverageMeter()
    # BUG FIX: disable autograd during validation — no gradients are needed,
    # and without no_grad() every batch keeps its graph alive (wasted memory).
    with torch.no_grad():
        for step, (val_input, val_target) in enumerate(valid_loader):
            batch = val_input.size(0)
            val_input = val_input.cuda()
            val_target = val_target.cuda()
            if model.name in ['cifar10', 'cifar100']:
                # CIFAR models return (logits, aux_logits).
                feature, logits_aux = model(val_input)
                loss = criterion(feature, val_target)
                if logits_aux is not None:
                    loss_aux = criterion(logits_aux, val_target)
                    loss += model.pre_model.auxiliary_weight * loss_aux
            elif model.name in ['uji', 'hapt']:
                feature = model(val_input)
                loss = criterion(feature, val_target)
            else:
                raise ValueError
            pre1, pre5 = accuracy(feature.data, val_target.data, top_k=(1, 5))
            val_top1.update(pre1.item(), batch)
            val_top5.update(pre5.item(), batch)
            val_loss.update(loss.item(), batch)
            if step % print_freq == 0 or step + 1 == len(valid_loader):
                str1 = 'valid - epoch:' + epoch_str + ' batch:[' + \
                    '{:3d}/{:}] '.format(step, len(valid_loader))
                str2 = '[Loss:{:.6f} Pre@1:{:.5f}% Pre@5:{:.5f}%]'.format(
                    val_loss.avg, val_top1.avg, val_top5.avg)
                logging.info(str1 + str2)
    return val_top1.avg, val_top5.avg, val_loss.avg
def test(self, val_loader, net, criterion):
    """Two-pass evaluation: classify the raw image, then an attention-cropped
    view, and average the two softmax outputs.

    Returns:
        (top1.avg, top5.avg)
    """
    top1 = AverageMeter()
    top5 = AverageMeter()
    print_freq = 100
    # switch to evaluate mode
    net.eval()
    with torch.no_grad():
        for step, (images, label) in enumerate(val_loader):
            target = label.cuda()
            images = images.cuda()
            # first pass: full image
            prob1, cam_top1, M_p = net(images)
            # second pass: attention-cropped image derived from the CAM mask
            crop_img = attention_crop_test(M_p, images, config.mask_test_th)
            crop_img = crop_img.cuda()
            prob2, cam_top1_2, _ = net(crop_img)
            # fuse both predictions by averaging their softmax distributions
            out = (F.softmax(prob1, dim=-1) + F.softmax(prob2, dim=-1)) / 2
            prec1, prec5 = accuracy(out, target, topk=(1, 5))
            top1.update(prec1[0], images.size(0))
            top5.update(prec5[0], images.size(0))
            if step % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          step, len(val_loader), top1=top1, top5=top5))
    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
    return top1.avg, top5.avg
def _intermediate_stats_logging(self, outs, y, loss, step, epoch, N,
                                len_loader, val_or_train, hc_losses=None):
    """Update the running loss/accuracy meters and periodically log them.

    :param outs: model outputs for the batch
    :param y: targets for the batch
    :param loss: batch loss tensor
    :param N: batch size used to weight the meter updates
    :param hc_losses: optional hard-constraint loss tensor
    """
    prec1, prec5 = accuracy(outs, y, topk=(1, 5))
    self.losses.update(loss.item(), N)
    self.top1.update(prec1.item(), N)
    self.top5.update(prec5.item(), N)
    if hc_losses is not None:
        self.hc_losses.update(hc_losses.item(), 1)
    if (step > 1 and step % self.CONFIG.print_freq == 0) \
            or step == len_loader - 1:
        # BUG FIX: message said "Prec@(1, 3)" although topk=(1, 5) is
        # measured; also the concatenated literals were missing a space
        # ("...HC Loss {:.3f}Prec@...").
        logging.info(
            val_or_train +
            ":[{:3d}/{}] Step {:03d}/{:03d} Loss {:.3f} HC Loss {:.3f} "
            "Prec@(1, 5) ({:.1%}, {:.1%})".format(
                epoch + 1, self.epochs, step, len_loader - 1,
                self.losses.get_avg(), self.hc_losses.get_avg(),
                self.top1.get_avg(), self.top5.get_avg()))
def infer(valid_queue, model, criterion, epoch_str):
    """Run one validation pass over `valid_queue` without gradients.

    Returns:
        (top1.avg, avg_loss.avg): average top-1 accuracy and average loss.
    """
    avg_loss = util.AvgrageMeter()
    top1 = util.AvgrageMeter()
    top5 = util.AvgrageMeter()
    model.eval()
    num_batches = len(valid_queue)
    with torch.no_grad():
        for step, (inputs, target) in enumerate(valid_queue):
            inputs = inputs.cuda()
            target = target.cuda()
            outputs = model(inputs)
            loss = criterion(outputs, target)
            prec1, prec5 = util.accuracy(outputs, target, top_k=(1, 5))
            batch = inputs.size(0)
            avg_loss.update(loss.item(), batch)
            top1.update(prec1.item(), batch)
            top5.update(prec5.item(), batch)
            if step % args.report_freq == 0:
                logging.info(
                    'valid - epoch:{:}\tbatch:[{:03d}/{:03d}]\tavg_loss:{:.6f}\ttop1_acc:{:.2f}%\ttop5_acc:{:.2f}%'
                    .format(epoch_str, step, num_batches, avg_loss.avg,
                            top1.avg, top5.avg))
    return top1.avg, avg_loss.avg
def infer(valid_queue, model, criterion):
    """Evaluate `model` over `valid_queue`.

    Returns:
        (top1.avg, objs.avg): average top-1 accuracy and average loss.
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    model.eval()
    for step, (input_data, target) in enumerate(valid_queue):
        input_data = input_data.cuda()
        target = target.cuda()
        res = model(input_data)
        loss = criterion(res, target)
        prec1, prec5 = accuracy(res, target, top_k=(1, 5))
        n = input_data.size(0)
        # BUG FIX: tensor.data[0] was removed from PyTorch (raises on 0-dim
        # tensors); .item() is the supported way to extract a Python scalar.
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
        if step % args.report_freq == 0:
            logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg,
                         top5.avg)
    return top1.avg, objs.avg
def train(epoch, model, device, dataloader, optimizer, exp_lr_scheduler,
          criterion, args):
    """Run one training epoch.

    Returns:
        loss_record: the AverageMeter holding the epoch's loss statistics.
    """
    loss_record = AverageMeter()
    acc_record = AverageMeter()
    model.train()
    for batch_idx, (data, label) in enumerate(tqdm(dataloader(epoch))):
        data, label = data.to(device), label.to(device)
        # BUG FIX: zero_grad() was called twice per batch; once is enough.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)
        # measure accuracy and record loss
        acc = accuracy(output, label)
        acc_record.update(acc[0].item(), data.size(0))
        loss_record.update(loss.item(), data.size(0))
        # compute gradient and do optimizer step
        loss.backward()
        optimizer.step()
    # BUG FIX: step the scheduler AFTER the epoch's optimizer updates
    # (PyTorch >= 1.1 ordering); stepping first skipped the initial LR.
    exp_lr_scheduler.step()
    print('Train Epoch: {} Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
        epoch, loss_record.avg, acc_record.avg))
    return loss_record
def train(self, state, epoch):
    """Train for one epoch with the two-pass (raw + attention-dropped)
    scheme, logging sample CAM visualisations to tensorboard every 300
    batches.

    :param state: dict with 'model', 'criterion', 'optimizer',
        'train_loader' and 'write' (a tensorboard SummaryWriter).
    :param epoch: current epoch index (0-based; displayed 1-based).
    :return: (top1.avg, losses.avg)
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    print_freq = config.print_freq
    net = state['model']
    criterion = state['criterion']
    optimizer = state['optimizer']
    train_loader = state['train_loader']
    write = state['write']
    net.train()
    pbar = tqdm(total=len(train_loader), unit='batches')
    pbar.set_description('Epoch {}/{}'.format(epoch + 1, config.epochs))
    for i, (img, label) in enumerate(train_loader):
        # if config.use_gpu:
        # Batch-of-one is duplicated — presumably so batch-norm layers get
        # a batch > 1; TODO confirm.
        if img.size(0) == 1:
            img = img.repeat(2, 1, 1, 1)
            label = label.repeat(2)
        target = label.cuda()
        input = img.cuda()
        optimizer.zero_grad()
        #net forward
        # First pass: raw input -> logits, top-1 CAM, attention map M_p.
        prob1, cam_top1, M_p = net(input)
        # Second pass: attention-dropped input derived from M_p.
        crop_img = attention_drop_train(M_p, input, config.mask_train_th,
                                        cam_top1)
        crop_img = crop_img.cuda()
        prob2, cam_top1_2, _ = net(crop_img)
        # Total loss averages both passes.
        loss1 = criterion(prob1, target)
        loss2 = criterion(prob2, target)
        loss = (loss1 + loss2) / 2
        #net train accuracy (measured on the first pass only)
        prec1, prec5 = accuracy(prob1, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))
        loss.backward()
        optimizer.step()
        if i % 300 == 0:
            #first image show
            first_pre = image_with_cam(input, cam_top1)
            #second image show
            second_pre = image_with_cam(crop_img, cam_top1_2)
            # visualize mask4
            write.add_images('first_pre', first_pre, 0, dataformats='NCHW')
            write.add_images('second_pre', second_pre, 0, dataformats='NCHW')
        pbar.update()
        # pbar.set_postfix_str(batch_info)
    pbar.close()
    return top1.avg, losses.avg
def test(model, test_loader, args):
    """Report the average top-1 accuracy of `model` over `test_loader`."""
    model.eval()
    meter = AverageMeter()
    for inputs, labels, _ in test_loader:
        inputs, targets = inputs.to(device), labels.to(device)
        logits = model(inputs)
        # accumulate top-1 accuracy weighted by batch size
        top1 = accuracy(logits, targets)
        meter.update(top1[0].item(), inputs.size(0))
    print('Test: Avg Acc: {:.4f}'.format(meter.avg))
def batch(images, target, model, criterion=None):
    """Move one batch to the GPU, then either compute the loss under
    autocast (when `criterion` is given) or return top-1/top-5 accuracy."""
    images = images.cuda()
    target = target.cuda()
    if not criterion:
        # evaluation path: no loss, just accuracy
        return util.accuracy(model(images), target, top_k=(1, 5))
    # training path: mixed-precision forward + loss
    with torch.cuda.amp.autocast():
        loss = criterion(model(images), target)
    return loss
def train(args):
    """Full training driver: builds loaders/models, trains for
    args.num_epoches epochs with periodic logging and tensorboard
    histograms, tests after every epoch, and tracks the best accuracy.

    Returns:
        [best_acc, dataset class name, best_epoch]
    """
    train_dataloader, test_dataloader, model, model2 = init_from_scrach(args)
    best_acc = 0.0
    best_epoch = 0
    # Renamed from `iter`, which shadowed the builtin.
    global_step = 0
    logger_path = os.path.join(args.log_dir, time.strftime('%Y%m%d_%H:%M:%S'))
    logger('Save log to %s' % logger_path)
    writer = SummaryWriter(log_dir=logger_path)
    for i in range(args.num_epoches):
        loss_sum = 0
        acc_sum = 0.0
        samples_num = 0
        for j, a_data in enumerate(train_dataloader):
            global_step += 1
            model.optimizer.zero_grad()
            # Encode both inputs with the shared encoder, score the pair.
            _, feature = model(a_data[0], a_data[-1])
            _, feature2 = model(a_data[1], a_data[-1])
            out = model2(feature, feature2)
            loss = model2.loss(out, a_data[-1])
            loss.backward()
            if args.grad_clipping != 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.grad_clipping)
            model.optimizer.step()
            loss_sum += loss.item()
            samples_num += len(a_data[0])
            acc = accuracy(out=out.data.cpu().numpy(), label=a_data[-1])
            acc_sum += acc
            writer.add_scalar('epoch%d/loss' % i, loss_sum / (j + 1),
                              global_step)
            writer.add_scalar('epoch%d/accuracy' % i, acc_sum / samples_num,
                              global_step)
            if (j + 1) % args.print_every_n == 0:
                # BUG FIX: previously divided loss_sum by j, which is a
                # ZeroDivisionError when print_every_n == 1 (j == 0) and an
                # off-by-one otherwise; j + 1 batches have been processed.
                logging.info(
                    'train: Epoch = %d | iter = %d/%d | '
                    % (i, j, len(train_dataloader)) +
                    'loss sum = %.4f | accuracy : %.4f'
                    % (loss_sum * 1.0 / (j + 1), acc_sum / samples_num))
                for name, param in model.named_parameters():
                    if param.grad is not None:
                        writer.add_histogram(
                            name, param.clone().cpu().data.numpy(), j)
                        writer.add_histogram(
                            name + '/grad',
                            param.grad.clone().cpu().data.numpy(), j)
        logging.info("Testing...... | Model : {0} | Task : {1}".format(
            model.__class__.__name__,
            train_dataloader.dataset.__class__.__name__))
        testacc = test(args, model, model2, test_dataloader)
        best_epoch = best_epoch if best_acc > testacc else i
        best_acc = best_acc if best_acc > testacc else testacc
        logging.error(
            'Test result acc1: %.4f | best acc: %.4f | best epoch : %d'
            % (testacc, best_acc, best_epoch))
    return [best_acc, train_dataloader.dataset.__class__.__name__, best_epoch]
def evaluation(args, model, data_loader):
    """Evaluate `model` over `data_loader` and return the mean accuracy.

    The model is put back into train mode before returning.
    """
    model.eval()
    samples_num = 0
    acc_sum = 0.0
    for j, a_data in enumerate(data_loader):
        out, _ = model(*a_data)
        samples_num += len(a_data[0])
        acc_sum += accuracy(out=out.data.cpu().numpy(), label=a_data[-1])
    model.train()
    # BUG FIX: guard against an empty loader to avoid ZeroDivisionError.
    return acc_sum / samples_num if samples_num else 0.0
def _eval_task(self, task_idx: int, val_loader: TaskDataLoader,
               train_epoch: int, val_epoch: int) -> Tuple[float, float, Dict]:
    """Evaluate the model on one task's validation loader.

    Delegates the per-batch work to self._eval_task_batch and aggregates
    loss/accuracy; optionally reports batches to self.report and prints
    progress according to the configured frequencies.

    :return: (avg_loss, fraction_correct, info) where info is the dict
        accumulated from per-batch info_batch updates.
    """
    self.crt_eval_info = info = dict({})
    self._start_eval_task()  # TEMPLATE
    # Reporting configuration (0 or negative freq disables that channel).
    report_or_not = self._eval_batch_report
    print_freq = self._eval_batch_show_freq
    report_freq = self._eval_batch_save_freq
    report = self.report
    last_batch = len(val_loader) - 1
    losses = AverageMeter()
    acc = AverageMeter()
    correct_cnt = 0
    seen = self.seen
    seen_eval = 0
    with torch.no_grad():
        for batch_idx, (data, targets, head_idx) in enumerate(val_loader):
            outputs, loss, info_batch = self._eval_task_batch(
                batch_idx, data, targets, head_idx)
            info.update(info_batch)
            # accuracy(...) returns a sequence of (topk_value, correct_count)
            # pairs; a single pair is expected here.
            (top1, correct), = accuracy(outputs, targets)
            correct_cnt += correct
            seen_eval += data.size(0)
            acc.update(top1, data.size(0))
            losses.update(loss.item(), data.size(0))
            if report_or_not:
                if report_freq > 0:
                    # Save a trace at the configured frequency and always on
                    # the last batch.
                    if (batch_idx + 1) % report_freq == 0 \
                            or batch_idx == last_batch:
                        report.trace_eval_batch(seen, task_idx, train_epoch,
                                                val_epoch, info)
                if print_freq > 0:
                    if (batch_idx + 1) % print_freq == 0 \
                            or batch_idx == last_batch:
                        print(
                            f'\t\t[Eval] [Epoch: {train_epoch:3}] [Batch: {batch_idx:5}]:\t '
                            f'[Loss] crt: {losses.val:3.4f} avg: {losses.avg:3.4f}\t'
                            f'[Accuracy] crt: {acc.val:3.2f} avg: {acc.avg:.2f}'
                        )
    self._end_eval_task()  # TEMPLATE
    return losses.avg, correct_cnt / float(seen_eval), info
def evaluation(args, model, model2, data_loader):
    """Pairwise evaluation: encode both inputs with `model`, score the pair
    with `model2`, and return the mean accuracy.

    The model is put back into train mode before returning.
    """
    model.eval()
    samples_num = 0
    acc_sum = 0.0
    for j, a_data in enumerate(data_loader):
        _, feature = model(a_data[0], a_data[-1])
        _, feature2 = model(a_data[1], a_data[-1])
        out = model2(feature, feature2)
        samples_num += len(a_data[0])
        acc_sum += accuracy(out=out.data.cpu().numpy(), label=a_data[-1])
    model.train()
    # BUG FIX: guard against an empty loader to avoid ZeroDivisionError.
    return acc_sum / samples_num if samples_num else 0.0
def _intermediate_stats_logging(self, outs, y, loss, step, epoch, N,
                                len_loader, val_or_train):
    """Update the running loss/top-1/top-3 meters and periodically log them.

    NOTE(review): the meters, variable names and message all use top-3,
    so topk is corrected from (1, 5) to (1, 3) to agree with self.top3 /
    prec3 / the "Prec@(1, 3)" label — confirm top-3 is the intended metric.
    """
    prec1, prec3 = accuracy(outs, y, topk=(1, 3))
    self.losses.update(loss.item(), N)
    self.top1.update(prec1.item(), N)
    self.top3.update(prec3.item(), N)
    if (step > 1 and step % self.print_freq == 0) or step == len_loader - 1:
        # Also fixed: missing space between the concatenated literals, and
        # the loss average was formatted twice in the same message.
        self.logger.info(
            val_or_train + ":[{:3d}/{}] Step {:03d}/{:03d} Loss {:.3f} "
            "Prec@(1, 3) ({:.1%}, {:.1%})".format(
                epoch + 1, self.train_epochs, step, len_loader - 1,
                self.losses.get_avg(), self.top1.get_avg(),
                self.top3.get_avg()))
def test(model, device, dataloader, args):
    """Evaluate `model` on `dataloader()` and return the accuracy meter."""
    meter = AverageMeter()
    model.eval()
    for inputs, labels in tqdm(dataloader()):
        inputs, labels = inputs.to(device), labels.to(device)
        logits = model(inputs)
        # measure top-1 accuracy, weighted by batch size
        top1 = accuracy(logits, labels)
        meter.update(top1[0].item(), inputs.size(0))
    print('Test Acc: {:.4f}'.format(meter.avg))
    return meter
def val(self, epoch):
    """Validate the feature extractor + target classifier for one epoch.

    Supports the 'improved' augmentation mode, where each sample arrives as
    ncrops views that are flattened for the forward pass and whose outputs
    are averaged back per sample.

    :param epoch: current epoch index (used for logging/tensorboard).
    :return: (val_losses.avg, val_top1_accs.avg)
    """
    # test mode
    self.model_feature.eval()
    self.model_target_classifier.eval()
    val_losses = AverageMeter()
    val_top1_accs = AverageMeter()
    # Batches
    for i, (imgs, labels) in enumerate(self.val_loader):
        # Move to GPU, if available
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            labels = labels.cuda()
        if self.data_aug == 'improved':
            # Fold the crop dimension into the batch for the forward pass.
            bs, ncrops, c, h, w = imgs.size()
            imgs = imgs.view(-1, c, h, w)
        # forward and loss
        with torch.no_grad():
            outputs = self.model_feature(imgs)
            outputs = self.model_target_classifier(outputs)
            if self.data_aug == 'improved':
                # Average the predictions over the ncrops views per sample.
                outputs = outputs.view(bs, ncrops, -1).mean(1)
            val_loss = self.loss_fn(outputs, labels)
        val_losses.update(val_loss.item(), imgs.size(0))
        # compute accuracy
        top1_accuracy = accuracy(outputs, labels, 1)
        val_top1_accs.update(top1_accuracy, imgs.size(0))
        # batch update: clear the hook-captured layer outputs so they do not
        # accumulate across batches
        self.layer_outputs_source.clear()
        self.layer_outputs_target.clear()
        # Print status
        if i % self.print_freq == 0:
            self.logger.info(
                'Val Epoch: [{:d}/{:d}][{:d}/{:d}]\tval_loss={:.4f}\t\ttop1_accuracy={:.4f}\t'
                .format(epoch, self.num_epochs, i, len(self.val_loader),
                        val_losses.avg, val_top1_accs.avg))
    # save tensorboard
    self.writer.add_scalar('Val_loss', val_losses.avg, epoch)
    self.writer.add_scalar('Val_top1_accuracy', val_top1_accs.avg, epoch)
    self.logger.info(
        '||==> Val Epoch: [{:d}/{:d}]\tval_loss={:.4f}\t\ttop1_accuracy={:.4f}'
        .format(epoch, self.num_epochs, val_losses.avg, val_top1_accs.avg))
    return val_losses.avg, val_top1_accs.avg
def batch_fn(images, target, model, device, loss_fn, training=True):
    """Run one forward pass and compute loss plus top-1/top-5 accuracy.

    :param training: when True, the forward + loss run under autocast
        (mixed precision, enabled only if CUDA is available).
    :return: (loss, acc1, acc5, output)
    """
    images = images.to(device, non_blocking=True)
    target = target.to(device, non_blocking=True)
    if training:
        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
            output = model(images)
            loss = loss_fn(output, target)
    else:
        output = model(images)
        # Some models return (logits, aux, ...); keep only the main logits.
        if isinstance(output, (tuple, list)):
            output = output[0]
        # .detach() replaces the deprecated .data attribute for breaking the
        # autograd link.
        loss = loss_fn(output, target).detach()
    acc1, acc5 = util.accuracy(output, target, top_k=(1, 5))
    return loss, acc1, acc5, output
def validate(self, state, epoch):
    """Validate for one epoch with the two-pass (raw + attention-cropped)
    scheme, averaging the two softmax outputs for the accuracy metric.

    :param state: dict with 'model', 'val_loader', 'criterion', 'write_val'.
    :param epoch: current epoch index (unused except via caller context).
    :return: (top1.avg, losses.avg)
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    print_freq = config.print_freq
    net = state['model']
    val_loader = state['val_loader']
    criterion = state['criterion']
    # NOTE(review): write_val is fetched but never used in this method.
    write_val = state['write_val']
    # switch to evaluate mode
    net.eval()
    with torch.no_grad():
        for i, (input, label) in enumerate(val_loader):
            # Batch-of-one is duplicated — presumably so batch-norm layers
            # get a batch > 1; TODO confirm.
            if input.size(0) == 1:
                input = input.repeat(2, 1, 1, 1)
                label = label.repeat(2)
            target = label.cuda()
            input = input.cuda()
            # forward: first pass on the raw image
            prob1, cam_top1, M_p = net(input)
            # second pass on the attention-cropped image
            crop_img = attention_crop_test(M_p, input, config.mask_test_th)
            crop_img = crop_img.cuda()
            prob2, cam_top1_2, _ = net(crop_img)
            # loss averages both passes
            loss1 = criterion(prob1, target)
            loss2 = criterion(prob2, target)
            loss = (loss1 + loss2) / 2
            # fused prediction: mean of the two softmax distributions
            out = (F.softmax(prob1, dim=-1) + F.softmax(prob2, dim=-1)) / 2
            # measure accuracy and record loss
            prec1, prec5 = accuracy(out, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1[0], input.size(0))
            top5.update(prec5[0], input.size(0))
            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), loss=losses, top1=top1,
                          top5=top5))
    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
    return top1.avg, losses.avg
def batch_forward(batch, model, pair_loss_fc, type_loss_fc):
    """Unpack one batch, run the pair model, and combine the pair and type
    losses.

    Returns:
        (total_loss, acc, f1) for the batch.
    """
    (query_input_ids, query_sentence_types, query_position_ids, query_masks,
     left_input_ids, left_sentence_types, left_position_ids, left_masks,
     right_input_ids, right_sentence_types, right_position_ids, right_masks,
     labels, types) = batch
    # Forward pass over the query / left / right segments.
    pair_probs, type_out, _, _ = model(
        query_input_ids, query_masks, query_sentence_types, query_position_ids,
        left_input_ids, left_masks, left_sentence_types, left_position_ids,
        right_input_ids, right_masks, right_sentence_types, right_position_ids)
    pair_loss, type_loss = util.loss(pair_loss_fc, type_loss_fc, pair_probs,
                                     type_out, labels, types)
    total_loss = pair_loss + type_loss
    acc, f1 = util.accuracy(pair_probs, labels)
    return total_loss, acc, f1
def validate(val_loader, model, criterion, local_rank, args):
    """Distributed validation pass: each rank evaluates its shard and the
    loss/accuracy are all-reduced (mean over args.nprocs) before updating
    the meters. Only rank 0 prints progress.

    :param local_rank: this process's GPU/rank index.
    :return: top1.avg (average reduced top-1 accuracy).
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader),
                             [batch_time, losses, top1, top5],
                             prefix='Test: ')
    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda(local_rank, non_blocking=True)
            target = target.cuda(local_rank, non_blocking=True)
            # compute output
            output = model(images)
            loss = criterion(output, target)
            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            # Synchronise all ranks before averaging the metrics across them.
            torch.distributed.barrier()
            reduced_loss = reduce_mean(loss, args.nprocs)
            reduced_acc1 = reduce_mean(acc1, args.nprocs)
            reduced_acc5 = reduce_mean(acc5, args.nprocs)
            losses.update(reduced_loss.item(), images.size(0))
            top1.update(reduced_acc1.item(), images.size(0))
            top5.update(reduced_acc5.item(), images.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            # only print on the main process
            if local_rank == 0:
                progress.display(i)
        # TODO: this should also be done with the ProgressMeter
        # only print on the main process
        if local_rank == 0:
            print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(
                top1=top1, top5=top5))
    return top1.avg
def train(train_queue, valid_queue, model, architect, criterion, optimizer,
          lr):
    """One epoch of DARTS-style search: each step first updates the
    architecture parameters on a validation batch, then the model weights
    on the training batch.

    Returns:
        (top1.avg, objs.avg): average top-1 accuracy and average loss.
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    for step, (input_data, target) in enumerate(train_queue):
        model.train()
        n = input_data.size(0)
        input_data = input_data.cuda()
        target = target.cuda()
        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda()
        architect.step(input_data, target, input_search, target_search, lr,
                       optimizer, unrolled=args.unrolled)
        optimizer.zero_grad()
        logits = model(input_data)
        loss = criterion(logits, target)
        loss.backward()
        # BUG FIX: clip_grad_norm was deprecated and later removed from
        # PyTorch; the in-place clip_grad_norm_ is the supported API.
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        prec1, prec5 = accuracy(logits, target, top_k=(1, 5))
        # BUG FIX: tensor.data[0] was removed from PyTorch; .item() extracts
        # the Python scalar.
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                         top5.avg)
    return top1.avg, objs.avg
def evaluation(args, model, data_loader):
    """Evaluate `model`, collect per-sample predicted class indices, save
    them via save_pred, and return (accuracy, predictions).

    The model is put back into train mode before returning.
    """
    model.eval()
    samples_num = 0
    acc_sum = 0.0
    pred = list()
    for j, a_data in enumerate(data_loader):
        out, _ = model(*a_data)
        # BUG FIX: ndarray.max(-1) returns the row maxima (a values array),
        # so indexing [1] picked the second sample's maximum rather than the
        # class indices. argmax(-1) is the NumPy equivalent of torch's
        # max(-1)[1] and yields the predicted class per sample.
        pred.extend(out.data.cpu().numpy().argmax(-1).tolist())
        samples_num += len(a_data[0])
        acc_sum += accuracy(out=out.data.cpu().numpy(), label=a_data[-1])
    model.train()
    acc = acc_sum / samples_num
    save_pred(args, pred, data_loader.dataset)
    return acc, pred
def val(model, loader, device):
    """Evaluate `model` on `loader` and log the final top-1 precision and
    elapsed wall-clock time."""
    meter = AverageMeter()
    model.eval()
    tic = time.time()
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            batch = inputs.shape[0]
            logits = model(inputs)
            # top-1 precision for this batch, weighted by batch size
            top1 = accuracy(logits, targets, topk=(1,))[0]
            meter.update(top1.item(), batch)
    logging.info("Test: Final Prec@1 {:.2%} Time {:.2f}".format(
        meter.get_avg(), time.time() - tic))