def eval_epoch(model, data_loader, fold, epoch):
    """Run one evaluation pass over ``data_loader`` and log the results.

    Every batch is fed through ``model`` under ``torch.no_grad()``. The batch
    loss is accumulated in a ``utils.Mean``, the last element of the model
    output is collected for the whole pass, and class predictions are
    produced by ``assign_classes`` with a fixed temperature of ``1.``.
    On every 10th epoch ``find_temp_global`` is run as well and its
    temperature, metric and figure are logged; the temperature it finds is
    NOT used for the predictions (it is overridden by the default).

    Args:
        model: called as ``model(images, feats)``. Its result must be a
            sequence: it is measured with ``len`` and its last element is
            taken as the logits.
        data_loader: iterable yielding ``(images, feats, exps, labels, _)``.
        fold: fold identifier, used in the log directory and console output.
        epoch: epoch number. ``epoch - 1`` indexes the loss-weight schedule,
            so this is presumably 1-based - confirm against the caller.

    Returns:
        dict mapping metric name to value: ``'loss'`` plus one entry per key
        of the result of ``compute_metric``.
    """
    writer = SummaryWriter(
        os.path.join(args.experiment_path, 'fold{}'.format(fold), 'eval'))

    # A new writer is created on every call, so it must be closed here;
    # otherwise each epoch leaks an open event file.
    try:
        metrics = {
            'loss': utils.Mean(),
        }

        model.eval()
        with torch.no_grad():
            fold_labels = []
            fold_logits = []
            fold_exps = []

            for images, feats, exps, labels, _ in tqdm(
                    data_loader, desc='epoch {} evaluation'.format(epoch)):
                images, feats, labels = \
                    images.to(DEVICE), feats.to(DEVICE), labels.to(DEVICE)
                logits = model(images, feats)

                # The weight ramps linearly from 1 / len(logits) to 1 over
                # config.epochs steps.
                loss = compute_loss(
                    input=logits,
                    target=labels,
                    weight=np.linspace(
                        1 / len(logits), 1., config.epochs)[epoch - 1].item())
                metrics['loss'].update(loss.detach().cpu().numpy())

                # Only the last element of the model output is kept.
                *_, logits = logits

                fold_labels.append(labels)
                fold_logits.append(logits)
                fold_exps.extend(exps)

            fold_labels = torch.cat(fold_labels, 0)
            fold_logits = torch.cat(fold_logits, 0)

            if epoch % 10 == 0:
                # Logged for inspection only; both temp and metric are
                # overwritten below.
                temp, metric, fig = find_temp_global(
                    input=fold_logits, target=fold_labels, exps=fold_exps)
                writer.add_scalar('temp', temp, global_step=epoch)
                writer.add_scalar('metric_final', metric, global_step=epoch)
                writer.add_figure('temps', fig, global_step=epoch)

            temp = 1.  # use default temp
            fold_preds = assign_classes(
                probs=(fold_logits * temp).softmax(1).detach().cpu().numpy(),
                exps=fold_exps)
            fold_preds = torch.tensor(fold_preds).to(fold_logits.device)
            metric = compute_metric(input=fold_preds, target=fold_labels)

            metrics = {k: metrics[k].compute_and_reset() for k in metrics}
            for k in metric:
                metrics[k] = metric[k].mean().detach().cpu().numpy()

            # Images of the last batch only, at most 16 of them.
            images = images_to_rgb(images)[:16]
            print('[FOLD {}][EPOCH {}][EVAL] {}'.format(
                fold, epoch,
                ', '.join('{}: {:.4f}'.format(k, metrics[k]) for k in metrics)))
            for k in metrics:
                writer.add_scalar(k, metrics[k], global_step=epoch)
            writer.add_image(
                'images',
                torchvision.utils.make_grid(
                    images,
                    nrow=math.ceil(math.sqrt(images.size(0))),
                    normalize=True),
                global_step=epoch)
    finally:
        writer.close()

    return metrics
def train_epoch(model, optimizer, scheduler, data_loader, fold, epoch):
    """Train ``model`` for one epoch with gradient accumulation and log it.

    The crop size passed to ``update_transforms`` grows linearly from 224 to
    ``config.crop_size`` over ``config.epochs`` epochs, and the loss weight
    passed to ``compute_loss`` decays linearly from 1.0 to 0.8.

    NOTE(review): a later definition named ``train_epoch`` with a different
    signature appears further down in this file; if both live in the same
    module the later one shadows this one - confirm which is intended.

    NOTE: the optimizer only steps when the 1-based batch index is a multiple
    of ``config.opt.acc_steps``; gradients of a trailing partial group are
    not applied by this function.

    Args:
        model: called as ``model(images, feats, labels)``.
        optimizer: optimizer whose ``step``/``zero_grad`` are driven here.
        scheduler: object providing ``get_lr()`` and ``step()``; it is
            stepped once per optimizer step.
        data_loader: iterable yielding ``(images, feats, labels, ids)``.
        fold: fold identifier, used in the log directory and console output.
        epoch: epoch number. ``epoch - 1`` indexes the schedules, so this is
            presumably 1-based - confirm against the caller.

    Returns:
        None. Metrics are printed and written to the summary writer.
    """
    writer = SummaryWriter(
        os.path.join(args.experiment_path, 'fold{}'.format(fold), 'train'))

    # A new writer is created on every call, so it must be closed here;
    # otherwise each epoch leaks an open event file.
    try:
        metrics = {
            'loss': utils.Mean(),
        }

        update_transforms(
            round(224 + (config.crop_size - 224)
                  * np.linspace(0, 1, config.epochs)[epoch - 1].item()))

        # Constant for the whole epoch, so computed once outside the loop.
        loss_weight = np.linspace(1., 0.8, config.epochs)[epoch - 1]

        model.train()
        optimizer.zero_grad()
        for i, (images, feats, labels, _) in enumerate(
                tqdm(data_loader, desc='epoch {} train'.format(epoch)), 1):
            images, feats, labels = \
                images.to(DEVICE), feats.to(DEVICE), labels.to(DEVICE)
            logits = model(images, feats, labels)

            loss = compute_loss(
                input=logits, target=labels, weight=loss_weight)
            metrics['loss'].update(loss.detach().cpu().numpy())

            # Read before stepping so the logged value is the rate that was
            # in effect for the last batch.
            lr = scheduler.get_lr()
            # Scale so that the accumulated gradient is an average over the
            # accumulation group.
            (loss.mean() / config.opt.acc_steps).backward()

            if i % config.opt.acc_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                scheduler.step()

        with torch.no_grad():
            metrics = {k: metrics[k].compute_and_reset() for k in metrics}

            # Images of the last batch only, at most 16 of them.
            images = images_to_rgb(images)[:16]
            print('[FOLD {}][EPOCH {}][TRAIN] {}'.format(
                fold, epoch,
                ', '.join('{}: {:.4f}'.format(k, metrics[k]) for k in metrics)))
            for k in metrics:
                writer.add_scalar(k, metrics[k], global_step=epoch)
            writer.add_scalar('learning_rate', lr, global_step=epoch)
            writer.add_image(
                'images',
                torchvision.utils.make_grid(
                    images,
                    nrow=math.ceil(math.sqrt(images.size(0))),
                    normalize=True),
                global_step=epoch)
    finally:
        writer.close()
def train_epoch(model, optimizer, scheduler, data_loader, unsup_data_loader, fold, epoch):
    """Train ``model`` for one epoch on labeled plus pseudo-labeled batches.

    Labeled and unlabeled batches are consumed in lockstep. For every
    unlabeled batch, soft targets are built without gradients: the model's
    softmax outputs are averaged over dim 1 of the unlabeled batch
    (presumably several augmented views of each sample - confirm against the
    dataset), repeated for each of those entries and sharpened with
    ``dist_sharpen``. Labeled targets are one-hot encoded, both halves are
    concatenated, passed through ``mixup`` and used for a single training
    step with gradient accumulation.

    NOTE: the optimizer only steps when the 1-based batch index is a multiple
    of ``config.opt.acc_steps``; gradients of a trailing partial group are
    not applied by this function.

    Args:
        model: called as ``model(images, None, True)``.
        optimizer: optimizer whose ``step``/``zero_grad`` are driven here.
        scheduler: object providing ``get_lr()`` and ``step()``; it is
            stepped once per optimizer step.
        data_loader: labeled loader yielding 4-tuples whose first and third
            items are images and labels.
        unsup_data_loader: unlabeled loader yielding 3-tuples whose first
            item is a 5-D image tensor ``(b, n, c, h, w)``. Must be at least
            as long as ``data_loader``.
        fold: fold identifier, used in the log directory and console output.
        epoch: epoch number. ``epoch - 1`` indexes the schedule, so this is
            presumably 1-based - confirm against the caller.

    Returns:
        None. Metrics are printed and written to the summary writer.
    """
    assert len(data_loader) <= len(unsup_data_loader), \
        (len(data_loader), len(unsup_data_loader))

    writer = SummaryWriter(
        os.path.join(args.experiment_path, 'fold{}'.format(fold), 'train'))

    # A new writer is created on every call, so it must be closed here;
    # otherwise each epoch leaks an open event file.
    try:
        metrics = {
            'loss': utils.Mean(),
        }

        update_transforms(np.linspace(0, 1, config.epochs)[epoch - 1].item())
        data = zip(data_loader, unsup_data_loader)
        # zip stops at the shorter loader; tell tqdm the real length.
        total = min(len(data_loader), len(unsup_data_loader))

        model.train()
        optimizer.zero_grad()
        for i, ((images_s, _, labels_s, _), (images_u, _, _)) \
                in enumerate(tqdm(data, desc='epoch {} train'.format(epoch), total=total), 1):
            images_s, labels_s, images_u = \
                images_s.to(DEVICE), labels_s.to(DEVICE), images_u.to(DEVICE)
            labels_s = utils.one_hot(labels_s, NUM_CLASSES)

            # Pseudo-labels are targets, so no gradient flows through them.
            with torch.no_grad():
                b, n, c, h, w = images_u.size()
                images_u = images_u.view(b * n, c, h, w)
                logits_u = model(images_u, None, True)
                logits_u = logits_u.view(b, n, NUM_CLASSES)
                # Average the class distribution over dim 1, then give every
                # entry along that dim the same averaged target.
                labels_u = logits_u.softmax(2).mean(1, keepdim=True)
                labels_u = labels_u.repeat(1, n, 1).view(b * n, NUM_CLASSES)
                labels_u = dist_sharpen(labels_u, temp=SHARPEN_TEMP)

            assert images_s.size() == images_u.size()
            assert labels_s.size() == labels_u.size()

            images = torch.cat([images_s, images_u], 0)
            labels = torch.cat([labels_s, labels_u], 0)
            images, labels = mixup(images, labels)
            assert images.size(0) == config.batch_size * 2

            logits = model(images, None, True)

            loss = compute_loss(input=logits, target=labels, unsup=True)
            metrics['loss'].update(loss.detach().cpu().numpy())

            # Read before stepping so the logged value is the rate that was
            # in effect for the last batch.
            lr = scheduler.get_lr()
            # Scale so that the accumulated gradient is an average over the
            # accumulation group.
            (loss.mean() / config.opt.acc_steps).backward()

            if i % config.opt.acc_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                scheduler.step()

        with torch.no_grad():
            metrics = {k: metrics[k].compute_and_reset() for k in metrics}

            # Mixed images of the last batch only, at most 16 of them.
            images = images_to_rgb(images)[:16]
            print('[FOLD {}][EPOCH {}][TRAIN] {}'.format(
                fold, epoch,
                ', '.join('{}: {:.4f}'.format(k, metrics[k]) for k in metrics)))
            for k in metrics:
                writer.add_scalar(k, metrics[k], global_step=epoch)
            writer.add_scalar('learning_rate', lr, global_step=epoch)
            writer.add_image(
                'images',
                torchvision.utils.make_grid(
                    images,
                    nrow=math.ceil(math.sqrt(images.size(0))),
                    normalize=True),
                global_step=epoch)
    finally:
        writer.close()