# Module-level imports assumed by this excerpt; cfg, meters, dist, loader,
# optim, and logger are provided elsewhere in the repo.
import torch
import torch.nn as nn
import torch.nn.functional as F


@torch.no_grad()
def test_epoch_semi(test_loader, model, test_meter, cur_epoch):
    """Evaluates the model on the test set and accumulates normed-CE statistics."""
    # Enable eval mode
    model.eval()
    test_meter.iter_tic()
    total_ce_loss_1 = 0.0
    total_ce_loss_k = 0.0
    total_samples = 0
    # Per-sample CE against the model's own argmax predictions; built once, reused per iter
    criteria_u = nn.CrossEntropyLoss(reduction="none").cuda()
    for cur_iter, (inputs, labels) in enumerate(test_loader):
        # Transfer the data to the current GPU device
        inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
        # Compute the predictions
        preds = model(inputs)
        # Compute the CE of the L2-normalized logits against the argmax pseudo-labels
        total_samples += inputs.shape[0]
        probs = torch.softmax(preds, dim=1)
        _, lbs_u_guess = torch.max(probs, dim=1)
        normed_logits_1 = F.normalize(preds, p=2, dim=1)
        # Note: CrossEntropyLoss applies log-softmax internally, so feeding it
        # softmax outputs double-softmaxes the normed logits; kept as in the original.
        loss_CE = criteria_u(torch.softmax(normed_logits_1, dim=1), lbs_u_guess).mean()
        total_ce_loss_1 += loss_CE.item() * inputs.shape[0]
        # Same statistic with the normed logits rescaled to norm 10 (temperature-like factor)
        normed_logits_k = F.normalize(preds, p=2, dim=1) * 10
        loss_CE = criteria_u(torch.softmax(normed_logits_k, dim=1), lbs_u_guess).mean()
        total_ce_loss_k += loss_CE.item() * inputs.shape[0]
        # Compute the errors
        if cfg.TASK == "col":
            preds = preds.permute(0, 2, 3, 1)
            preds = preds.reshape(-1, preds.size(3))
            labels = labels.reshape(-1)
            mb_size = inputs.size(0) * inputs.size(2) * inputs.size(3) * cfg.NUM_GPUS
        else:
            mb_size = inputs.size(0) * cfg.NUM_GPUS
        if cfg.TASK == "seg":
            # top1_err is in fact inter; top5_err is in fact union
            top1_err, top5_err = meters.inter_union(preds, labels, cfg.MODEL.NUM_CLASSES)
        else:
            ks = [1, min(5, cfg.MODEL.NUM_CLASSES)]  # rot only has 4 classes
            top1_err, top5_err = meters.topk_errors(preds, labels, ks)
        # Combine the errors across the GPUs (no reduction if 1 GPU used)
        top1_err, top5_err = dist.scaled_all_reduce([top1_err, top5_err])
        # Copy the errors from GPU to CPU (sync point)
        if cfg.TASK == "seg":
            top1_err, top5_err = top1_err.cpu().numpy(), top5_err.cpu().numpy()
        else:
            top1_err, top5_err = top1_err.item(), top5_err.item()
        test_meter.iter_toc()
        # Update and log stats
        test_meter.update_stats(top1_err, top5_err, mb_size)
        test_meter.log_iter_stats(cur_epoch, cur_iter)
        test_meter.iter_tic()
    # Log epoch stats
    result = test_meter.get_epoch_stats(cur_epoch)
    test_meter.log_epoch_stats(cur_epoch)
    test_meter.reset()
    return result, [total_ce_loss_1 / total_samples, total_ce_loss_k / total_samples]
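
# Illustrative only: a self-contained sketch of the normed-CE statistic that
# test_epoch_semi accumulates, assuming plain torch. The helper name
# `normed_ce_sketch` and the default scale are ours, not part of this repo.
def normed_ce_sketch(preds, scale=1.0):
    """CE of scale * L2-normalized logits against the model's own argmax labels."""
    lbs_guess = preds.argmax(dim=1)
    normed = F.normalize(preds, p=2, dim=1) * scale
    # Mirrors the loop above, including its softmax-before-CE convention
    return nn.CrossEntropyLoss()(torch.softmax(normed, dim=1), lbs_guess)
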
@torch.no_grad()
def test_epoch(test_loader, model, test_meter, cur_epoch):
    """Evaluates the model on the test set."""
    # Enable eval mode
    model.eval()
    test_meter.iter_tic()
    for cur_iter, (inputs, labels) in enumerate(test_loader):
        # Transfer the data to the current GPU device
        inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
        # Compute the predictions
        preds = model(inputs)
        # Compute the errors
        if cfg.TASK == "col":
            preds = preds.permute(0, 2, 3, 1)
            preds = preds.reshape(-1, preds.size(3))
            labels = labels.reshape(-1)
            mb_size = inputs.size(0) * inputs.size(2) * inputs.size(3) * cfg.NUM_GPUS
        else:
            mb_size = inputs.size(0) * cfg.NUM_GPUS
        if cfg.TASK == "seg":
            # top1_err is in fact inter; top5_err is in fact union
            top1_err, top5_err = meters.inter_union(preds, labels, cfg.MODEL.NUM_CLASSES)
        else:
            ks = [1, min(5, cfg.MODEL.NUM_CLASSES)]  # rot only has 4 classes
            top1_err, top5_err = meters.topk_errors(preds, labels, ks)
        # Combine the errors across the GPUs (no reduction if 1 GPU used)
        top1_err, top5_err = dist.scaled_all_reduce([top1_err, top5_err])
        # Copy the errors from GPU to CPU (sync point)
        if cfg.TASK == "seg":
            top1_err, top5_err = top1_err.cpu().numpy(), top5_err.cpu().numpy()
        else:
            top1_err, top5_err = top1_err.item(), top5_err.item()
        test_meter.iter_toc()
        # Update and log stats
        test_meter.update_stats(top1_err, top5_err, mb_size)
        test_meter.log_iter_stats(cur_epoch, cur_iter)
        test_meter.iter_tic()
    # Log epoch stats
    result = test_meter.get_epoch_stats(cur_epoch)
    test_meter.log_epoch_stats(cur_epoch)
    test_meter.reset()
    return result
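
# Illustrative only: a minimal top-k error computation matching the spirit of
# meters.topk_errors used above (errors as percentages); this is a sketch, not
# the repo's implementation.
def topk_errors_sketch(preds, labels, ks):
    """Return [top-k error in %] for each k in ks, given (N, C) logits."""
    _, top = preds.topk(max(ks), dim=1, largest=True, sorted=True)
    correct = top.t().eq(labels.view(1, -1))
    return [
        (1.0 - correct[:k].reshape(-1).float().sum().item() / labels.size(0)) * 100.0
        for k in ks
    ]
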
def train_epoch(train_loader, model, loss_fun, optimizer, train_meter, cur_epoch):
    """Performs one epoch of training."""
    # Update drop path prob for NAS
    if cfg.MODEL.TYPE == "nas":
        m = model.module if cfg.NUM_GPUS > 1 else model
        m.set_drop_path_prob(cfg.NAS.DROP_PROB * cur_epoch / cfg.OPTIM.MAX_EPOCH)
    # Shuffle the data
    loader.shuffle(train_loader, cur_epoch)
    # Update the learning rate per epoch
    if not cfg.OPTIM.ITER_LR:
        lr = optim.get_epoch_lr(cur_epoch)
        optim.set_lr(optimizer, lr)
    # Enable training mode
    model.train()
    train_meter.iter_tic()
    for cur_iter, (inputs, labels) in enumerate(train_loader):
        # Update the learning rate per iter
        if cfg.OPTIM.ITER_LR:
            lr = optim.get_epoch_lr(cur_epoch + cur_iter / len(train_loader))
            optim.set_lr(optimizer, lr)
        # Transfer the data to the current GPU device
        inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
        # Perform the forward pass
        preds = model(inputs)
        # Compute the loss (with the NAS auxiliary head if the model returns a tuple)
        if isinstance(preds, tuple):
            loss = loss_fun(preds[0], labels) + cfg.NAS.AUX_WEIGHT * loss_fun(preds[1], labels)
            preds = preds[0]
        else:
            loss = loss_fun(preds, labels)
        # Perform the backward pass
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters
        optimizer.step()
        # Compute the errors
        if cfg.TASK == "col":
            preds = preds.permute(0, 2, 3, 1)
            preds = preds.reshape(-1, preds.size(3))
            labels = labels.reshape(-1)
            mb_size = inputs.size(0) * inputs.size(2) * inputs.size(3) * cfg.NUM_GPUS
        else:
            mb_size = inputs.size(0) * cfg.NUM_GPUS
        if cfg.TASK == "seg":
            # top1_err is in fact inter; top5_err is in fact union
            top1_err, top5_err = meters.inter_union(preds, labels, cfg.MODEL.NUM_CLASSES)
        else:
            ks = [1, min(5, cfg.MODEL.NUM_CLASSES)]  # rot only has 4 classes
            top1_err, top5_err = meters.topk_errors(preds, labels, ks)
        # Combine the stats across the GPUs (no reduction if 1 GPU used)
        loss, top1_err, top5_err = dist.scaled_all_reduce([loss, top1_err, top5_err])
        # Copy the stats from GPU to CPU (sync point)
        loss = loss.item()
        if cfg.TASK == "seg":
            top1_err, top5_err = top1_err.cpu().numpy(), top5_err.cpu().numpy()
        else:
            top1_err, top5_err = top1_err.item(), top5_err.item()
        train_meter.iter_toc()
        # Update and log stats
        train_meter.update_stats(top1_err, top5_err, loss, lr, mb_size)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
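
# Illustrative only: one plausible shape for optim.get_epoch_lr with the
# fractional epochs used in the per-iter branch above. A half-period cosine
# decay is assumed here; the actual schedule is whatever cfg.OPTIM selects.
def cosine_lr_sketch(cur_epoch, base_lr=0.1, max_epoch=200):
    """Cosine-annealed LR for a (possibly fractional) epoch index."""
    import math
    return 0.5 * base_lr * (1.0 + math.cos(math.pi * cur_epoch / max_epoch))
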
def search_epoch(train_loader, model, loss_fun, optimizer, train_meter, cur_epoch):
    """Performs one epoch of differentiable architecture search."""
    m = model.module if cfg.NUM_GPUS > 1 else model
    # Shuffle the data
    loader.shuffle(train_loader[0], cur_epoch)
    loader.shuffle(train_loader[1], cur_epoch)
    # Update the learning rate per epoch
    if not cfg.OPTIM.ITER_LR:
        lr = optim.get_epoch_lr(cur_epoch)
        optim.set_lr(optimizer[0], lr)
    # Enable training mode
    model.train()
    train_meter.iter_tic()
    trainB_iter = iter(train_loader[1])
    for cur_iter, (inputs, labels) in enumerate(train_loader[0]):
        # Update the learning rate per iter
        if cfg.OPTIM.ITER_LR:
            lr = optim.get_epoch_lr(cur_epoch + cur_iter / len(train_loader[0]))
            optim.set_lr(optimizer[0], lr)
        # Transfer the data to the current GPU device
        inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
        # Update the architecture parameters on split B (restarting its iterator as needed)
        if cur_epoch + cur_iter / len(train_loader[0]) >= cfg.OPTIM.ARCH_EPOCH:
            try:
                inputsB, labelsB = next(trainB_iter)
            except StopIteration:
                trainB_iter = iter(train_loader[1])
                inputsB, labelsB = next(trainB_iter)
            inputsB, labelsB = inputsB.cuda(), labelsB.cuda(non_blocking=True)
            optimizer[1].zero_grad()
            loss = m._loss(inputsB, labelsB)
            loss.backward()
            optimizer[1].step()
        # Perform the forward pass
        preds = model(inputs)
        # Compute the loss
        loss = loss_fun(preds, labels)
        # Perform the backward pass
        optimizer[0].zero_grad()
        loss.backward()
        # clip_grad_norm is deprecated; use the in-place clip_grad_norm_
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        # Update the parameters
        optimizer[0].step()
        # Compute the errors
        if cfg.TASK == "col":
            preds = preds.permute(0, 2, 3, 1)
            preds = preds.reshape(-1, preds.size(3))
            labels = labels.reshape(-1)
            mb_size = inputs.size(0) * inputs.size(2) * inputs.size(3) * cfg.NUM_GPUS
        else:
            mb_size = inputs.size(0) * cfg.NUM_GPUS
        if cfg.TASK == "seg":
            # top1_err is in fact inter; top5_err is in fact union
            top1_err, top5_err = meters.inter_union(preds, labels, cfg.MODEL.NUM_CLASSES)
        else:
            ks = [1, min(5, cfg.MODEL.NUM_CLASSES)]  # rot only has 4 classes
            top1_err, top5_err = meters.topk_errors(preds, labels, ks)
        # Combine the stats across the GPUs (no reduction if 1 GPU used)
        loss, top1_err, top5_err = dist.scaled_all_reduce([loss, top1_err, top5_err])
        # Copy the stats from GPU to CPU (sync point)
        loss = loss.item()
        if cfg.TASK == "seg":
            top1_err, top5_err = top1_err.cpu().numpy(), top5_err.cpu().numpy()
        else:
            top1_err, top5_err = top1_err.item(), top5_err.item()
        train_meter.iter_toc()
        # Update and log stats
        train_meter.update_stats(top1_err, top5_err, loss, lr, mb_size)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
    # Log genotype
    genotype = m.genotype()
    logger.info("genotype = %s", genotype)
    logger.info(F.softmax(m.net_.alphas_normal, dim=-1))
    logger.info(F.softmax(m.net_.alphas_reduce, dim=-1))
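
# Illustrative only: a toy, self-contained version of the alternating bi-level
# update pattern in search_epoch (weights on split A, architecture parameters
# on split B). Shapes, losses, and hyperparameters here are made up.
def bilevel_alternation_sketch(steps=10):
    w = torch.randn(4, 2, requires_grad=True)   # stand-in for network weights
    alpha = torch.zeros(3, requires_grad=True)  # stand-in for architecture params
    w_opt = torch.optim.SGD([w], lr=0.1, momentum=0.9)
    a_opt = torch.optim.Adam([alpha], lr=3e-4)
    for _ in range(steps):
        # Architecture step on "split B"
        xb = torch.randn(8, 4)
        a_opt.zero_grad()
        ((xb @ w) * torch.softmax(alpha, dim=0)[0]).pow(2).mean().backward()
        a_opt.step()
        # Weight step on "split A", with the same gradient clipping as above
        xa = torch.randn(8, 4)
        w_opt.zero_grad()
        (xa @ w).pow(2).mean().backward()
        torch.nn.utils.clip_grad_norm_([w], 5.0)
        w_opt.step()
    return alpha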