def train(train_loader, valid_loader, model, w_optim, lr, epoch, sample,
          net_crit, train_meter):
    """Trains the network weights for one epoch with the given architecture sample."""
    cur_step = epoch * len(train_loader)
    writer.add_scalar('train/lr', lr, cur_step)
    model.train()
    train_meter.iter_tic()
    # Create a gradient scaler when AMP is enabled (requires torch.cuda.amp.autocast)
    scaler = torch.cuda.amp.GradScaler() if cfg.SEARCH.AMP and hasattr(
        torch.cuda.amp, 'autocast') else None
    for step, (trn_X, trn_y) in enumerate(train_loader):
        trn_X, trn_y = trn_X.cuda(), trn_y.cuda()
        # phase 1. child network step (w)
        if scaler is not None:
            with torch.cuda.amp.autocast():
                # Perform the forward pass in AMP
                preds = model(trn_X, sample)
                # Compute the loss in AMP
                loss = net_crit(preds, trn_y)
            # Perform the backward pass in AMP
            w_optim.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(w_optim)
            # Updates the scale for next iteration.
            scaler.update()
        else:
            preds = model(trn_X, sample)
            # Compute the loss
            loss = net_crit(preds, trn_y)
            # Perform the backward pass
            w_optim.zero_grad()
            loss.backward()
            # gradient clipping
            nn.utils.clip_grad_norm_(model.parameters(), cfg.OPTIM.GRAD_CLIP)
            # Update the parameters
            w_optim.step()
        # Compute the errors
        top1_err, top5_err = meters.topk_errors(preds, trn_y, [1, 5])
        # Copy the stats from GPU to CPU (sync point)
        loss, top1_err, top5_err = loss.item(), top1_err.item(), top5_err.item()
        train_meter.iter_toc()
        # Update and log stats
        mb_size = trn_X.size(0) * cfg.NUM_GPUS
        train_meter.update_stats(top1_err, top5_err, loss, lr, mb_size)
        train_meter.log_iter_stats(epoch, step)
        train_meter.iter_tic()
        # write to tensorboard
        writer.add_scalar('train/loss', loss, cur_step)
        writer.add_scalar('train/top1_error', top1_err, cur_step)
        writer.add_scalar('train/top5_error', top5_err, cur_step)
        cur_step += 1
    # Log epoch stats
    train_meter.log_epoch_stats(epoch)
    train_meter.reset()
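# Note: the AMP branch in train() above skips the gradient clipping that the
# FP32 branch applies. If clipping is also wanted under AMP, the usual pattern
# is to unscale the gradients first and then clip. A minimal sketch, reusing
# the same cfg.OPTIM.GRAD_CLIP setting; this is an illustration, not the
# project's actual code:
def amp_step_with_clipping_sketch(scaler, loss, model, w_optim, grad_clip):
    w_optim.zero_grad()
    scaler.scale(loss).backward()
    # Unscale the gradients held by the optimizer so clipping sees true magnitudes
    scaler.unscale_(w_optim)
    nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    # scaler.step() skips the parameter update if any gradient is inf/NaN
    scaler.step(w_optim)
    scaler.update()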
def train_epoch(train_loader, model, loss_fun, optimizer, train_meter, cur_epoch):
    """Performs one epoch of training."""
    # Shuffle the data
    loader.shuffle(train_loader, cur_epoch)
    # Update the learning rate
    lr = optim.get_epoch_lr(cur_epoch)
    optim.set_lr(optimizer, lr)
    # Enable training mode
    model.train()
    train_meter.iter_tic()
    # Scale the gradients when AMP is enabled; requires a PyTorch version with autocast support
    scaler = torch.cuda.amp.GradScaler() if cfg.TRAIN.AMP and hasattr(
        torch.cuda.amp, 'autocast') else None
    for cur_iter, (inputs, labels) in enumerate(train_loader):
        # Transfer the data to the current GPU device
        inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
        # using AMP
        if scaler is not None:
            with torch.cuda.amp.autocast():
                # Perform the forward pass in AMP
                preds = model(inputs)
                # Compute the loss in AMP
                loss = loss_fun(preds, labels)
            # Perform the backward pass in AMP
            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            # Updates the scale for next iteration.
            scaler.update()
        else:
            preds = model(inputs)
            # Compute the loss
            loss = loss_fun(preds, labels)
            # Perform the backward pass
            optimizer.zero_grad()
            loss.backward()
            # Update the parameters
            optimizer.step()
        # Compute the errors
        top1_err, top5_err = meters.topk_errors(preds, labels, [1, 5])
        # Combine the stats across the GPUs (no reduction if 1 GPU used)
        loss, top1_err, top5_err = dist.scaled_all_reduce(
            [loss, top1_err, top5_err])
        # Copy the stats from GPU to CPU (sync point)
        loss, top1_err, top5_err = loss.item(), top1_err.item(), top5_err.item()
        train_meter.iter_toc()
        # Update and log stats
        mb_size = inputs.size(0) * cfg.NUM_GPUS
        train_meter.update_stats(top1_err, top5_err, loss, lr, mb_size)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
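# The training and evaluation loops in this file rely on meters.topk_errors to
# turn logits into top-1/top-5 error rates. The sketch below shows what that
# call is assumed to compute (top-k error in %, returned as scalar tensors so
# they can still be all-reduced); it is an illustration, not the project's own
# implementation.
def topk_errors_sketch(preds, labels, ks):
    """Returns the top-k error percentage for each k in ks."""
    max_k = max(ks)
    # Indices of the max_k highest-scoring classes per sample: (N, max_k)
    _, top_idx = preds.topk(max_k, dim=1, largest=True, sorted=True)
    # Compare each of the top-k predictions against the ground-truth label
    correct = top_idx.eq(labels.view(-1, 1).expand_as(top_idx))
    errors = []
    for k in ks:
        num_correct = correct[:, :k].reshape(-1).float().sum()
        errors.append((1.0 - num_correct / preds.size(0)) * 100.0)
    return errors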
def train_epoch(train_loader, model, optimizer, criterion, cur_epoch, train_meter):
    """Performs one epoch of training with an optional auxiliary-head loss."""
    # TODO: DALI backend support
    # if config.data_loader_type == 'DALI':
    #     len_train_loader = get_train_loader_len(config.dataset.lower(), config.batch_size, is_train=True)
    # else:
    #     len_train_loader = len(train_loader)
    model.train()
    train_meter.iter_tic()
    cur_step = cur_epoch * len(train_loader)
    cur_lr = optimizer.param_groups[0]['lr']
    writer.add_scalar('train/lr', cur_lr, cur_step)
    # TODO: DALI backend support
    # if config.data_loader_type == 'DALI':
    #     for cur_iter, data in enumerate(train_loader):
    #         X = data[0]["data"].cuda(non_blocking=True)
    #         y = data[0]["label"].squeeze().long().cuda(non_blocking=True)
    #         if config.cutout_length > 0:
    #             X = cutout_batch(X, config.cutout_length)
    #         train_iter(X, y)
    #         cur_step += 1
    #     train_loader.reset()
    for cur_iter, (X, y) in enumerate(train_loader):
        X, y = X.to(device, non_blocking=True), y.to(device, non_blocking=True)
        optimizer.zero_grad()
        logits, aux_logits = model(X)
        loss = criterion(logits, y)
        # Add the auxiliary-head loss if enabled
        if cfg.TRAIN.AUX_WEIGHT > 0.:
            loss += cfg.TRAIN.AUX_WEIGHT * criterion(aux_logits, y)
        loss.backward()
        # gradient clipping
        nn.utils.clip_grad_norm_(model.parameters(), cfg.OPTIM.GRAD_CLIP)
        optimizer.step()
        # Compute the errors
        top1_err, top5_err = meters.topk_errors(logits, y, [1, 5])
        # Copy the stats from GPU to CPU (sync point)
        loss, top1_err, top5_err = loss.item(), top1_err.item(), top5_err.item()
        train_meter.iter_toc()
        # Update and log stats
        mb_size = X.size(0) * cfg.NUM_GPUS
        train_meter.update_stats(top1_err, top5_err, loss, cur_lr, mb_size)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        # write to tensorboard
        writer.add_scalar('train/loss', loss, cur_step)
        writer.add_scalar('train/top1_error', top1_err, cur_step)
        writer.add_scalar('train/top5_error', top5_err, cur_step)
        cur_step += 1
    # Log epoch stats
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
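# The commented-out DALI branch above calls a cutout_batch helper. The sketch
# below illustrates the Cutout augmentation it presumably performs, assuming X
# is an NCHW float tensor; the project's actual helper may differ in detail.
def cutout_batch_sketch(X, length):
    """Zeroes out one random length x length square per image in the batch."""
    n, _, h, w = X.shape
    # Random square centers, one per image
    cy = torch.randint(0, h, (n,), device=X.device)
    cx = torch.randint(0, w, (n,), device=X.device)
    for i in range(n):
        y1, y2 = max(int(cy[i]) - length // 2, 0), min(int(cy[i]) + length // 2, h)
        x1, x2 = max(int(cx[i]) - length // 2, 0), min(int(cx[i]) + length // 2, w)
        X[i, :, y1:y2, x1:x2] = 0.0
    return X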
def test_epoch(
    test_loader,
    model,
    test_meter,
    cur_epoch,
    sample,
    tensorboard_writer=None,
):
    """Evaluates the model on the test set."""
    # Enable eval mode
    model.eval()
    test_meter.iter_tic()
    for cur_iter, (inputs, labels) in enumerate(test_loader):
        # Transfer the data to the current GPU device
        inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
        # using AMP
        if cfg.SEARCH.AMP and hasattr(torch.cuda.amp, 'autocast'):
            with torch.cuda.amp.autocast():
                # Compute the predictions
                preds = model(inputs, sample)
        else:
            # Compute the predictions
            preds = model(inputs, sample)
        # Compute the errors
        top1_err, top5_err = meters.topk_errors(preds, labels, [1, 5])
        # Combine the errors across the GPUs (no reduction if 1 GPU used)
        # top1_err, top5_err = dist.scaled_all_reduce([top1_err, top5_err])
        # Copy the errors from GPU to CPU (sync point)
        top1_err, top5_err = top1_err.item(), top5_err.item()
        test_meter.iter_toc()
        # Update and log stats
        test_meter.update_stats(top1_err, top5_err,
                                inputs.size(0) * cfg.NUM_GPUS)
        test_meter.log_iter_stats(cur_epoch, cur_iter)
        test_meter.iter_tic()
    top1_err = test_meter.mb_top1_err.get_win_median()
    if tensorboard_writer is not None:
        tensorboard_writer.add_scalar('val/top1_error',
                                      test_meter.mb_top1_err.get_win_median(),
                                      cur_epoch)
        tensorboard_writer.add_scalar('val/top5_error',
                                      test_meter.mb_top5_err.get_win_median(),
                                      cur_epoch)
    # Log epoch stats
    test_meter.log_epoch_stats(cur_epoch)
    test_meter.reset()
    return top1_err
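# A hypothetical driver showing how train() and test_epoch() above could be
# combined into a one-shot search loop. The sampler, scheduler and meter
# objects are assumptions for illustration; this is not the project's actual
# entry point.
def search_loop_sketch(train_loader, valid_loader, model, w_optim, lr_scheduler,
                       net_crit, sampler, train_meter, test_meter, max_epoch):
    best_err = float('inf')
    for epoch in range(max_epoch):
        lr = lr_scheduler.get_last_lr()[0]
        # Draw one sub-architecture from the supernet for this epoch
        sample = sampler.sample()
        train(train_loader, valid_loader, model, w_optim, lr, epoch, sample,
              net_crit, train_meter)
        lr_scheduler.step()
        top1_err = test_epoch(valid_loader, model, test_meter, epoch, sample,
                              tensorboard_writer=writer)
        best_err = min(best_err, top1_err)
    return best_err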
def valid_epoch(valid_loader, model, criterion, cur_epoch, cur_step, valid_meter):
    """Evaluates the model on the validation set."""
    model.eval()
    valid_meter.iter_tic()
    for cur_iter, (X, y) in enumerate(valid_loader):
        X, y = X.to(device, non_blocking=True), y.to(device, non_blocking=True)
        logits, _ = model(X)
        loss = criterion(logits, y)
        # Compute the errors
        top1_err, top5_err = meters.topk_errors(logits, y, [1, 5])
        # Combine the errors across the GPUs (no reduction if 1 GPU used)
        # NOTE: this reduction was previously disabled.
        top1_err, top5_err = dist.scaled_all_reduce([top1_err, top5_err])
        # Copy the errors from GPU to CPU (sync point)
        top1_err, top5_err = top1_err.item(), top5_err.item()
        valid_meter.iter_toc()
        # Update and log stats
        valid_meter.update_stats(top1_err, top5_err, X.size(0) * cfg.NUM_GPUS)
        valid_meter.log_iter_stats(cur_epoch, cur_iter)
        valid_meter.iter_tic()
    top1_err = valid_meter.mb_top1_err.get_win_median()
    valid_meter.log_epoch_stats(cur_epoch)
    valid_meter.reset()
    return top1_err
def train_epoch(train_loader, valid_loader, model, architect, loss_fun,
                w_optimizer, alpha_optimizer, lr, train_meter, cur_epoch):
    """Performs one epoch of differentiable architecture search: an architecture
    (alpha) step followed by a weight (w) step at every iteration."""
    model.train()
    train_meter.iter_tic()
    cur_step = cur_epoch * len(train_loader)
    writer.add_scalar('train/lr', lr, cur_step)
    # Scale the gradients when AMP is enabled; requires a PyTorch version with autocast support
    scaler = torch.cuda.amp.GradScaler() if cfg.SEARCH.AMP and hasattr(
        torch.cuda.amp, 'autocast') else None
    valid_loader_iter = iter(valid_loader)
    for cur_iter, (trn_X, trn_y) in enumerate(train_loader):
        try:
            (val_X, val_y) = next(valid_loader_iter)
        except StopIteration:
            # Restart the validation iterator once it is exhausted
            valid_loader_iter = iter(valid_loader)
            (val_X, val_y) = next(valid_loader_iter)
        # Transfer the data to the current GPU device
        trn_X, trn_y = trn_X.cuda(), trn_y.cuda(non_blocking=True)
        val_X, val_y = val_X.cuda(), val_y.cuda(non_blocking=True)
        # phase 2. architect step (alpha)
        alpha_optimizer.zero_grad()
        architect.unrolled_backward(trn_X, trn_y, val_X, val_y, lr,
                                    w_optimizer, unrolled=cfg.DARTS.SECOND)
        alpha_optimizer.step()
        # phase 1. child network step (w)
        if scaler is not None:
            with torch.cuda.amp.autocast():
                # Perform the forward pass in AMP
                preds = model(trn_X)
                # Compute the loss in AMP
                loss = loss_fun(preds, trn_y)
            # Perform the backward pass in AMP
            w_optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(w_optimizer)
            # Updates the scale for next iteration.
            scaler.update()
        else:
            preds = model(trn_X)
            # Compute the loss
            loss = loss_fun(preds, trn_y)
            # Perform the backward pass
            w_optimizer.zero_grad()
            loss.backward()
            # gradient clipping
            nn.utils.clip_grad_norm_(model.weights(), cfg.OPTIM.GRAD_CLIP)
            # Update the parameters
            w_optimizer.step()
        # Compute the errors
        top1_err, top5_err = meters.topk_errors(preds, trn_y, [1, 5])
        # Copy the stats from GPU to CPU (sync point)
        loss, top1_err, top5_err = loss.item(), top1_err.item(), top5_err.item()
        train_meter.iter_toc()
        # Update and log stats
        mb_size = trn_X.size(0) * cfg.NUM_GPUS
        train_meter.update_stats(top1_err, top5_err, loss, lr, mb_size)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        # write to tensorboard
        writer.add_scalar('train/loss', loss, cur_step)
        writer.add_scalar('train/top1_error', top1_err, cur_step)
        writer.add_scalar('train/top5_error', top5_err, cur_step)
        cur_step += 1
    # Log epoch stats
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
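# For reference, when cfg.DARTS.SECOND is False the architect step above
# reduces to first-order DARTS: a plain gradient step of the architecture
# parameters on the validation loss. A minimal sketch under that assumption,
# not the repo's Architect.unrolled_backward implementation:
def first_order_alpha_step_sketch(model, loss_fun, alpha_optimizer, val_X, val_y):
    alpha_optimizer.zero_grad()
    val_loss = loss_fun(model(val_X), val_y)
    # Gradients also reach the network weights here, but they are cleared by
    # w_optimizer.zero_grad() before the weight update in the loop above.
    val_loss.backward()
    alpha_optimizer.step()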