import math
import sys

# Project-specific helper modules (import path assumed; adjust to the repo layout)
from utils import main_util, misc_util


def distill_model(distillation_box, data_loader, optimizer, log_freq, device, epoch):
    metric_logger = misc_util.MetricLogger(delimiter=' ')
    metric_logger.add_meter('lr', misc_util.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    lr_scheduler = None
    if epoch == 0:
        # Linearly warm up the learning rate over the first (up to) 1000 iterations
        warmup_factor = 1.0 / 1000.0
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = main_util.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, log_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        # The distillation box wraps the teacher-student pair and returns a single distillation loss
        loss = distillation_box(images, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=loss)
        metric_logger.update(lr=optimizer.param_groups[0]['lr'])
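# A minimal sketch of main_util.warmup_lr_scheduler as used above; its actual
# implementation is not shown here, so this assumes it follows the torchvision
# detection reference: the learning rate multiplier is interpolated linearly
# from warmup_factor up to 1.0 over the first warmup_iters optimizer steps.
def _warmup_lr_scheduler_sketch(optimizer, warmup_iters, warmup_factor):
    import torch

    def f(x):
        # After warmup, keep the base learning rate unchanged
        if x >= warmup_iters:
            return 1.0
        alpha = float(x) / warmup_iters
        # Interpolate linearly between warmup_factor and 1.0
        return warmup_factor * (1.0 - alpha) + alpha

    return torch.optim.lr_scheduler.LambdaLR(optimizer, f)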
def train_model(model, optimizer, data_loader, device, epoch, log_freq):
    model.train()
    metric_logger = misc_util.MetricLogger(delimiter=' ')
    metric_logger.add_meter('lr', misc_util.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    lr_scheduler = None
    if epoch == 0:
        # Linearly warm up the learning rate over the first (up to) 1000 iterations
        warmup_factor = 1.0 / 1000.0
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = main_util.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, log_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        # Detection models return a dict of losses in training mode; sum them for backprop
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # Reduce losses over all GPUs for logging purposes
        loss_dict_reduced = misc_util.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()
        if not math.isfinite(loss_value):
            # Abort on NaN/Inf loss rather than silently corrupting the weights
            print('Loss is {}, stopping training'.format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]['lr'])
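# A minimal sketch of misc_util.reduce_dict as used above; its actual
# implementation is not shown here, so this assumes it follows the torchvision
# detection reference: loss values are all-reduced (and optionally averaged)
# across distributed processes so that every rank logs the same numbers.
def _reduce_dict_sketch(input_dict, average=True):
    import torch
    import torch.distributed as dist

    world_size = dist.get_world_size() if dist.is_available() and dist.is_initialized() else 1
    if world_size < 2:
        # Single-process run: nothing to reduce
        return input_dict
    with torch.no_grad():
        # Sort keys so every process stacks the tensors in the same order
        names = sorted(input_dict.keys())
        values = torch.stack([input_dict[k] for k in names], dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        return {k: v for k, v in zip(names, values)}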