def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, device, epoch, print_freq, writer):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}'))
    header = 'Epoch: [{}]'.format(epoch)
    for i, (image, target) in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        image, target = image.to(device), target.to(device)
        output = model(image)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # the scheduler is stepped once per iteration, not once per epoch
        lr_scheduler.step()
        metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"])

        # log per-iteration loss and learning rate to TensorBoard
        global_step = epoch * len(data_loader) + i
        writer.add_scalar('training loss', loss.item(), global_step)
        writer.add_scalar('lr', optimizer.param_groups[0]["lr"], global_step)

@torch.no_grad()  # evaluation only: no gradients needed
def evaluate(model, data_loader, device, own_mAP=True):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(image)

        if own_mAP:
            # filter the raw detections with NMS before handing them to the evaluator
            for output in outputs:
                box = output["boxes"]
                score = output["scores"]
                label = output["labels"]
                keep = box_nms(box, score)
                output["boxes"] = box[keep]
                output["scores"] = score[keep]
                output["labels"] = label[keep]

        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {target["image_id"].item(): output
               for target, output in zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator

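# The evaluate() above relies on a box_nms helper (defined elsewhere in this repo) to
# filter detections when own_mAP is set. As a rough, hypothetical sketch of what such a
# helper could look like, assuming it simply wraps torchvision.ops.nms with a fixed IoU
# threshold (the 0.5 value is an illustrative assumption, not taken from this repo):
#
#     from torchvision.ops import nms
#
#     def box_nms(boxes, scores, iou_threshold=0.5):
#         # returns the indices of the detections kept after class-agnostic NMS
#         return nms(boxes, scores, iou_threshold)
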
def evaluate(model, data_loader, device, num_classes):
    from config import MAPILLARY_CLASSNAMES as classnames
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes, classnames)
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'
    with torch.no_grad():
        for image, target in metric_logger.log_every(data_loader, 200, header):
            image, target = image.to(device), target.to(device)
            output = model(image)
            confmat.update(target.cpu().flatten(), output.argmax(1).cpu().flatten())

    confmat.reduce_from_all_processes()
    return confmat

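# A minimal usage sketch (not part of the original code) showing how the segmentation-style
# train_one_epoch(model, criterion, ...) and evaluate(model, data_loader, device, num_classes)
# above could be wired together. The loss, the poly learning-rate schedule, the batch sizes
# and the ignore_index value are illustrative assumptions; the real setup lives elsewhere
# in this repo.
def example_segmentation_training(model, dataset_train, dataset_val, device,
                                  num_classes, epochs=30, print_freq=10):
    from torch.utils.data import DataLoader
    from torch.utils.tensorboard import SummaryWriter

    loader_train = DataLoader(dataset_train, batch_size=4, shuffle=True, num_workers=4)
    loader_val = DataLoader(dataset_val, batch_size=1, shuffle=False, num_workers=4)

    # assumed loss: per-pixel cross-entropy, ignoring unlabeled pixels
    criterion = torch.nn.CrossEntropyLoss(ignore_index=255)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01,
                                momentum=0.9, weight_decay=1e-4)
    # assumed "poly" decay, stepped once per iteration to match the per-iteration
    # lr_scheduler.step() call inside train_one_epoch above
    total_iters = epochs * len(loader_train)
    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lambda it: (1 - it / total_iters) ** 0.9)
    writer = SummaryWriter()

    model.to(device)
    for epoch in range(epochs):
        train_one_epoch(model, criterion, optimizer, loader_train, lr_scheduler,
                        device, epoch, print_freq, writer)
        confmat = evaluate(model, loader_val, device, num_classes)
        print(confmat)  # assumes the ConfusionMatrix implements a readable __str__
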
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0:
        # linear warm-up of the learning rate during the first epoch
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

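# A minimal usage sketch (not part of the original code) showing how the detection-style
# train_one_epoch above and the COCO evaluate(model, data_loader, device, own_mAP=True)
# defined earlier could be driven from a training loop. Batch sizes, optimizer
# hyper-parameters and the MultiStepLR milestones are illustrative assumptions, not
# values taken from this repo.
def example_detection_training(model, dataset_train, dataset_val, device,
                               epochs=26, print_freq=20):
    from torch.utils.data import DataLoader

    # detection models take lists of variable-sized images and per-image target dicts,
    # so the loaders need a collate_fn that keeps samples grouped as tuples
    def collate_fn(batch):
        return tuple(zip(*batch))

    loader_train = DataLoader(dataset_train, batch_size=2, shuffle=True,
                              num_workers=4, collate_fn=collate_fn)
    loader_val = DataLoader(dataset_val, batch_size=1, shuffle=False,
                            num_workers=4, collate_fn=collate_fn)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=5e-4)
    # stepped once per epoch; the per-iteration warm-up for epoch 0 is handled
    # inside train_one_epoch itself
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[16, 22], gamma=0.1)

    model.to(device)
    for epoch in range(epochs):
        train_one_epoch(model, optimizer, loader_train, device, epoch, print_freq)
        lr_scheduler.step()
        evaluate(model, loader_val, device)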