def run_demo(cfg, ckpt, score_threshold, images_dir: pathlib.Path,
             output_dir: pathlib.Path, dataset_type, num_images=None):
    if dataset_type == "voc":
        class_names = VOCDataset.class_names
    elif dataset_type == "coco":
        class_names = COCODataset.class_names
    elif dataset_type == "mnist":
        class_names = MNISTDetection.class_names
    elif dataset_type == "tdt4265":
        class_names = TDT4265Dataset.class_names
    elif dataset_type == "waymo":
        class_names = WaymoDataset.class_names
    else:
        raise NotImplementedError('Not implemented now.')

    model = SSDDetector(cfg)
    model = torch_utils.to_cuda(model)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    image_paths = list(images_dir.glob("*.png")) + list(images_dir.glob("*.jpg"))
    output_dir.mkdir(exist_ok=True, parents=True)

    transforms = build_transforms(cfg, is_train=False)
    model.eval()
    drawn_images = []
    for image_path in tqdm.tqdm(image_paths[:num_images],
                                desc="Predicting on images"):
        image_name = image_path.stem
        image = np.array(Image.open(image_path).convert("RGB"))
        height, width = image.shape[:2]
        images = transforms(image)[0].unsqueeze(0)

        result = model(torch_utils.to_cuda(images))[0]
        result = result.resize((width, height)).cpu().numpy()
        boxes, labels, scores = result['boxes'], result['labels'], result['scores']

        indices = scores > score_threshold
        boxes = boxes[indices]
        labels = labels[indices]
        scores = scores[indices]
        drawn_image = draw_boxes(image, boxes, labels, scores,
                                 class_names).astype(np.uint8)
        drawn_images.append(drawn_image)
        im = Image.fromarray(drawn_image)
        output_path = output_dir.joinpath(f"{image_name}.png")
        im.save(output_path)
    return drawn_images
def run_demo(cfg, ckpt, score_threshold, images_dir: pathlib.Path,
             output_dir: pathlib.Path, dataset_type):
    if dataset_type == "voc":
        class_names = VOCDataset.class_names
    elif dataset_type == "coco":
        class_names = COCODataset.class_names
    elif dataset_type == "mnist":
        class_names = MNISTDetection.class_names
    else:
        raise NotImplementedError('Not implemented now.')

    model = SSDDetector(cfg)
    model = torch_utils.to_cuda(model)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    image_paths = list(images_dir.glob("*.png")) + list(images_dir.glob("*.jpg"))
    output_dir.mkdir(exist_ok=True, parents=True)

    transforms = build_transforms(cfg, is_train=False)
    model.eval()
    drawn_images = []
    for i, image_path in enumerate(image_paths):
        start = time.time()
        image_name = image_path.name
        image = np.array(Image.open(image_path).convert("RGB"))
        height, width = image.shape[:2]
        images = transforms(image)[0].unsqueeze(0)
        load_time = time.time() - start

        start = time.time()
        result = model(torch_utils.to_cuda(images))[0]
        inference_time = time.time() - start

        result = result.resize((width, height)).cpu().numpy()
        boxes, labels, scores = result['boxes'], result['labels'], result['scores']

        indices = scores > score_threshold
        boxes = boxes[indices]
        labels = labels[indices]
        scores = scores[indices]

        meters = "|".join([
            'objects {:02d}'.format(len(boxes)),
            'load {:03d}ms'.format(round(load_time * 1000)),
            'inference {:03d}ms'.format(round(inference_time * 1000)),
            'FPS {}'.format(round(1.0 / inference_time)),
        ])
        # The meters string was built but never used; print it as a per-image log line.
        print('({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths), image_name, meters))

        drawn_image = draw_boxes(image, boxes, labels, scores,
                                 class_names).astype(np.uint8)
        drawn_images.append(drawn_image)
    return drawn_images
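# For reference, a minimal command-line entry point that could drive a run_demo
# like the ones above. This is a sketch, not part of the original code: the flag
# names, defaults, and the yacs config import path are assumptions about the
# surrounding project and may need adapting.
import argparse
import pathlib

from ssd.config import cfg  # assumed location of the project's yacs config node


def main():
    parser = argparse.ArgumentParser(description="SSD demo on a folder of images")
    parser.add_argument("config_file", help="path to a .yaml config file")
    parser.add_argument("--ckpt", default=None, help="checkpoint path (default: latest)")
    parser.add_argument("--score_threshold", type=float, default=0.5)
    parser.add_argument("--images_dir", type=pathlib.Path, default=pathlib.Path("demo"))
    parser.add_argument("--output_dir", type=pathlib.Path, default=pathlib.Path("demo/result"))
    parser.add_argument("--dataset_type", default="voc")
    args = parser.parse_args()

    cfg.merge_from_file(args.config_file)  # yacs-style config merge (assumed)
    cfg.freeze()
    run_demo(cfg, args.ckpt, args.score_threshold,
             args.images_dir, args.output_dir, args.dataset_type)


if __name__ == "__main__":
    main()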
def get_detections(cfg, ckpt):
    model = SSDDetector(cfg)
    model = torch_utils.to_cuda(model)
    checkpointer = CheckPointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    dataset_path = DatasetCatalog.DATASETS["tdt4265_test"]["data_dir"]
    dataset_path = pathlib.Path(cfg.DATASET_DIR, dataset_path)
    image_dir = pathlib.Path(dataset_path, "images")
    image_paths = list(image_dir.glob("*.jpg"))

    transforms = build_transforms(cfg, is_train=False)
    model.eval()
    detections = []
    labels = read_labels(image_dir.parent.parent.joinpath("train", "labels.json"))
    check_all_images_exists(labels, image_paths)
    # Keep only test labels, and only every 7th frame.
    labels = [label for label in labels if label["is_test"]]
    labels = [label for label in labels if label["image_id"] % 7 == 0]
    for label in tqdm.tqdm(labels, desc="Inference on images"):
        image_id = label["image_id"]
        image_path = image_dir.joinpath(f"{image_id}.jpg")
        image_detections = {"image_id": int(image_id), "bounding_boxes": []}
        image = np.array(Image.open(image_path).convert("RGB"))
        height, width = image.shape[:2]
        images = transforms(image)[0].unsqueeze(0)

        result = model(torch_utils.to_cuda(images))[0]
        result = result.resize((width, height)).cpu().numpy()
        # Use distinct names for the predictions so they do not shadow the
        # ground-truth label list being iterated above.
        boxes, pred_labels, scores = result['boxes'], result['labels'], result['scores']
        for idx in range(len(boxes)):
            box = boxes[idx]
            label_id = pred_labels[idx]
            label_name = TDT4265Dataset.class_names[label_id]
            assert label_name != "__background__"
            score = float(scores[idx])
            assert box.shape == (4,)
            json_box = {
                "xmin": float(box[0]),
                "ymin": float(box[1]),
                "xmax": float(box[2]),
                "ymax": float(box[3]),
                "label": str(label_name),
                "label_id": int(label_id),
                "confidence": float(score),
            }
            image_detections["bounding_boxes"].append(json_box)
        detections.append(image_detections)
    return detections
def start_train(cfg):
    logger = logging.getLogger('SSD.trainer')
    model = SSDDetector(cfg)
    model = torch_utils.to_cuda(model)
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=cfg.SOLVER.LR,
        momentum=cfg.SOLVER.MOMENTUM,
        weight_decay=cfg.SOLVER.WEIGHT_DECAY)

    arguments = {"iteration": 0}
    save_to_disk = True
    checkpointer = CheckPointer(
        model, optimizer, cfg.OUTPUT_DIR, save_to_disk, logger)
    extra_checkpoint_data = checkpointer.load()
    arguments.update(extra_checkpoint_data)

    max_iter = cfg.SOLVER.MAX_ITER
    train_loader = make_data_loader(cfg, is_train=True, max_iter=max_iter,
                                    start_iter=arguments['iteration'])

    model = do_train(cfg, model, train_loader, optimizer,
                     checkpointer, arguments)
    return model
def start_train(cfg):
    logger = logging.getLogger('SSD.trainer')
    model = SSDDetector(cfg)
    model = torch_utils.to_cuda(model)

    lr = cfg.SOLVER.LR
    optimizer = make_optimizer(cfg, model, lr)
    milestones = list(cfg.SOLVER.LR_STEPS)
    scheduler = make_lr_scheduler(cfg, optimizer, milestones)

    arguments = {"iteration": 0}
    save_to_disk = True
    checkpointer = CheckPointer(cfg, model, optimizer, scheduler,
                                cfg.OUTPUT_DIR, save_to_disk, logger)
    extra_checkpoint_data = checkpointer.load()
    arguments.update(extra_checkpoint_data)

    max_iter = cfg.SOLVER.MAX_ITER
    train_loader = make_data_loader(cfg, is_train=True, max_iter=max_iter,
                                    start_iter=arguments['iteration'])

    model = do_train(cfg, model, train_loader, optimizer, scheduler,
                     checkpointer, arguments)
    return model
def start_train(cfg):
    logger = logging.getLogger('SSD.trainer')
    model = SSDDetector(cfg)
    model = torch_utils.to_cuda(model)
    optimizer = torch.optim.SGD(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=cfg.SOLVER.LR,
        momentum=cfg.SOLVER.MOMENTUM,
        weight_decay=cfg.SOLVER.WEIGHT_DECAY,
        nesterov=True)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=int(cfg.SOLVER.MAX_ITER / 1000), eta_min=0)

    arguments = {"iteration": 0}
    save_to_disk = True
    checkpointer = CheckPointer(
        model, optimizer, cfg.OUTPUT_DIR, save_to_disk, logger)
    extra_checkpoint_data = checkpointer.load()
    arguments.update(extra_checkpoint_data)

    max_iter = cfg.SOLVER.MAX_ITER
    train_loader = make_data_loader(cfg, is_train=True, max_iter=max_iter,
                                    start_iter=arguments['iteration'])

    model = do_train(cfg, model, train_loader, optimizer,
                     checkpointer, arguments, scheduler)
    return model
def evaluation(cfg, ckpt):
    logger = logging.getLogger("SSD.inference")
    model = SSDDetector(cfg)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=logger)
    model = torch_utils.to_cuda(model)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    do_evaluation(cfg, model)
def start_train(cfg, visualize_example=False):
    logger = logging.getLogger('SSD.trainer')
    model = SSDDetector(cfg)
    print(model)
    model = torch_utils.to_cuda(model)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=cfg.SOLVER.LR,
                                momentum=cfg.SOLVER.MOMENTUM,
                                weight_decay=cfg.SOLVER.WEIGHT_DECAY)
    # Alternative optimizer kept for reference:
    """
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=cfg.SOLVER.LR,
        weight_decay=cfg.SOLVER.WEIGHT_DECAY)
    """
    # Alternative scheduler kept for reference:
    """
    lr_scheduler = torch.optim.lr_scheduler.CyclicLR(
        optimizer=optimizer,
        base_lr=cfg.SOLVER.LR / 10,
        max_lr=0.05,
        step_size_up=8000,
        mode='triangular2')
    """

    arguments = {"iteration": 0}
    save_to_disk = True
    checkpointer = CheckPointer(
        model, optimizer, cfg.OUTPUT_DIR, save_to_disk, logger)
    extra_checkpoint_data = checkpointer.load()
    arguments.update(extra_checkpoint_data)

    max_iter = cfg.SOLVER.MAX_ITER
    train_loader = make_data_loader(cfg, is_train=True, max_iter=max_iter,
                                    start_iter=arguments['iteration'])

    model = do_train(cfg, model, train_loader, optimizer, checkpointer,
                     arguments, visualize_example, lr_scheduler=None)
    return model
def get_detections(cfg, ckpt):
    model = SSDDetector(cfg)
    model = torch_utils.to_cuda(model)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    dataset_path = DatasetCatalog.DATASETS["tdt4265_test"]["data_dir"]
    dataset_path = pathlib.Path(cfg.DATASET_DIR, dataset_path)
    image_dir = pathlib.Path(dataset_path)
    image_paths = list(image_dir.glob("*.jpg"))

    transforms = build_transforms(cfg, is_train=False)
    model.eval()
    detections = []
    for image_path in tqdm.tqdm(image_paths, desc="Inference on images"):
        image = np.array(Image.open(image_path).convert("RGB"))
        height, width = image.shape[:2]
        images = transforms(image)[0].unsqueeze(0)

        result = model(torch_utils.to_cuda(images))[0]
        result = result.resize((width, height)).cpu().numpy()
        boxes, labels, scores = result['boxes'], result['labels'], result['scores']
        for idx in range(len(boxes)):
            box = boxes[idx]
            label_id = labels[idx]
            label = TDT4265Dataset.class_names[label_id]
            assert label != "__background__"
            score = float(scores[idx])
            assert box.shape == (4,)
            xmin, ymin, xmax, ymax = box
            # Use separate names for the box size so the image width/height
            # above are not overwritten.
            box_width = xmax - xmin
            box_height = ymax - ymin
            detections.append({
                "image_id": image_path.stem,
                "category_id": LABEL_MAP[label],
                "score": score,
                # COCO-style [x, y, width, height]; cast to float so the
                # result is JSON-serializable.
                "bbox": [float(xmin), float(ymin),
                         float(box_width), float(box_height)],
            })
    return detections
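# The list returned by get_detections above is already in a COCO-style results
# format (category_id, score, [x, y, w, h]). A minimal sketch of writing it to
# disk for submission or offline evaluation; the function name and output path
# are illustrative, not part of the original code.
import json
import pathlib


def dump_detections(detections, output_path=pathlib.Path("detections.json")):
    # detections: the list of dicts produced by get_detections.
    with open(output_path, "w") as fp:
        json.dump(detections, fp, indent=2)
    print(f"Wrote {len(detections)} detections to {output_path}")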
def compute_on_dataset(model, data_loader):
    results_dict = {}
    for batch in tqdm(data_loader):
        images, targets, image_ids = batch
        images = torch_utils.to_cuda(images)
        outputs = model(images)
        outputs = [o.cpu() for o in outputs]
        results_dict.update(
            {img_id: result for img_id, result in zip(image_ids, outputs)})
    return results_dict
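# compute_on_dataset runs the forward pass with autograd enabled unless the
# caller disables it. A minimal usage sketch, assuming the validation loaders
# come from make_data_loader as elsewhere in this section; run_inference is an
# illustrative helper, not part of the original code.
import torch


def run_inference(cfg, model):
    results = []
    model.eval()
    with torch.no_grad():  # inference only: skip gradient bookkeeping, save memory
        for data_loader in make_data_loader(cfg, is_train=False):
            results.append(compute_on_dataset(model, data_loader))
    return results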
def evaluation(cfg, ckpt, N_images: int):
    model = SSDDetector(cfg)
    logger = logging.getLogger("SSD.inference")
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=logger)
    model = torch_utils.to_cuda(model)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    model.eval()

    data_loaders_val = make_data_loader(cfg, is_train=False)
    for data_loader in data_loaders_val:
        batch = next(iter(data_loader))
        images, targets, image_ids = batch
        images = torch_utils.to_cuda(images)
        imshape = list(images.shape[2:])
        # Warmup
        print("Checking runtime for image shape:", imshape)
        for i in range(10):
            model(images)
        start_time = time.time()
        for i in range(N_images):
            outputs = model(images)
        total_time = time.time() - start_time
        print("Runtime for image shape:", imshape)
        print("Total runtime:", total_time)
        print("FPS:", N_images / total_time)
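# CUDA kernels are launched asynchronously, so the wall-clock FPS above can be
# optimistic. A sketch of a tighter measurement, assuming a CUDA device is
# available; timed_forward is an illustrative helper, not part of the original code.
import time
import torch


def timed_forward(model, images, n_iters=100):
    # Synchronize before and after so time.time() brackets the actual GPU work.
    torch.cuda.synchronize()
    start = time.time()
    with torch.no_grad():
        for _ in range(n_iters):
            model(images)
    torch.cuda.synchronize()
    elapsed = time.time() - start
    return n_iters / elapsed  # frames per second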
def start_train(cfg):
    logger = logging.getLogger('SSD.trainer')
    model = SSDDetector(cfg)
    model = torch_utils.to_cuda(model)

    if cfg.SOLVER.TYPE == "adam":
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=cfg.SOLVER.LR,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY)
    elif cfg.SOLVER.TYPE == "sgd":
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=cfg.SOLVER.LR,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY,
            momentum=cfg.SOLVER.MOMENTUM)
    else:
        # Default to Adam if the solver type is not recognized.
        print("WARNING: Incorrect solver type, defaulting to Adam")
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=cfg.SOLVER.LR,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY)

    scheduler = LinearMultiStepWarmUp(cfg, optimizer)

    arguments = {"iteration": 0}
    save_to_disk = True
    checkpointer = CheckPointer(
        model, optimizer, cfg.OUTPUT_DIR, save_to_disk, logger)
    extra_checkpoint_data = checkpointer.load()
    arguments.update(extra_checkpoint_data)

    max_iter = cfg.SOLVER.MAX_ITER
    train_loader = make_data_loader(cfg, is_train=True, max_iter=max_iter,
                                    start_iter=arguments['iteration'])

    model = do_train(cfg, model, train_loader, optimizer, checkpointer,
                     arguments, scheduler)
    return model
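# LinearMultiStepWarmUp is imported from the project and not defined in this
# section. A minimal sketch of what such a schedule could look like (linear
# warmup followed by multi-step decay), built on LambdaLR. The warmup_iters
# default is a guess; cfg.SOLVER.LR_STEPS and cfg.SOLVER.GAMMA follow the
# fields used elsewhere in this section.
import torch


def linear_multistep_warmup(cfg, optimizer, warmup_iters=500):
    milestones = list(cfg.SOLVER.LR_STEPS)
    gamma = cfg.SOLVER.GAMMA

    def lr_factor(iteration):
        if iteration < warmup_iters:
            # Ramp the learning rate linearly from ~0 up to the base LR.
            return (iteration + 1) / warmup_iters
        # After warmup, decay by gamma at every milestone that has passed.
        passed = sum(1 for m in milestones if iteration >= m)
        return gamma ** passed

    # scheduler.step() is expected to be called once per training iteration.
    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_factor)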
def start_train(cfg):
    logger = logging.getLogger('SSD.trainer')
    model = SSDDetector(cfg)
    model = torch_utils.to_cuda(model)

    # SGD (kept for reference)
    # optimizer = torch.optim.SGD(
    #     model.parameters(),
    #     lr=cfg.SOLVER.LR,
    #     momentum=cfg.SOLVER.MOMENTUM,
    #     weight_decay=cfg.SOLVER.WEIGHT_DECAY)

    # Adam
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg.SOLVER.LR,
                                 weight_decay=cfg.SOLVER.WEIGHT_DECAY)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer=optimizer,
        milestones=[6000, 10000],
        gamma=cfg.SOLVER.GAMMA)

    arguments = {"iteration": 0}
    save_to_disk = True
    checkpointer = CheckPointer(
        model, optimizer, cfg.OUTPUT_DIR, save_to_disk, logger)
    extra_checkpoint_data = checkpointer.load()
    arguments.update(extra_checkpoint_data)

    max_iter = cfg.SOLVER.MAX_ITER
    train_loader = make_data_loader(cfg, is_train=True, max_iter=max_iter,
                                    start_iter=arguments['iteration'])

    model = do_train(cfg, model, train_loader, optimizer, checkpointer,
                     arguments, scheduler)
    return model
def __init__(self, cfg):
    super().__init__()
    image_size = cfg.INPUT.IMAGE_SIZE
    output_channels = cfg.MODEL.BACKBONE.OUT_CHANNELS
    self.output_channels = output_channels
    image_channels = cfg.MODEL.BACKBONE.INPUT_CHANNELS
    self.output_feature_size = cfg.MODEL.PRIORS.FEATURE_MAPS

    # Task 4a backbone (kept for reference)
    """
    feature_bank_extractors = nn.Sequential(
        nn.Sequential(
            nn.Conv2d(in_channels=image_channels, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=output_channels[0], kernel_size=3, stride=2, padding=1),
        ),
        nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(in_channels=output_channels[0], out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=output_channels[1], kernel_size=3, stride=2, padding=1),
        ),
        nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(in_channels=output_channels[1], out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=output_channels[2], kernel_size=3, stride=2, padding=1),
        ),
        nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(in_channels=output_channels[2], out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=output_channels[3], kernel_size=3, stride=2, padding=1),
        ),
        nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(in_channels=output_channels[3], out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=output_channels[4], kernel_size=3, stride=2, padding=1),
        ),
        nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(in_channels=output_channels[4], out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=output_channels[5], kernel_size=3, stride=2, padding=0),
        )
    )
    """

    # Custom backbone
    feature_bank_extractors = nn.Sequential(
        nn.Sequential(
            # Pad 320x240 to square (320x320)
            # nn.Conv2d(in_channels=image_channels, out_channels=image_channels, kernel_size=1, stride=1, padding=(40, 0)),
            nn.Conv2d(in_channels=image_channels, out_channels=16, kernel_size=4, stride=1, padding=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(),
            nn.Dropout2d(p=0.1),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=output_channels[0], kernel_size=3, stride=2, padding=1),
        ),
        nn.Sequential(
            nn.ReLU(),
            nn.Dropout2d(p=0.2),
            nn.Conv2d(in_channels=output_channels[0], out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=output_channels[1], kernel_size=3, stride=2, padding=1),
        ),
        nn.Sequential(
            nn.ReLU(),
            nn.Dropout2d(p=0.1),
            nn.Conv2d(in_channels=output_channels[1], out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=output_channels[2], kernel_size=3, stride=2, padding=1),
        ),
        nn.Sequential(
            nn.ReLU(),
            nn.Dropout2d(p=0.1),
            nn.Conv2d(in_channels=output_channels[2], out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=output_channels[3], kernel_size=3, stride=2, padding=1),
        ),
        nn.Sequential(
            nn.ReLU(),
            nn.Dropout2d(p=0.1),
            nn.Conv2d(in_channels=output_channels[3], out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=output_channels[4], kernel_size=3, stride=2, padding=1),
        ),
        nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(in_channels=output_channels[4], out_channels=128, kernel_size=(2, 3), stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Dropout2d(p=0.2),
            nn.Conv2d(in_channels=128, out_channels=output_channels[5], kernel_size=3, stride=2, padding=0),
        ))
    self.feature_bank_extractors = torch_utils.to_cuda(feature_bank_extractors)
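# The __init__ above only builds the six extractor stages; the forward pass is
# not shown in this section. A minimal sketch of the companion forward method,
# under the assumption that each nn.Sequential stage feeds the next and that
# the SSD heads consume one feature map per stage, in order:
def forward(self, x):
    # Run the stages back to back and collect each stage's output as a feature map.
    out_features = []
    for stage in self.feature_bank_extractors:
        x = stage(x)
        out_features.append(x)
    return tuple(out_features)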
def do_train(cfg, model, data_loader, optimizer, scheduler, checkpointer,
             arguments):
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training ...")
    meters = MetricLogger()

    model.train()
    summary_writer = torch.utils.tensorboard.SummaryWriter(
        log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs'))

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = torch_utils.to_cuda(images)
        targets = torch_utils.to_cuda(targets)
        loss_dict = model(images, targets=targets)
        loss = sum(loss for loss in loss_dict.values())

        meters.update(total_loss=loss, **loss_dict)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time)
        if iteration % cfg.LOG_STEP == 0:
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            logger.info(
                meters.delimiter.join([
                    "iter: {iter:06d}",
                    "lr: {lr:.5f}",
                    '{meters}',
                    "eta: {eta}",
                    'mem: {mem}M',
                ]).format(
                    iter=iteration,
                    lr=optimizer.param_groups[0]['lr'],
                    meters=str(meters),
                    eta=eta_string,
                    mem=round(torch.cuda.max_memory_allocated() / 1024.0 / 1024.0)))
            global_step = iteration
            summary_writer.add_scalar('losses/total_loss', loss,
                                      global_step=global_step)
            for loss_name, loss_item in loss_dict.items():
                summary_writer.add_scalar('losses/{}'.format(loss_name),
                                          loss_item, global_step=global_step)
            summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'],
                                      global_step=global_step)

        if iteration % cfg.MODEL_SAVE_STEP == 0:
            checkpointer.save("model_{:06d}".format(iteration), **arguments)

        if cfg.EVAL_STEP > 0 and iteration % cfg.EVAL_STEP == 0:
            eval_results = do_evaluation(cfg, model, iteration=iteration)
            for eval_result, dataset in zip(eval_results, cfg.DATASETS.TEST):
                write_metric(eval_result['metrics'], 'metrics/' + dataset,
                             summary_writer, iteration)
            model.train()  # *IMPORTANT*: change to train mode after eval.

    checkpointer.save("model_final", **arguments)
    # Compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
    return model
def do_train(cfg, model, data_loader, optimizer, checkpointer, arguments,
             scheduler):
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training ...")
    meters = MetricLogger()

    model.train()
    summary_writer = torch.utils.tensorboard.SummaryWriter(
        log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs'))

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()
    scaler = torch.cuda.amp.GradScaler()
    print(model)
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = torch_utils.to_cuda(images)
        targets = torch_utils.to_cuda(targets)
        # Casts operations to mixed precision. autocast handles the casting
        # itself, so the inputs are passed as float32 rather than calling
        # .half(), which can break float32-only ops.
        with torch.cuda.amp.autocast():
            loss_dict = model(images, targets=targets)
            loss = sum(loss for loss in loss_dict.values())

        meters.update(total_loss=loss, **loss_dict)

        optimizer.zero_grad()
        # Scales the loss and calls backward() to create scaled gradients.
        scaler.scale(loss).backward()
        # loss.backward()
        # Unscales gradients and calls or skips optimizer.step().
        scaler.step(optimizer)
        # optimizer.step(iteration)
        # Updates the scale for the next iteration.
        scaler.update()
        if iteration > 5000:
            scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time)
        if iteration % cfg.LOG_STEP == 0:
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            lr = optimizer.param_groups[0]['lr']
            to_log = [
                f"iter: {iteration:06d}",
                f"lr: {lr:.5f}",
                f'{meters}',
                f"eta: {eta_string}",
            ]
            if torch.cuda.is_available():
                mem = round(torch.cuda.max_memory_allocated() / 1024.0 / 1024.0)
                to_log.append(f'mem: {mem}M')
            logger.info(meters.delimiter.join(to_log))
            global_step = iteration
            summary_writer.add_scalar('losses/total_loss', loss,
                                      global_step=global_step)
            for loss_name, loss_item in loss_dict.items():
                summary_writer.add_scalar('losses/{}'.format(loss_name),
                                          loss_item, global_step=global_step)
            summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'],
                                      global_step=global_step)

        if iteration % cfg.MODEL_SAVE_STEP == 0:
            checkpointer.save("model_{:06d}".format(iteration), **arguments)

        if cfg.EVAL_STEP > 0 and iteration % cfg.EVAL_STEP == 0:
            eval_results = do_evaluation(cfg, model, iteration=iteration)
            for eval_result, dataset in zip(eval_results, cfg.DATASETS.TEST):
                write_metric(eval_result['metrics'], 'metrics/' + dataset,
                             summary_writer, iteration)
            model.train()  # *IMPORTANT*: change to train mode after eval.

        if iteration >= cfg.SOLVER.MAX_ITER:
            break

    checkpointer.save("model_final", **arguments)
    # Compute training time
    total_training_time = int(time.time() - start_training_time)
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / max_iter))
    return model
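# write_metric is called by both do_train variants above but not defined in
# this section. A minimal sketch, assuming eval_result['metrics'] maps metric
# names (e.g. "mAP") to scalars or to nested dicts of scalars:
def write_metric(eval_result, prefix, summary_writer, global_step):
    for key, value in eval_result.items():
        tag = f"{prefix}/{key}"
        if isinstance(value, dict):
            # Recurse into nested metric groups so every leaf gets its own tag.
            write_metric(value, tag, summary_writer, global_step)
        else:
            summary_writer.add_scalar(tag, value, global_step=global_step)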