def test_mb2_ssd_coco_80(self):
    """Check the pretrained ``mb2_ssd`` / ``coco_80`` model against its reference mAP.

    Loads the pretrained model, builds the COCO test loader and ground-truth
    annotations from the hard-coded ``/neutrino/datasets/coco2017/`` paths,
    evaluates, and expects mAP within 0.001 of 0.138.
    """
    model = get_model_by_name(
        model_name="mb2_ssd",
        dataset_name="coco_80",
        pretrained=True,
        progress=False,
    )
    from deeplite_torch_zoo.src.objectdetection.datasets.coco_config import (
        DATA,
        MISSING_IDS,
    )

    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/coco2017/",
        dataset_name="coco",
        model_name="mb2_ssd",
        batch_size=32,
        missing_ids=MISSING_IDS,
        classes=DATA["CLASSES"],
    )["test"]
    cocoGt = COCO(
        "/neutrino/datasets/coco2017/annotations/instances_val2017.json")
    eval_fn = get_eval_function("mb2_ssd", "coco_80")
    APs = eval_fn(
        model,
        test_loader,
        gt=cocoGt,
        _set="coco",
    )
    print(APs)
    # assertLess reports the actual deviation on failure, unlike
    # assertEqual(<bool>, True) which only prints "False != True".
    self.assertLess(abs(APs["mAP"] - 0.138), 0.001)
def test_mb2_ssd_coco_6(self):
    """Check the pretrained ``mb2_ssd`` / ``coco_gm_6`` model against its reference mAP.

    Evaluates on the six-class ``coco_gm`` test split and expects mAP within
    0.001 of 0.227.
    """
    # NOTE(review): the data root below is a user-specific absolute path, so
    # this test can only run on a machine that has that directory.
    model = get_model_by_name(
        model_name="mb2_ssd",
        dataset_name="coco_gm_6",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/home/ehsan/data/",
        dataset_name="coco_gm",
        model_name="mb2_ssd",
        batch_size=32,
        train_ann_file="train_data_COCO.json",
        train_dir="images/train",
        val_ann_file="test_data_COCO.json",
        val_dir="images/test",
        classes=[
            "class1", "class2", "class3", "class4", "class5", "class6"
        ],
    )["test"]
    cocoGt = COCO("/home/ehsan/data/test_data_COCO.json")
    eval_fn = get_eval_function("mb2_ssd", "coco_gm")
    APs = eval_fn(
        model,
        test_loader,
        gt=cocoGt,
        _set="coco",
    )
    # assertLess shows the measured deviation when the check fails.
    self.assertLess(abs(APs["mAP"] - 0.227), 0.001)
def test_eval_objectdetection(self):
    """Every listed object-detection model must map to a known detection eval function."""
    all_objectdetection_models = list_models(
        task_type_filter="object_detection",
        print_table=False,
        return_list=True,
    )
    for (model_name, dataset_name) in all_objectdetection_models:
        funct = get_eval_function(model_name=model_name,
                                  dataset_name=dataset_name)
        # assertIn is not stripped under ``python -O`` and names the
        # offending function on failure, unlike a bare ``assert``.
        self.assertIn(funct, objectdetection_eval_list)
def test_eval_segmentation(self):
    """Every listed semantic-segmentation model must map to a known segmentation eval function."""
    all_segmentation_models = list_models(
        task_type_filter="semantic_segmentation",
        print_table=False,
        return_list=True,
    )
    for (model_name, dataset_name) in all_segmentation_models:
        funct = get_eval_function(model_name=model_name,
                                  dataset_name=dataset_name)
        # assertIn is not stripped under ``python -O`` and names the
        # offending function on failure, unlike a bare ``assert``.
        self.assertIn(funct, segmentation_eval_list)
def test_yolov5_6s_voc(self):
    """Check the pretrained ``yolo5_6s`` / ``voc_20`` model against its reference mAP.

    The eval function is given the VOC2007 directory path directly (no data
    loader) and the result is expected within 0.001 of 0.821.
    """
    model = get_model_by_name(
        model_name="yolo5_6s",
        dataset_name="voc_20",
        pretrained=True,
        progress=False,
    )
    eval_fn = get_eval_function("yolo5_6s", "voc_20")
    APs = eval_fn(model, "/neutrino/datasets//VOCdevkit/VOC2007/", _set="voc")
    print(APs)
    # assertLess shows the measured deviation when the check fails.
    self.assertLess(abs(APs["mAP"] - 0.821), 0.001)
def test_mb3_small_vww(self):
    """Check the pretrained ``mobilenetv3_small`` / ``vww`` model against its reference accuracy.

    Evaluates on the ``vww`` test split and expects ``acc`` within 0.001 of 0.892.
    """
    model = get_model_by_name(
        model_name="mobilenetv3_small",
        dataset_name="vww",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/vww",
        dataset_name="vww",
        batch_size=128,
    )["test"]
    eval_fn = get_eval_function("mobilenetv3_small", "vww")
    ACC = eval_fn(model, test_loader)
    # assertLess shows the measured deviation when the check fails.
    self.assertLess(abs(ACC["acc"] - 0.892), 0.001)
def test_mb2_ssd_voc_20(self):
    """Check the pretrained ``mb2_ssd`` / ``voc_20`` model against its reference mAP.

    Evaluates on the VOC test split and expects mAP within 0.001 of 0.443.
    """
    model = get_model_by_name(
        model_name="mb2_ssd",
        dataset_name="voc_20",
        pretrained=True,
        progress=False,
    )
    # NOTE(review): the loader is requested for "mb2_ssd_lite" while the
    # model under test is "mb2_ssd" — presumably they share a data pipeline;
    # confirm this is intentional.
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/VOCdevkit",
        dataset_name="voc",
        model_name="mb2_ssd_lite",
        batch_size=32,
    )["test"]
    eval_fn = get_eval_function("mb2_ssd", "voc_20")
    APs = eval_fn(model, test_loader)
    # assertLess shows the measured deviation when the check fails.
    self.assertLess(abs(APs["mAP"] - 0.443), 0.001)
def test_resnet50_tinyimagenet(self):
    """Check the pretrained ``resnet50`` / ``tinyimagenet`` model against its reference accuracy.

    Evaluates on the ``val`` split and expects ``acc`` within 0.001 of 0.730.
    """
    model = get_model_by_name(
        model_name="resnet50",
        dataset_name="tinyimagenet",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/TinyImageNet/",
        dataset_name="tinyimagenet",
        batch_size=128,
        num_workers=0,
    )["val"]
    eval_fn = get_eval_function("resnet50", "tinyimagenet")
    ACC = eval_fn(model, test_loader)
    print(ACC)
    # assertLess shows the measured deviation when the check fails.
    self.assertLess(abs(ACC["acc"] - 0.730), 0.001)
def test_vgg16_ssd_wider_face(self):
    """Check the pretrained ``vgg16_ssd`` / ``wider_face`` model against its reference mAP.

    Evaluates on the ``wider_face`` test split and expects mAP within 0.001
    of 0.7071.
    """
    model = get_model_by_name(
        model_name="vgg16_ssd",
        dataset_name="wider_face",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/wider_face",
        dataset_name="wider_face",
        model_name="vgg16_ssd",
        batch_size=8,
    )["test"]
    eval_fn = get_eval_function("vgg16_ssd", "wider_face")
    APs = eval_fn(model, test_loader)
    print(APs)
    # assertLess shows the measured deviation when the check fails.
    self.assertLess(abs(APs["mAP"] - 0.7071), 0.001)
def test_unet_scse_resnet18_carvana(self):
    """Check the pretrained ``unet_scse_resnet18`` / ``carvana`` model against its reference mIoU.

    Evaluates on the ``carvana`` test split and expects ``miou`` within 0.001
    of 0.989.
    """
    model = get_model_by_name(
        model_name="unet_scse_resnet18",
        dataset_name="carvana",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/carvana",
        dataset_name="carvana",
        model_name="unet",
        num_workers=1,
    )["test"]
    eval_fn = get_eval_function("unet_scse_resnet18", "carvana")
    acc = eval_fn(model, test_loader, net="unet_scse_resnet18")
    miou = acc["miou"]
    print(miou)
    # assertLess shows the measured deviation when the check fails.
    self.assertLess(abs(miou - 0.989), 0.001)
def test_unet_carvana(self):
    """Check the pretrained ``unet`` / ``carvana`` model against its reference Dice coefficient.

    Evaluates on the ``carvana`` test split and expects ``dice_coeff`` within
    0.001 of 0.983.
    """
    model = get_model_by_name(
        model_name="unet",
        dataset_name="carvana",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/carvana",
        dataset_name="carvana",
        model_name="unet",
        num_workers=1,
    )["test"]
    eval_fn = get_eval_function("unet", "carvana")
    acc = eval_fn(model, test_loader, net="unet")
    dc = acc["dice_coeff"]
    print(dc)
    # assertLess shows the measured deviation when the check fails.
    self.assertLess(abs(dc - 0.983), 0.001)
def test_fasterrcnn_resnet50_fpn_coco(self):
    """Check the pretrained ``fasterrcnn_resnet50_fpn`` / ``coco_80`` model against its reference mAP.

    Evaluates on the COCO test split with the val2017 annotations as ground
    truth and expects mAP within 0.001 of 0.369.
    """
    model = get_model_by_name(
        model_name="fasterrcnn_resnet50_fpn",
        dataset_name="coco_80",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets/coco2017/",
        dataset_name="coco",
        model_name="fasterrcnn_resnet50_fpn",
        batch_size=32,
    )["test"]
    cocoGt = COCO(
        "/neutrino/datasets/coco2017/annotations/instances_val2017.json")
    eval_fn = get_eval_function("fasterrcnn_resnet50_fpn", "coco_80")
    APs = eval_fn(model, test_loader, gt=cocoGt)
    # assertLess shows the measured deviation when the check fails.
    self.assertLess(abs(APs["mAP"] - 0.369), 0.001)
def test_deeplab_mobilenet_voc_20(self):
    """Check the pretrained ``deeplab_mobilenet`` / ``voc_20`` model against its reference mIoU.

    Evaluates on the VOC test split and expects ``miou`` within 0.001 of 0.571.
    """
    model = get_model_by_name(
        model_name="deeplab_mobilenet",
        dataset_name="voc_20",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets",
        sbd_root=None,
        dataset_name="voc",
        model_name="deeplab_mobilenet",
        num_workers=2,
        backbone="vgg",
    )["test"]
    eval_fn = get_eval_function("deeplab_mobilenet", "voc_20")
    acc = eval_fn(model, test_loader, net="deeplab")
    miou = acc["miou"]
    print(miou)
    # assertLess shows the measured deviation when the check fails.
    self.assertLess(abs(miou - 0.571), 0.001)
def test_fcn32_voc_20(self):
    """Check the pretrained ``fcn32`` / ``voc_20`` model against its reference mIoU.

    Evaluates on the VOC test split with batch size 1 and expects ``miou``
    within 0.001 of 0.713.
    """
    model = get_model_by_name(
        model_name="fcn32",
        dataset_name="voc_20",
        pretrained=True,
        progress=False,
    )
    test_loader = get_data_splits_by_name(
        data_root="/neutrino/datasets",
        dataset_name="voc",
        model_name="fcn32",
        num_workers=1,
        batch_size=1,
        backbone="vgg",
    )["test"]
    eval_fn = get_eval_function("fcn32", "voc_20")
    acc = eval_fn(model, test_loader, net="fcn32")
    miou = acc["miou"]
    print(miou)
    # assertLess shows the measured deviation when the check fails.
    self.assertLess(abs(miou - 0.713), 0.001)
def train(opt, device):
    """Train a YOLOv5-style detector and write checkpoints and TensorBoard logs.

    A timestamped run directory is created under ``opt.save_dir``; the
    hyperparameters and run options are dumped there as YAML, weights go to
    ``<run>/weights/last.pt`` and ``best.pt`` (plus periodic ``epoch{N}.pt``
    when ``opt.save_period > 0``).

    Args:
        opt: Options namespace. Attributes read here: ``epochs``,
            ``batch_size``, ``noval``, ``nosave``, ``workers``, ``freeze``,
            ``save_dir``, ``dataset_name``, ``hp_config``, ``img_dir``,
            ``train_img_res``, ``test_img_res``, ``model_name``,
            ``pretraining_source_dataset``, ``pretrained``, ``adam``,
            ``linear_lr``, ``sync_bn``, ``label_smoothing``,
            ``eval_before_train``, ``patience``, ``multi_scale``,
            ``eval_freq``, ``save_period``. ``opt.img_dir`` is replaced by a
            ``Path`` as a side effect.
        device: Torch device to train on; ``device.type != 'cpu'`` enables
            AMP and the multi-GPU code paths.

    Returns:
        None.
    """
    epochs, batch_size, noval, nosave, workers, freeze = \
        opt.epochs, opt.batch_size, opt.noval, opt.nosave, opt.workers, opt.freeze

    d = datetime.datetime.now()
    run_id = '{:%Y-%m-%d__%H-%M-%S}'.format(d)
    save_dir = Path(opt.save_dir) / run_id

    # Directories
    w = save_dir / 'weights'  # weights dir
    w.mkdir(parents=True, exist_ok=True)  # make dir
    last, best = w / 'last.pt', w / 'best.pt'

    # Get hyperparameter dict
    hyp, hyp_loss = get_hyperparameter_dict(opt.dataset_name, opt.hp_config)

    # Save run settings
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.safe_dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.safe_dump(vars(opt), f, sort_keys=False)

    tb_writer = SummaryWriter(save_dir)
    # Converted only after opt has been dumped above.
    opt.img_dir = Path(opt.img_dir)

    # Config
    cuda = device.type != 'cpu'
    init_seeds(1 + RANK)

    # Dataloaders
    dataset_kwargs = {}
    if opt.train_img_res:
        dataset_kwargs = {'img_size': opt.train_img_res}

    dataset_splits = get_data_splits_by_name(
        data_root=opt.img_dir,
        dataset_name=opt.dataset_name,
        model_name=opt.model_name,
        batch_size=batch_size,
        num_workers=workers,
        distributed=(cuda and RANK != -1),
        **dataset_kwargs
    )

    test_img_size = dataset_splits["test"].dataset._img_size
    train_img_size = dataset_splits["train"].dataset._img_size
    if opt.test_img_res:
        test_img_size = opt.test_img_res

    train_loader = dataset_splits["train"]
    dataset = train_loader.dataset
    nc = dataset.num_classes
    nb = len(train_loader)  # number of batches

    # Model
    model = create_model(
        model_name=opt.model_name,
        pretraining_dataset=opt.pretraining_source_dataset,
        pretrained=opt.pretrained,
        num_classes=nc,
        progress=True,
        device=device,
    )

    # Freeze
    freeze = [f'model.{x}.' for x in range(freeze)]  # layers to freeze
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print(f'freezing {k}')
            v.requires_grad = False

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    LOGGER.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    g0, g1, g2 = [], [], []  # optimizer parameter groups
    for v in model.modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias
            g2.append(v.bias)
        if isinstance(v, nn.BatchNorm2d):  # weight (no decay)
            g0.append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
            g1.append(v.weight)

    if opt.adam:
        optimizer = Adam(g0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = SGD(g0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

    optimizer.add_param_group({'params': g1, 'weight_decay': hyp['weight_decay']})  # add g1 with weight_decay
    optimizer.add_param_group({'params': g2})  # add g2 (biases)
    # g0 holds the BatchNorm weights (no decay); g1 is the group that gets
    # weight_decay. The message labels follow that assignment.
    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
                f"{len(g0)} weight (no decay), {len(g1)} weight, {len(g2)} bias")
    del g0, g1, g2

    # Scheduler
    if opt.linear_lr:
        def lf(x):
            # linear decay 1 -> hyp['lrf']; max() avoids dividing by zero
            # when training for a single epoch
            return (1 - x / max(epochs - 1, 1)) * (1.0 - hyp['lrf']) + hyp['lrf']
    else:
        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)  # plot_lr_scheduler(optimizer, scheduler, epochs)

    # EMA
    ema = ModelEMA(model) if RANK in [-1, 0] else None

    start_epoch, best_fitness = 0, 0.0

    # Image sizes
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])

    # DP mode
    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
        logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
                        'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and RANK != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        LOGGER.info('Using SyncBatchNorm()')

    # Process 0
    if RANK in [-1, 0]:
        # Anchors
        model.half().float()  # pre-reduce anchor precision

    # DDP mode
    if cuda and RANK != -1:
        model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)

    # Model parameters
    hyp['giou'] *= 3. / nl  # scale to layers
    hyp['box'] = hyp['giou']
    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
    hyp['obj'] *= (train_img_size / 640) ** 2 * 3. / nl  # scale to image size and layers
    hyp['label_smoothing'] = opt.label_smoothing
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model

    eval_function = get_eval_function(dataset_name=opt.dataset_name, model_name=opt.model_name)
    criterion = YoloV5Loss(
        model=model,
        num_classes=nc,
        device=device,
        hyp_cfg=hyp_loss,
    )

    if opt.eval_before_train:
        ap_dict = evaluate(model, eval_function, opt.dataset_name, opt.img_dir,
                           nc, test_img_size, device)
        LOGGER.info(f'Eval metrics: {ap_dict}')

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    last_opt_step = -1
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    stopper = EarlyStopping(patience=opt.patience)

    # NOTE(review): these meters are created once and never reset in this
    # function, so the per-epoch scalars logged below are presumably running
    # averages over the whole run — confirm that is intended.
    loss_giou_mean = AverageMeter()
    loss_conf_mean = AverageMeter()
    loss_cls_mean = AverageMeter()
    loss_mean = AverageMeter()

    LOGGER.info(f'Image sizes {train_img_size} train, {test_img_size} val\n'
                f'Using {train_loader.num_workers} dataloader workers\n'
                f"Logging results to {colorstr('bold', save_dir)}\n"
                f'Starting training for {epochs} epochs...')

    for epoch in range(start_epoch, epochs):  # epoch
        model.train()

        mloss = torch.zeros(3, device=device)  # mean losses
        if RANK != -1:
            train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(train_loader)
        LOGGER.info(('\n' + '%10s' * 7) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'labels', 'img_size'))
        if RANK in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, labels_length, _) in pbar:  # batch
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float()

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0,
                                                 x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                # randrange needs integer bounds: float arguments are
                # deprecated since Python 3.10 and rejected from 3.12.
                sz = random.randrange(int(train_img_size * 0.5),
                                      int(train_img_size * 1.5) + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_giou, loss_conf, loss_cls = criterion(
                    pred, targets, labels_length, imgs.shape[-1]
                )
                # Update running mean of tracked metrics
                loss_items = torch.tensor([loss_giou, loss_conf, loss_cls]).to(device)
                if RANK in (-1, 0):
                    loss_giou_mean.update(loss_giou, imgs.size(0))
                    loss_conf_mean.update(loss_conf, imgs.size(0))
                    loss_cls_mean.update(loss_cls, imgs.size(0))
                    loss_mean.update(loss, imgs.size(0))

                if RANK != -1:
                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni - last_opt_step >= accumulate:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)
                last_opt_step = ni

            # Log
            if RANK in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
                pbar.set_description(('%10s' * 2 + '%10.4g' * 5) % (
                    f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
            # end batch

        # Scheduler
        scheduler.step()

        if RANK in [-1, 0]:
            for idx, param_group in enumerate(optimizer.param_groups):
                tb_writer.add_scalar(f'learning_rate/gr{idx}', param_group['lr'], epoch)
            tb_writer.add_scalar('train/giou_loss', loss_giou_mean.avg, epoch)
            tb_writer.add_scalar('train/conf_loss', loss_conf_mean.avg, epoch)
            tb_writer.add_scalar('train/cls_loss', loss_cls_mean.avg, epoch)
            tb_writer.add_scalar('train/loss', loss_mean.avg, epoch)

            # mAP
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop

            # Fitness measured for THIS epoch's weights; stays None when the
            # evaluation is skipped (noval / eval_freq), so a stale or
            # undefined mAP is never used for best.pt or early stopping.
            fi = None
            # NOTE(review): the final epoch is only evaluated when it also
            # falls on an eval_freq boundary — confirm this is intended.
            if (not noval or final_epoch) and epoch % opt.eval_freq == 0:  # Calculate mAP
                ap_dict = evaluate(ema.ema, eval_function, opt.dataset_name,
                                   opt.img_dir, nc, test_img_size, device)
                LOGGER.info(f'Eval metrics: {ap_dict}')
                tb_writer.add_scalar('eval/mAP', ap_dict['mAP'], epoch)
                for eval_key, eval_value in ap_dict.items():
                    if eval_key != 'mAP':
                        tb_writer.add_scalar(f'ap_per_class/{eval_key}', eval_value, epoch)

                # Update best mAP
                fi = ap_dict['mAP']
                if fi > best_fitness:
                    best_fitness = fi

            # Save model
            if (not nosave) or final_epoch:  # if save
                ckpt = {'epoch': epoch,
                        'best_fitness': best_fitness,
                        'model': deepcopy(de_parallel(model)).half(),
                        'ema': deepcopy(ema.ema).half(),
                        'updates': ema.updates,
                        'optimizer': optimizer.state_dict()}

                # Save last, best and delete
                torch.save(ckpt, last)
                # Ties with the current best also refresh best.pt, but only
                # for weights that were actually evaluated this epoch.
                if fi is not None and best_fitness == fi:
                    torch.save(ckpt, best)
                if (epoch > 0) and (opt.save_period > 0) and (epoch % opt.save_period == 0):
                    torch.save(ckpt, w / f'epoch{epoch}.pt')
                del ckpt

            # Stop Single-GPU (only consulted when a fresh fitness exists)
            if RANK == -1 and fi is not None and stopper(epoch=epoch, fitness=fi):
                break

        # end epoch
    # end training
    if RANK in [-1, 0]:
        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
        for f in last, best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is best:
                    LOGGER.info(f'\nValidating {f}...')
                    ckpt = torch.load(f, map_location=device)
                    model = ckpt['ema' if ckpt.get('ema') else 'model']
                    model.float().eval()
                    ap_dict = evaluate(model, eval_function, opt.dataset_name,
                                       opt.img_dir, nc, test_img_size, device)
                    LOGGER.info(f'Eval metrics: {ap_dict}')

        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")

    # Flush pending TensorBoard events and release the writer's file handle.
    tb_writer.close()
    torch.cuda.empty_cache()