def main():
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    print('Single models in memory ... ')
    for cp_coef in PICK_MODELS:
        print()
        print('Model: d' + str(cp_coef), '>>>')
        start = time.perf_counter()
        model = EfficientDetBackbone(compound_coef=cp_coef, num_classes=90,
                                     ratios=anchor_ratios, scales=anchor_scales)
        model.load_state_dict(
            torch.load(f'weights/efficientdet-d{cp_coef}.pth', map_location='cpu'))
        model.requires_grad_(False)
        model.eval()
        model = model.cpu()
        print('Loading(s): {:.2f}'.format(time.perf_counter() - start))

        dim = input_sizes[cp_coef]
        size = (3, dim, dim)
        run_inf(model, size, cp_coef, start_bs=1)
def main():
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    print('Loading all models in memory ... ')
    models = []
    for cp_coef in range(8):
        model = EfficientDetBackbone(compound_coef=cp_coef, num_classes=90,
                                     ratios=anchor_ratios, scales=anchor_scales)
        model.load_state_dict(
            torch.load(f'weights/efficientdet-d{cp_coef}.pth', map_location='cpu'))
        model.requires_grad_(False)
        model.eval()
        model = model.cuda()
        models.append(model)

    param = []
    for i, m in enumerate(models):
        if i not in PICK_MODELS:
            continue
        print()
        print('Model: d' + str(i), '>>>')
        dim = input_sizes[i]
        size = (3, dim, dim)
        run_inf(m, size, i, start_bs=128)

    for i, m in enumerate(models):
        out = m(torch.randn(1, 3, input_sizes[0], input_sizes[0]).cuda())
def main(img_path, base_name, checkpoint_path):
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                 ratios=anchor_ratios, scales=anchor_scales)
    # model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.load_state_dict(torch.load(checkpoint_path))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    display(out, ori_imgs, base_name, imshow=False, imwrite=True)
def show(args):
    input_size = input_sizes[args.compound_coef]
    ori_imgs, framed_imgs, framed_metas = eval_preprocess(args.img_path, max_size=input_size)

    if args.use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
    x = x.to(torch.float32).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=args.compound_coef, num_classes=len(obj_list),
                                 ratios=anchor_ratios, scales=anchor_scales)
    model.load_state_dict(torch.load(args.pth, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if args.use_cuda:
        model = model.cuda(device=args.device)

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = Rotation_BBoxTransform()
        clipBoxes = ClipBoxes()
        addBoxes = BBoxAddScores()

        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, addBoxes,
                          args.score_threshold, args.iou_threshold)

    out = invert_affine(framed_metas, out)
    display(out, ori_imgs, imshow=True, imwrite=False)
def read_images():
    for filename in os.listdir(imgfile_path):
        ori_imgs, framed_imgs, framed_metas = preprocess(
            os.path.join(imgfile_path, filename), max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

        model = EfficientDetBackbone(compound_coef=7, num_classes=len(obj_list),
                                     ratios=anchor_ratios, scales=anchor_scales)
        model.load_state_dict(
            torch.load(f'weights/efficientdet-d7/efficientdet-d7.pth'))  # place weight path here
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model = model.cuda()
        if use_float16:
            model = model.half()

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold, iou_threshold)

        out = invert_affine(framed_metas, out)
        display(filename, out, ori_imgs, imshow=False, imwrite=True)

        print('running speed test...')
        with torch.no_grad():
            print('test1: model inferring and postprocessing')
            print('inferring image for 10 times...')
            t1 = time.time()
            for _ in range(10):
                _, regression, classification, anchors = model(x)
                out = postprocess(x, anchors, regression, classification,
                                  regressBoxes, clipBoxes, threshold, iou_threshold)
                out = invert_affine(framed_metas, out)
            t2 = time.time()
            tact_time = (t2 - t1) / 10
            print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
def test(threshold=0.2):
    with open("datasets/vcoco/new_prior_mask.pkl", "rb") as file:
        prior_mask = pickle.load(file, encoding="bytes")

    model = EfficientDetBackbone(num_classes=len(eval(params["obj_list"])),
                                 num_union_classes=25, num_inst_classes=51,
                                 compound_coef=args.compound_coef,
                                 ratios=eval(params["anchors_ratios"]),
                                 scales=eval(params["anchors_scales"]))
    model.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu')))
    model.requires_grad_(False)
    model.eval()

    if args.cuda:
        model = model.cuda()
    if args.float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    img_dir = os.path.join(data_dir, "vcoco/coco/images/%s" % "val2014")
    with open(os.path.join(data_dir, 'vcoco/data/splits/vcoco_test.ids'), 'r') as f:
        image_ids = f.readlines()
    image_ids = [int(id) for id in image_ids]

    _t = {'im_detect': Timer(), 'misc': Timer()}
    detection = []

    for i, image_id in enumerate(image_ids):
        _t['im_detect'].tic()
        file = "COCO_val2014_" + (str(image_id)).zfill(12) + '.jpg'
        img_detection = img_detect(file, img_dir, model, input_size,
                                   regressBoxes, clipBoxes, prior_mask,
                                   threshold=threshold)
        detection.extend(img_detection)
        if need_visual:
            visual(img_detection, image_id)
        _t['im_detect'].toc()
        print('im_detect: {:d}/{:d}, average time: {:.3f}s'.format(
            i + 1, len(image_ids), _t['im_detect'].average_time))

    with open(detection_path, "wb") as file:
        pickle.dump(detection, file)
def test(threshold=0.2):
    model = EfficientDetBackbone(num_classes=num_objects,
                                 num_union_classes=num_union_actions,
                                 num_inst_classes=num_inst_actions,
                                 compound_coef=args.compound_coef,
                                 ratios=eval(params["anchors_ratios"]),
                                 scales=eval(params["anchors_scales"]))
    model.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu')))
    model.requires_grad_(False)
    model.eval()

    if args.cuda:
        model = model.cuda()
    if args.float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    img_dir = os.path.join(data_dir, "hico_20160224_det/images/%s" % "test2015")

    _t = {'im_detect': Timer(), 'misc': Timer()}
    detection = {}
    count = 0

    for line in glob.iglob(img_dir + '/' + '*.jpg'):
        count += 1
        _t['im_detect'].tic()
        image_id = int(line[-9:-4])
        file = "HICO_test2015_" + (str(image_id)).zfill(8) + ".jpg"
        # if file != "COCO_val2014_000000001987.jpg":
        #     continue
        dets = img_detect(file, img_dir, model, input_size,
                          regressBoxes, clipBoxes, threshold=threshold)
        detection[image_id] = dets
        # detection.extend(img_detection)
        _t['im_detect'].toc()
        print('im_detect: {:d}/{:d}, average time: {:.3f}s'.format(
            count, 9658, _t['im_detect'].average_time))

    with open(detection_path, "wb") as file:
        pickle.dump(detection, file)
def test(opt):
    params = Params(f'projects/{opt.project}.yml')
    project_name = params.project_name
    obj_list = params.obj_list
    compound_coef = opt.compound_coef
    force_input_size = None  # set None to use default size
    img_dir = opt.img_dir
    model_path = opt.model_path

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(model_path))
    model.eval()
    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    gt = COCO(opt.ann_file)
    gt_lst = load_coco_bboxes(gt, is_gt=True)

    imgs = glob.glob(os.path.join(img_dir, '*.jpg'))
    det_lst = []
    progressbar = tqdm(imgs)
    for i, img in enumerate(progressbar):
        det = single_img_test(img, input_size, model, use_cuda, use_float16)
        det_lst.extend(det)
        progressbar.update()
        progressbar.set_description('Step: {}/{}'.format(i, len(imgs)))

    evaluator = Evaluator()
    ret, mAP = evaluator.GetMAPbyClass(gt_lst, det_lst, method='EveryPointInterpolation')

    # Get metric values per each class
    for metricsPerClass in ret:
        cl = metricsPerClass['class']
        ap = metricsPerClass['AP']
        ap_str = '{0:.3f}'.format(ap)
        print('AP: %s (%s)' % (ap_str, cl))

    mAP_str = '{0:.3f}'.format(mAP)
    print('mAP: %s\n' % mAP_str)
def load_model(compound_coef, obj_list, params, weights_path, use_cuda, use_float16):
    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                 ratios=eval(params['anchors_ratios']),
                                 scales=eval(params['anchors_scales']))
    model.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu')))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model.cuda(gpu)
    if use_float16:
        model.half()

    return model
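# A minimal usage sketch for the load_model() helper above, not part of the
# original snippet. It assumes a project yaml whose 'anchors_ratios' and
# 'anchors_scales' entries are strings (as eval() in load_model() expects),
# a placeholder weights path, and that the global `gpu` index referenced inside
# load_model() exists. The 512x512 dummy input matches compound_coef=0.
import yaml
import torch

gpu = 0  # only used when use_cuda=True
params = yaml.safe_load(open('projects/coco.yml'))  # assumed project config
obj_list = params['obj_list']
model = load_model(compound_coef=0, obj_list=obj_list, params=params,
                   weights_path='weights/efficientdet-d0.pth',  # placeholder path
                   use_cuda=False, use_float16=False)
with torch.no_grad():
    features, regression, classification, anchors = model(torch.randn(1, 3, 512, 512))
print(regression.shape, classification.shape)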
def model_fn(model_dir):
    with open(os.path.join(model_dir, "hyperparameters.yml")) as f:
        hps = yaml.load(f, yaml.FullLoader)

    model = EfficientDetBackbone(
        compound_coef=hps["compound_coef"],
        num_classes=len(hps["classes"]),
        ratios=hps["anchors_ratios"],
        scales=hps["anchors_scales"],
    )
    with open(os.path.join(model_dir, "model.pth"), "rb") as f:
        # Without map_location=torch.device('cpu'), the weights are loaded onto the
        # device they were saved from (e.g. 'cuda:0') instead of being forced into
        # CPU memory first. That usually works, but it can fail when coco_eval is
        # run on a CPU-only server.
        model.load_state_dict(torch.load(f, map_location=torch.device("cpu")))
    return model
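# Hedged example of driving the model_fn() above outside a serving container.
# It assumes a local directory (name is illustrative) containing the
# 'hyperparameters.yml' and 'model.pth' files model_fn() reads, and that the
# checkpoint was trained at compound_coef 0, so a 512x512 dummy input is valid.
import torch

model = model_fn("./model_artifacts")  # hypothetical artifact directory
model.requires_grad_(False)
model.eval()
with torch.no_grad():
    features, regression, classification, anchors = model(torch.randn(1, 3, 512, 512))
print([f.shape for f in features])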
def load_apex_model(compound_coef, obj_list, params, weights_path):
    opt_level = 'O1'
    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                 ratios=eval(params['anchors_ratios']),
                                 scales=eval(params['anchors_scales']))
    checkpoint = torch.load(weights_path)

    model = model.cuda(gpu)
    optimizer = torch.optim.AdamW(model.parameters(), lr)
    model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level)

    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    amp.load_state_dict(checkpoint['amp'])

    model.requires_grad_(False)
    model.cuda(gpu)
    model = model.eval()
    return model
def main():
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    models = []
    for cp_coef in range(8):
        model = EfficientDetBackbone(compound_coef=cp_coef, num_classes=90,
                                     ratios=anchor_ratios, scales=anchor_scales)
        model.load_state_dict(
            torch.load(f'weights/efficientdet-d{cp_coef}.pth', map_location='cpu'))
        model.requires_grad_(False)
        model.eval()
        model = model.cuda()
        models.append(model)

    while True:
        time.sleep(1)
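# Optional companion to the keep-all-models-resident loop above (not in the
# original snippet): a rough sketch that reports how much GPU memory the loaded
# models occupy. It only counts allocations made through PyTorch's caching
# allocator on the current CUDA device.
import torch

def report_gpu_memory():
    allocated_mb = torch.cuda.memory_allocated() / 1024 ** 2
    reserved_mb = torch.cuda.memory_reserved() / 1024 ** 2
    print(f'allocated: {allocated_mb:.1f} MiB, reserved: {reserved_mb:.1f} MiB')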
def model_fn(model_dir):
    # based entirely off of
    # https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/blob/master/coco_eval.py
    print(f'building and loading efficientdet d{EFFICIENTDET_COMPOUND_COEF}')
    model = EfficientDetBackbone(compound_coef=EFFICIENTDET_COMPOUND_COEF,
                                 num_classes=len(PARAMS['obj_list']),
                                 ratios=eval(PARAMS['anchors_ratios']),
                                 scales=eval(PARAMS['anchors_scales']))
    state_dict = torch.hub.load_state_dict_from_url(
        url=get_weights_url(c=EFFICIENTDET_COMPOUND_COEF),
        model_dir=model_dir,
        map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    model.requires_grad_(False)
    model.eval()

    if USE_CUDA:
        model.cuda(0)
    if USE_FLOAT16:
        model.half()

    return model
def load_model(weights_path: Union[str, os.PathLike],
               p_cfg_path: Union[str, os.PathLike],
               compound_coef: float) -> EfficientDetBackbone:
    """Loads and returns a model with the given weights and project config.

    Args:
        weights_path (Union[str, os.PathLike]): Path to model weights.
        p_cfg_path (Union[str, os.PathLike]): Path to project config yaml file.
        compound_coef (float): Compound scaling coefficient.

    Returns:
        EfficientDetBackbone: EfficientDet model
    """
    params = yaml.safe_load(open(p_cfg_path))
    obj_list = params['obj_list']

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                 ratios=eval(params['anchors_ratios']),
                                 scales=eval(params['anchors_scales']))
    model.load_state_dict(torch.load(weights_path, map_location=DEVICE))
    if USE_CUDA:
        model.cuda()
    model.requires_grad_(False)
    model.eval()
    return model
def model_init(args):
    compound_coef = args.compound_coef
    checkpoint = args.checkpoint
    use_cuda = not args.cpu

    cudnn.fastest = True
    cudnn.benchmark = True

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

    # tf bilinear interpolation is different from any other's, just make do
    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=90,
                                 ratios=anchor_ratios, scales=anchor_scales)
    model.load_state_dict(torch.load(checkpoint, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    return model
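# Hedged usage sketch for model_init() above (not in the original snippet):
# it builds the argparse-style namespace the function expects (compound_coef,
# checkpoint, cpu) and runs one dummy forward pass. The checkpoint path is a
# placeholder, and the 512x512 input assumes compound_coef=0.
from types import SimpleNamespace
import torch

args = SimpleNamespace(compound_coef=0,
                       checkpoint='weights/efficientdet-d0.pth',  # placeholder path
                       cpu=True)
model = model_init(args)
with torch.no_grad():
    features, regression, classification, anchors = model(torch.randn(1, 3, 512, 512))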
def main():
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    for cp_coef in range(8):
        print()
        print('Model: d' + str(cp_coef), '>>>')
        total_time = 0
        for i in range(100):
            if i % 10 == 0:
                start = time.perf_counter()
                model = EfficientDetBackbone(compound_coef=cp_coef, num_classes=90,
                                             ratios=anchor_ratios, scales=anchor_scales)
                model.load_state_dict(
                    torch.load(f'weights/efficientdet-d{cp_coef}.pth', map_location='cpu'))
                model.requires_grad_(False)
                model.eval()
                model = model.cuda()
                total_time += time.perf_counter() - start
            else:
                model = None
        print('Loading(s): {:.2f}'.format(total_time / 10))
def eval(pretrained_weights: Path, inputs_splitted_into_lists: list,
         compound_coef: int, use_cuda: bool) -> list:
    threshold = 0.2
    iou_threshold = 0.2

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=1,
                                 ratios=anchor_ratios, scales=anchor_scales)
    model.load_state_dict(torch.load(pretrained_weights, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    predictions = []
    for inputs_split in inputs_splitted_into_lists:
        with torch.no_grad():
            features, regression, classification, anchors = model(inputs_split)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(inputs_split, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold, iou_threshold)
        predictions += out

    return predictions
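# Sketch of how the eval() function above might be driven (not in the original
# snippet). Assumptions: the model is single-class (num_classes=1), the inputs
# are already-batched NCHW float tensors, and use_float16 exists as a global
# because eval() reads it. The weights path and batch contents are illustrative
# only; a real call needs a checkpoint trained with matching num_classes.
from pathlib import Path
import torch

use_float16 = False
batches = [torch.randn(2, 3, 512, 512) for _ in range(3)]  # dummy 512px batches (d0)
preds = eval(Path('weights/efficientdet-d0-custom.pth'), batches,
             compound_coef=0, use_cuda=False)
print(len(preds))  # one dict per image, with 'rois', 'class_ids', 'scores'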
config = InferenceConfig()

use_cuda = True
use_float16 = False
cudnn.fastest = True
cudnn.benchmark = True

color_list = standard_to_bgr(STANDARD_COLORS)
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

model = EfficientDetBackbone(compound_coef=config.compound_coef,
                             num_classes=len(config.obj_list),
                             ratios=config.anchor_ratios,
                             scales=config.anchor_scales)
model.load_state_dict(torch.load(opt.weights))
model.requires_grad_(False)
model.eval()

if opt.command == 'report':
    square_size = params.square_size
    red_box = ((config.crop_size - square_size) / 2,
               (config.crop_size - square_size) / 2,
               (config.crop_size + square_size) / 2,
               (config.crop_size + square_size) / 2)
    fail_ids = eval(opt.fail_ids)
    pass_ids = eval(opt.pass_ids)
    thresholds = eval(opt.thresholds)
    if os.path.isfile(opt.dataset):
        raise ValueError("dataset must be a folder")
def train(opt): params = Params(f'projects/{opt.project}.yml') if params.num_gpus == 0: os.environ['CUDA_VISIBLE_DEVICES'] = '-1' if torch.cuda.is_available(): torch.cuda.manual_seed(42) else: torch.manual_seed(42) opt.saved_path = opt.saved_path + f'/{params.project_name}/' opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/' os.makedirs(opt.log_path, exist_ok=True) os.makedirs(opt.saved_path, exist_ok=True) input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536] training_set = CocoDataset(root_dir=os.path.join(opt.data_path, params.project_name), set=params.train_set, phase='train', transforms=get_train_transforms()) val_set = CocoDataset(root_dir=os.path.join(opt.data_path, params.project_name), set=params.val_set, phase='val', transforms=get_valid_transforms()) training_generator = torch.utils.data.DataLoader( training_set, batch_size=opt.batch_size, sampler=RandomSampler(training_set), pin_memory=False, drop_last=True, num_workers=opt.num_workers, collate_fn=collate_fn, ) val_generator = torch.utils.data.DataLoader( val_set, batch_size=opt.batch_size, num_workers=opt.num_workers, shuffle=False, sampler=SequentialSampler(val_set), pin_memory=False, collate_fn=collate_fn, ) model = EfficientDetBackbone(num_classes=len(params.obj_list), compound_coef=opt.compound_coef, ratios=eval(params.anchors_ratios), scales=eval(params.anchors_scales)) # load last weights if opt.load_weights is not None: if opt.load_weights.endswith('.pth'): weights_path = opt.load_weights else: weights_path = get_last_weights(opt.saved_path) try: last_step = int( os.path.basename(weights_path).split('_')[-1].split('.')[0]) except: last_step = 0 try: ret = model.load_state_dict(torch.load(weights_path), strict=False) except RuntimeError as e: print(f'[Warning] Ignoring {e}') print( '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.' ) print( f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}' ) else: last_step = 0 print('[Info] initializing weights...') init_weights(model) # freeze backbone if train head_only if opt.head_only: def freeze_backbone(m): classname = m.__class__.__name__ for ntl in ['EfficientNet', 'BiFPN']: if ntl in classname: for param in m.parameters(): param.requires_grad = False model.apply(freeze_backbone) print('[Info] freezed backbone') # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4 # useful when gpu memory is limited. # because when bn is disable, the training will be very unstable or slow to converge, # apply sync_bn can solve it, # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus. # but it would also slow down the training by a little bit. 
if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4: model.apply(replace_w_sync_bn) use_sync_bn = True else: use_sync_bn = False writer = SummaryWriter( opt.log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/') # warp the model with loss function, to reduce the memory usage on gpu0 and speedup model = ModelWithLoss(model, debug=opt.debug) if params.num_gpus > 0: model = model.cuda() if params.num_gpus > 1: model = CustomDataParallel(model, params.num_gpus) if use_sync_bn: patch_replication_callback(model) if opt.optim == 'adamw': optimizer = torch.optim.AdamW(model.parameters(), opt.lr) else: optimizer = torch.optim.SGD(model.parameters(), opt.lr, momentum=0.9, nesterov=True) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) epoch = 0 best_loss = 1e5 best_epoch = 0 accumulation_steps = 32 step = max(0, last_step) model.train() num_iter_per_epoch = len(training_generator) try: for epoch in range(opt.num_epochs): last_epoch = step // num_iter_per_epoch if epoch < last_epoch: continue epoch_loss = [] progress_bar = tqdm(training_generator) for iter, (imgs, annots) in enumerate(progress_bar): pass if iter < step - last_epoch * num_iter_per_epoch: progress_bar.update() continue try: imgs = torch.stack(imgs) annot = pad_annots(annots) if params.num_gpus == 1: # if only one gpu, just send it to cuda:0 # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here imgs = imgs.cuda() annot = annot.cuda() # print(annot) # optimizer.zero_grad() cls_loss, reg_loss = model(imgs, annot, obj_list=params.obj_list) cls_loss = cls_loss.mean() reg_loss = reg_loss.mean() loss = cls_loss + reg_loss if loss == 0 or not torch.isfinite(loss): continue loss.backward() # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1) if (iter + 1) % (accumulation_steps // opt.batch_size) == 0: # print('step') optimizer.step() optimizer.zero_grad() # optimizer.step() epoch_loss.append(float(loss)) progress_bar.set_description( 'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}' .format(step, epoch, opt.num_epochs, iter + 1, num_iter_per_epoch, cls_loss.item(), reg_loss.item(), loss.item())) writer.add_scalars('Loss', {'train': loss}, step) writer.add_scalars('Regression_loss', {'train': reg_loss}, step) writer.add_scalars('Classfication_loss', {'train': cls_loss}, step) # log learning_rate current_lr = optimizer.param_groups[0]['lr'] writer.add_scalar('learning_rate', current_lr, step) step += 1 if step % opt.save_interval == 0 and step > 0: save_checkpoint( model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth' ) print('checkpoint...') except Exception as e: print('[Error]', traceback.format_exc()) print(e) continue scheduler.step(np.mean(epoch_loss)) if epoch % opt.val_interval == 0: model.eval() loss_regression_ls = [] loss_classification_ls = [] for iter, (imgs, annots) in enumerate(val_generator): with torch.no_grad(): imgs = torch.stack(imgs) annot = pad_annots(annots) if params.num_gpus == 1: imgs = imgs.cuda() annot = annot.cuda() cls_loss, reg_loss = model(imgs, annot, obj_list=params.obj_list) cls_loss = cls_loss.mean() reg_loss = reg_loss.mean() loss = cls_loss + reg_loss if loss == 0 or not torch.isfinite(loss): continue loss_classification_ls.append(cls_loss.item()) loss_regression_ls.append(reg_loss.item()) cls_loss = np.mean(loss_classification_ls) reg_loss = np.mean(loss_regression_ls) loss = cls_loss + reg_loss print( 'Val. Epoch: {}/{}. Classification loss: {:1.5f}. 
Regression loss: {:1.5f}. Total loss: {:1.5f}' .format(epoch, opt.num_epochs, cls_loss, reg_loss, loss)) writer.add_scalars('Loss', {'val': loss}, step) writer.add_scalars('Regression_loss', {'val': reg_loss}, step) writer.add_scalars('Classfication_loss', {'val': cls_loss}, step) if loss + opt.es_min_delta < best_loss: best_loss = loss best_epoch = epoch save_checkpoint( model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth' ) model.train() # Early stopping if epoch - best_epoch > opt.es_patience > 0: print( '[Info] Stop training at epoch {}. The lowest loss achieved is {}' .format(epoch, best_loss)) break except KeyboardInterrupt: save_checkpoint( model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth') writer.close() writer.close()
# tf bilinear interpolation is different from any other's, just make do
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

if use_cuda:
    x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
else:
    x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                             ratios=anchor_ratios, scales=anchor_scales)
model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
model.requires_grad_(False)
model.eval()

if use_cuda:
    model = model.cuda()
if use_float16:
    model = model.half()

with torch.no_grad():
    features, regression, classification, anchors = model(x)

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # call truncated in the source; arguments assumed to match the
    # postprocess(...) signature used in the surrounding snippets
    out = postprocess(x, anchors, regression, classification,
                      regressBoxes, clipBoxes, threshold, iou_threshold)
class Detector(object): def __init__(self, args): self.args = args use_cuda = bool(strtobool(self.args.use_cuda)) params = Params(f'projects/{self.args.project}.yml') self.submit = True self.cam_id = 1 self.object_list = [] self.object_list_tracks = [] if args.display: pass # cv2.namedWindow("test", cv2.WINDOW_NORMAL) # cv2.resizeWindow("test", args.display_width, args.display_height) self.vdo = cv2.VideoCapture() self.efficientdet = EfficientDetBackbone( num_classes=len(params.obj_list), compound_coef=self.args.compound_coef, ratios=eval(params.anchors_ratios), scales=eval(params.anchors_scales)).cuda() # self.yolo3 = YOLOv3(args.yolo_cfg, args.yolo_weights, args.yolo_names, is_xywh=True, conf_thresh=args.conf_thresh, nms_thresh=args.nms_thresh, use_cuda=use_cuda) self.deepsort = DeepSort(args.deepsort_checkpoint, use_cuda=True) # self.class_names = self.yolo3.class_names self.efficientdet.load_state_dict(torch.load( args.detector_weights_path), strict=False) def __enter__(self): self.im_width = 1920 self.im_height = 1280 if self.args.save_path: fourcc = cv2.VideoWriter_fourcc(*'DIVX') self.output = cv2.VideoWriter(self.args.save_path, fourcc, 10, (self.im_width, self.im_height)) return self def __exit__(self, exc_type, exc_value, exc_traceback): if exc_type: print(exc_type, exc_value, exc_traceback) def detect(self): for tf_idx, tfrecord in enumerate(tqdm(tfrecord_paths[2:])): self.object_list = [] self.object_list_tracks = [] training_set = TUMuchTrackingDataset(tfrecord_path=tfrecord, transform=tfs, cam_id=self.cam_id) training_generator = DataLoader(training_set, **training_params) for it, data in enumerate(training_generator): imgs = data['img'].to(torch.device("cuda:0")) if self.submit: meta = data['meta'] with torch.no_grad(): features, regression, classification, anchors = self.efficientdet( imgs) out = postprocess(imgs, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold) # boxes is cx, cy, cw, ch boxes = out[0]["rois"] for idx in range(out[0]["rois"].shape[0]): cx, cy, lx, ly = out[0]["rois"][idx] cw, ch = lx - cx, ly - cy boxes[idx][0] = cx + cw / 2 boxes[idx][1] = cy + ch / 2 boxes[idx][2] = cw boxes[idx][3] = ch bbox_xcycwh, cls_conf, cls_ids = boxes, out[0]["scores"], out[ 0]["class_ids"] if bbox_xcycwh is not None: mask = cls_ids <= 4 bbox_xcycwh = bbox_xcycwh[mask] try: bbox_xcycwh[:, 3:] *= 1 except: continue cls_conf = cls_conf[mask] im = imgs.cpu().numpy() im = im[0, :, :, :] im = np.swapaxes(im, 0, 2) im = np.swapaxes(im, 0, 1) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) im = im * 255 im = im.astype(np.uint8) outputs = self.deepsort.update(bbox_xcycwh, cls_conf, out[0]["class_ids"], im) if len(outputs) > 0: bbox_xyxy = outputs[:, :4] identities = outputs[:, -2] track_class = outputs[:, -1] if self.submit: for box_idx in range(bbox_xyxy.shape[0]): o = meta[:][0] box = label_pb2.Label.Box() box.center_x = (bbox_xyxy[box_idx, 0] + bbox_xyxy[box_idx, 2]) / 2 box.center_y = (bbox_xyxy[box_idx, 1] + bbox_xyxy[box_idx, 3]) / 2 box.length = (bbox_xyxy[box_idx, 2] - bbox_xyxy[box_idx, 0]) box.width = (bbox_xyxy[box_idx, 3] - bbox_xyxy[box_idx, 1]) o.object.box.CopyFrom(box) o.score = 0.9 # CHECK THIS # Use correct type. 
o.object.type = to_waymo_classes[track_class[ box_idx]] # MAP THIS TO CORRECT CLASSES self.object_list.append(copy.deepcopy(o)) o.object.id = str(identities[box_idx]) self.object_list_tracks.append( copy.deepcopy(o)) # import pdb; pdb.set_trace() if self.args.save_path: draw_bboxes(im, bbox_xyxy, identities) if self.args.display: pass self.args.save_path = "cam_{}.avi".format(self.cam_id) if self.args.save_path: self.output.write(im) objects = metrics_pb2.Objects() # write object detection stuff for o in self.object_list: objects.objects.append(o) f = open("./output/detection/sub_camid_{}.bin".format(self.cam_id), 'ab') f.write(objects.SerializeToString()) f.close() objects = metrics_pb2.Objects() # write object detection stuff for o in self.object_list_tracks: objects.objects.append(o) f = open("./output/tracking/sub_camid_{}.bin".format(self.cam_id), 'ab') f.write(objects.SerializeToString()) f.close()
def train(opt): params = Params(f'projects/{opt.project}_crop.yml') if params.num_gpus == 0: os.environ['CUDA_VISIBLE_DEVICES'] = '1-' if torch.cuda.is_available(): torch.cuda.manual_seed(42) else: torch.manual_seed(42) save_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") opt.saved_path = opt.saved_path + f'/{params.project_name}/crop/weights/{save_time}' opt.log_path = opt.log_path + f'/{params.project_name}/crop/tensorboard/' os.makedirs(opt.log_path, exist_ok=True) os.makedirs(opt.saved_path, exist_ok=True) print('save_path :', opt.saved_path) print('log_path :', opt.log_path) training_params = { 'batch_size': opt.batch_size, 'shuffle': True, 'drop_last': True, 'collate_fn': collater, 'num_workers': opt.num_workers } val_params = { 'batch_size': opt.batch_size, 'shuffle': False, 'drop_last': True, 'collate_fn': collater, 'num_workers': opt.num_workers } input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536] training_set = Project42Dataset(root_dir=os.path.join( opt.data_path, params.project_name, 'crop'), set=params.train_set, params=params, transform=transforms.Compose([ Normalizer(mean=params.mean, std=params.std), Augmenter(), Resizer(input_sizes[opt.compound_coef]) ])) training_generator = DataLoader(training_set, **training_params) val_set = Project42Dataset(root_dir=os.path.join(opt.data_path, params.project_name, 'crop'), set=params.val_set, params=params, transform=transforms.Compose([ Normalizer(mean=params.mean, std=params.std), Resizer(input_sizes[opt.compound_coef]) ])) val_generator = DataLoader(val_set, **val_params) # labels labels = training_set.labels print('label:', labels) model = EfficientDetBackbone(num_classes=len(params.obj_list), compound_coef=opt.compound_coef, ratios=eval(params.anchors_ratios), scales=eval(params.anchors_scales)) # load last weights if opt.load_weights is not None: if opt.load_weights.endswith('.pth'): weights_path = opt.load_weights else: weights_path = get_last_weights(opt.saved_path) try: last_step = int( os.path.basename(weights_path).split('_')[-1].split('.')[0]) except: last_step = 0 try: ret = model.load_state_dict(torch.load(weights_path), strict=False) except RuntimeError as e: print(f'[Warning] Ignoring {e}') print( '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.' ) print( f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}' ) else: last_step = 0 print('[Info] initializing weights...') init_weights(model) # freeze backbone if train head_only if opt.head_only: def freeze_backbone(m): classname = m.__class__.__name__ for ntl in ['EfficientNet', 'BiFPN']: if ntl in classname: for param in m.parameters(): param.requires_grad = False model.apply(freeze_backbone) print('[Info] freezed backbone') # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4 # useful when gpu memory is limited. # because when bn is disable, the training will be very unstable or slow to converge, # apply sync_bn can solve it, # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus. # but it would also slow down the training by a little bit. 
if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4: model.apply(replace_w_sync_bn) use_sync_bn = True else: use_sync_bn = False writer = SummaryWriter(opt.log_path + f'/{save_time}/') # warp the model with loss function, to reduce the memory usage on gpu0 and speedup model = ModelWithLoss(model, debug=opt.debug) if params.num_gpus > 0: model = model.cuda() if params.num_gpus > 1: model = CustomDataParallel(model, params.num_gpus) if use_sync_bn: patch_replication_callback(model) if opt.optim == 'adamw': optimizer = torch.optim.AdamW(model.parameters(), opt.lr) else: optimizer = torch.optim.SGD(model.parameters(), opt.lr, momentum=0.9, nesterov=True) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) epoch = 0 best_loss = 1e5 best_epoch = 0 step = max(0, last_step) model.train() num_iter_per_epoch = len(training_generator) try: for epoch in range(opt.num_epochs): last_epoch = step // num_iter_per_epoch if epoch < last_epoch: continue epoch_loss = [] progress_bar = tqdm(training_generator) for iter, data in enumerate(progress_bar): if iter < step - last_epoch * num_iter_per_epoch: progress_bar.update() continue try: imgs = data['img'] annot = data['annot'] ## train image show # for idx in range(len(imgs)): # showshow = imgs[idx].numpy() # print(showshow.shape) # showshow = showshow.transpose(1, 2, 0) # a = annot[idx].numpy().reshape(5, ) # img_show = cv2.rectangle(showshow, (a[0],a[1]), (a[2],a[3]), (0, 0, 0), 3) # cv2.imshow(f'{idx}_{params.obj_list[int(a[4])]}', img_show) # cv2.waitKey(1000) # cv2.destroyAllWindows() if params.num_gpus == 1: # if only one gpu, just send it to cuda:0 # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here imgs = imgs.cuda() annot = annot.cuda() optimizer.zero_grad() cls_loss, reg_loss, regression, classification, anchors = model( imgs, annot, obj_list=params.obj_list) cls_loss = cls_loss.mean() reg_loss = reg_loss.mean() loss = cls_loss + reg_loss if loss == 0 or not torch.isfinite(loss): continue loss.backward() # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1) optimizer.step() # loss epoch_loss.append(float(loss)) # mAP threshold = 0.2 iou_threshold = 0.2 regressBoxes = BBoxTransform() clipBoxes = ClipBoxes() out = postprocess(imgs, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold) mAP = mAP_score(annot, out, labels) mAP = mAP.results['mAP'] progress_bar.set_description( 'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}. 
mAP: {:.2f}' .format(step, epoch + 1, opt.num_epochs, iter + 1, num_iter_per_epoch, cls_loss.item(), reg_loss.item(), loss.item(), mAP)) writer.add_scalars('Loss', {'train': loss}, step) writer.add_scalars('Regression_loss', {'train': reg_loss}, step) writer.add_scalars('Classfication_loss', {'train': cls_loss}, step) writer.add_scalars('mAP', {'train': mAP}, step) # log learning_rate current_lr = optimizer.param_groups[0]['lr'] writer.add_scalar('learning_rate', current_lr, step) step += 1 if step % opt.save_interval == 0 and step > 0: save_checkpoint( model, f'efficientdet-d{opt.compound_coef}_{epoch}.pth') print('checkpoint...') except Exception as e: print('[Error]', traceback.format_exc()) print(e) continue scheduler.step(np.mean(epoch_loss)) if epoch % opt.val_interval == 0: model.eval() loss_regression_ls = [] loss_classification_ls = [] for iter, data in enumerate(val_generator): with torch.no_grad(): imgs = data['img'] annot = data['annot'] if params.num_gpus == 1: imgs = imgs.cuda() annot = annot.cuda() cls_loss, reg_loss, regression, classification, anchors = model( imgs, annot, obj_list=params.obj_list) cls_loss = cls_loss.mean() reg_loss = reg_loss.mean() loss = cls_loss + reg_loss if loss == 0 or not torch.isfinite(loss): continue loss_classification_ls.append(cls_loss.item()) loss_regression_ls.append(reg_loss.item()) cls_loss = np.mean(loss_classification_ls) reg_loss = np.mean(loss_regression_ls) loss = cls_loss + reg_loss # mAP threshold = 0.2 iou_threshold = 0.2 regressBoxes = BBoxTransform() clipBoxes = ClipBoxes() out = postprocess(imgs, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold) mAP = mAP_score(annot, out, labels) mAP = mAP.results['mAP'] print( 'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}. mAP: {:.2f}' .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss, loss, mAP)) writer.add_scalars('Loss', {'val': loss}, step) writer.add_scalars('Regression_loss', {'val': reg_loss}, step) writer.add_scalars('Classfication_loss', {'val': cls_loss}, step) writer.add_scalars('mAP', {'val': mAP}, step) if loss + opt.es_min_delta < best_loss: best_loss = loss best_epoch = epoch save_checkpoint( model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth' ) model.train() # Early stopping if epoch - best_epoch > opt.es_patience > 0: print( '[Info] Stop training at epoch {}. The lowest loss achieved is {}' .format(epoch, best_loss)) break except KeyboardInterrupt: save_checkpoint( model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth') writer.close() writer.close()
if __name__ == '__main__':
    SET_NAME = params['val_set']
    VAL_GT = f'datasets/{params["project_name"]}/annotations/instances_{SET_NAME}.json'
    VAL_IMGS = f'datasets/{params["project_name"]}/{SET_NAME}/'
    MAX_IMAGES = 10000
    coco_gt = COCO(VAL_GT)
    image_ids = coco_gt.getImgIds()[:MAX_IMAGES]

    if override_prev_results or not os.path.exists(f'{SET_NAME}_bbox_results.json'):
        model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                     ratios=eval(params['anchors_ratios']),
                                     scales=eval(params['anchors_scales']))
        model.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu')))
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model.cuda(gpu)
        if use_float16:
            model.half()

        image_ids = evaluate_coco(VAL_IMGS, SET_NAME, image_ids, coco_gt, model)

    _eval(coco_gt, image_ids, f'{SET_NAME}_bbox_results.json')
        img = cv2.cvtColor(imgs[i], cv2.COLOR_BGR2RGB)
        cv2.imshow('video', img)
        cv2.waitKey(1)

        if imwrite:
            cv2.imwrite(f'test/img_inferred_d{compound_coef}_{img_name}', imgs[i])


color_list = standard_to_bgr(STANDARD_COLORS)
# tf bilinear interpolation is different from any other's, just make do
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                             ratios=anchor_ratios, scales=anchor_scales)
model.load_state_dict(
    torch.load(f'logs/{project}/efficientdet-d{compound_coef}_{number}.pth',
               map_location='cpu'))
model.requires_grad_(False)
model.eval()

if use_cuda:
    model = model.cuda()
if use_float16:
    model = model.half()

cap = cv2.VideoCapture(video_path)

while cap.isOpened():
    # load image
    hasFrames, image = cap.read()
    ori_imgs, framed_imgs, framed_metas = preprocess(image, max_size=input_size, video=True)
input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

if use_cuda:
    x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
else:
    x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                             ratios=anchor_ratios, scales=anchor_scales)
model.load_state_dict(
    torch.load(r'weights\efficientdet-d3_315_35000.pth', map_location='cpu'))
model.requires_grad_(False)
model.eval()

if use_cuda:
    model = model.cuda()
if use_float16:
    model = model.half()

with torch.no_grad():
    features, regression, classification, anchors = model(x)

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
else:
    x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

# model = efficientdet.from_pretrained('efficientnet-b1')
model = EfficientDetBackbone(
    compound_coef=compound_coef,
    num_classes=len(obj_list),
    # replace this part with your project's anchor config
    ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
    scales=[2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)])
model.load_state_dict(
    torch.load('weights/efficientdet-d' + str(compound_coef) + '.pth'))
# model.requires_grad_(False)
model.eval()

if use_cuda:
    model = model.cuda()
if use_float16:
    model = model.half()

start_time = time.time()
last_loop = 0
for loop_cnt in range(0, 1000):
    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
def excuteModel(videoname):
    # Video's path: set int to use webcam, set str to read from a video file
    if videoname is not None:
        video_src = os.path.join(r'D:\GitHub\Detection\server\uploads', f"{videoname}.mp4")
    else:
        video_src = 'D:\\GitHub\\Detection\\server\AImodel\\videotest\\default.mp4'

    compound_coef = 2
    trained_weights = 'D:\\GitHub\\Detection\\server\\AImodel\\weights\\efficientdet-video.pth'

    force_input_size = None  # set None to use default size

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
                'truck', 'boat', 'traffic light', 'fire hydrant', '', 'stop sign',
                'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
                'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '',
                'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
                'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
                'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
                'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
                '', 'dining table', '', '', 'toilet', '', 'tv', 'laptop', 'mouse', 'remote',
                'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
                'hair drier', 'toothbrush']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(trained_weights))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    # function for display
    # Box
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    writer = None

    # try to determine the total number of frames in the video file
    try:
        prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() \
            else cv2.CAP_PROP_FRAME_COUNT
        total = int(cap.get(prop))  # cap is the VideoCapture opened above
        print("[INFO] {} total frames in video".format(total))
    # an error occurred while trying to determine the total
    # number of frames in the video file
    except:
        print("[INFO] could not determine # of frames in video")
        total = -1

    path_out = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'outvideo')
    path_result = r"D:\GitHub\Detection\server\AImodel\videotest\default.mp4"
    path_asset = r"D:\GitHub\Detection\client\src\assets"

    for i in range(0, length):
        ret, frame = cap.read()
        if not ret:
            break

        # frame preprocessing
        ori_imgs, framed_imgs, framed_metas = preprocess_video(frame, max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = model(x)
            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold, iou_threshold)

        # result
        out = invert_affine(framed_metas, out)
        img_show = display(out, ori_imgs, obj_list)

        if writer is None:
            # initialize our video writer
            fourcc = 0x00000021
            # fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            if videoname is not None:
                path_result = os.path.join(path_out, f"{videoname}.mp4")
            else:
                path_result = os.path.join(path_out, "default.mp4")
            writer = cv2.VideoWriter(path_result, fourcc, 30,
                                     (img_show.shape[1], img_show.shape[0]), True)

        # write the output frame to disk
        writer.write(img_show)
        print("Processing data... " + str(round((i + 1) / length, 3) * 100) + " %")

        # show frame by frame
        # cv2.imshow('frame', img_show)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print("[INFO] cleaning up...")
    writer.release()
    cap.release()
    cv2.destroyAllWindows()

    if videoname is not None:
        path_asset = os.path.join(path_asset, f"{videoname}.mp4")
    else:
        path_asset = os.path.join(path_asset, "default.mp4")
    copyfile(path_result, path_asset)
    return path_asset
    coco_eval.summarize()


if __name__ == '__main__':
    SET_NAME = params['val_set']
    VAL_GT = f'datasets/{params["project_name"]}/{SET_NAME}.json'
    VAL_IMGS = f'datasets/{params["project_name"]}/{SET_NAME}/{SET_NAME}'
    MAX_IMAGES = 10000
    coco_gt = COCO(VAL_GT)
    image_ids = coco_gt.getImgIds()[:MAX_IMAGES]

    if override_prev_results or not os.path.exists(f'{SET_NAME}_bbox_results.json'):
        model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                     ratios=eval(params['anchors_ratios']),
                                     scales=eval(params['anchors_scales']))
        model.load_state_dict(torch.load(weights_path))
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model.cuda(gpu)
        if use_float16:
            model.half()

        evaluate_coco(VAL_IMGS, SET_NAME, image_ids, coco_gt, model)

    # _eval(coco_gt, image_ids, f'{SET_NAME}_bbox_results.json')
class ObjectDetectionService(): def __init__(self, model_name, model_path): # effdet self.model_name = model_name self.model_path = model_path self.input_image_key = 'images' self.anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)] self.anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)] self.compound_coef = 0 self.threshold = 0.5 self.iou_threshold = 0.5 self.obj_list = [ '一次性快餐盒', '书籍纸张', '充电宝', '剩饭剩菜', '包', '垃圾桶', '塑料器皿', '塑料玩具', '塑料衣架', '大骨头', '干电池', '快递纸袋', '插头电线', '旧衣服', '易拉罐', '枕头', '果皮果肉', '毛绒玩具', '污损塑料', '污损用纸', '洗护用品', '烟蒂', '牙签', '玻璃器皿', '砧板', '筷子', '纸盒纸箱', '花盆', '茶叶渣', '菜帮菜叶', '蛋壳', '调料瓶', '软膏', '过期药物', '酒瓶', '金属厨具', '金属器皿', '金属食品罐', '锅', '陶瓷器皿', '鞋', '食用油桶', '饮料瓶', '鱼骨' ] self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536] self.input_size = self.input_sizes[self.compound_coef] self.model = EfficientDetBackbone(compound_coef=self.compound_coef, num_classes=len(self.obj_list), ratios=self.anchor_ratios, scales=self.anchor_scales) self.model.load_state_dict(torch.load(self.model_path), strict=False) self.model.requires_grad_(False) self.model.eval() def _preprocess(self, data): preprocessed_data = {} for k, v in data.items(): for file_name, file_content in v.items(): ori_imgs, framed_imgs, framed_metas = preprocess( file_content, max_size=self.input_size) preprocessed_data[k] = [framed_imgs, framed_metas] return preprocessed_data def _inference(self, data): """ model inference function Here are a inference example of resnet, if you use another model, please modify this function """ framed_imgs, framed_metas = data[self.input_image_key] if torch.cuda.is_available(): x = torch.stack( [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0) self.model = self.model.cuda() else: x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0) x = x.to(torch.float32).permute(0, 3, 1, 2) #if use_float16: # model = model.half() with torch.no_grad(): features, regression, classification, anchors = self.model(x) regressBoxes = BBoxTransform() clipBoxes = ClipBoxes() out = postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, self.threshold, self.iou_threshold) out = invert_affine(framed_metas, out) result = OrderedDict() result['detection_classes'] = [] result['detection_scores'] = [] result['detection_boxes'] = [] for i in range(len(out)): if len(out[i]['rois']) == 0: continue for j in range(len(out[i]['rois'])): x1, y1, x2, y2 = out[i]['rois'][j].astype(np.int) result['detection_boxes'].append([x1, y1, x2, y2]) obj = self.obj_list[out[i]['class_ids'][j]] result['detection_classes'].append(obj) score = float(out[i]['scores'][j]) result['detection_scores'].append(score) return result def _postprocess(self, data): return data def inference(self, data): ''' Wrapper function to run preprocess, inference and postprocess functions. Parameters ---------- data : map of object Raw input from request. Returns ------- list of outputs to be sent back to client. 
data to be sent back ''' pre_start_time = time.time() data = self._preprocess(data) infer_start_time = time.time() # Update preprocess latency metric pre_time_in_ms = (infer_start_time - pre_start_time) * 1000 print('preprocess time: ' + str(pre_time_in_ms) + 'ms') data = self._inference(data) infer_end_time = time.time() infer_in_ms = (infer_end_time - infer_start_time) * 1000 print('infer time: ' + str(infer_in_ms) + 'ms') data = self._postprocess(data) # Update inference latency metric post_time_in_ms = (time.time() - infer_end_time) * 1000 print('postprocess time: ' + str(post_time_in_ms) + 'ms') print('latency: ' + str(pre_time_in_ms + infer_in_ms + post_time_in_ms) + 'ms') data['latency_time'] = str( round(pre_time_in_ms + infer_in_ms + post_time_in_ms, 1)) + ' ms' return data '''
def train(opt): params = Params(f'projects/{opt.project}.yml') if params.num_gpus == 0: os.environ['CUDA_VISIBLE_DEVICES'] = '-1' if torch.cuda.is_available(): torch.cuda.manual_seed(42) else: torch.manual_seed(42) opt.saved_path = opt.saved_path + f'/{params.project_name}/' opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/' os.makedirs(opt.log_path, exist_ok=True) os.makedirs(opt.saved_path, exist_ok=True) training_params = { 'batch_size': opt.batch_size, 'shuffle': True, 'drop_last': True, 'collate_fn': collater, 'num_workers': opt.num_workers } val_params = { 'batch_size': opt.batch_size, 'shuffle': False, 'drop_last': True, 'collate_fn': collater, 'num_workers': opt.num_workers } input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536] training_set = CocoDataset(root_dir=opt.data_path + params.project_name, set=params.train_set, transform=transforms.Compose([ Normalizer(mean=params.mean, std=params.std), Augmenter(), Resizer(input_sizes[opt.compound_coef]) ])) training_generator = DataLoader(training_set, **training_params) val_set = CocoDataset(root_dir=opt.data_path + params.project_name, set=params.val_set, transform=transforms.Compose([ Normalizer(mean=params.mean, std=params.std), Resizer(input_sizes[opt.compound_coef]) ])) val_generator = DataLoader(val_set, **val_params) model = EfficientDetBackbone(num_anchors=9, num_classes=len(params.obj_list), compound_coef=opt.compound_coef) # load last weights if opt.load_weights is not None: if opt.load_weights.endswith('.pth'): weights_path = opt.load_weights else: weights_path = get_last_weights(opt.saved_path) try: last_step = int( os.path.basename(weights_path).split('_')[-1].split('.')[0]) except: last_step = 0 model.load_state_dict(torch.load(weights_path)) print( f'loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}' ) else: last_step = 0 print('initializing weights...') init_weights(model) # freeze backbone if train head_only if opt.head_only: def freeze_backbone(m): classname = m.__class__.__name__ for ntl in ['EfficientNet', 'BiFPN']: if ntl in classname: for param in m.parameters(): param.requires_grad = False model.apply(freeze_backbone) print('freezed backbone') # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4 # useful when gpu memory is limited. # because when bn is disable, the training will be very unstable or slow to converge, # apply sync_bn can solve it, # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus. # but it would also slow down the training by a little bit. 
if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4: model.apply(replace_w_sync_bn) writer = SummaryWriter( opt.log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/') if params.num_gpus > 0: model = model.cuda() model = CustomDataParallel(model, params.num_gpus) optimizer = torch.optim.AdamW(model.parameters(), opt.lr) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) criterion = FocalLoss() best_loss = 1e5 best_epoch = 0 step = max(0, last_step) model.train() num_iter_per_epoch = len(training_generator) for epoch in range(opt.num_epochs): try: model.train() epoch_loss = [] progress_bar = tqdm(training_generator) for iter, data in enumerate(progress_bar): try: imgs = data['img'] annot = data['annot'] if params.num_gpus > 0: annot = annot.cuda() optimizer.zero_grad() _, regression, classification, anchors = model(imgs) cls_loss, reg_loss = criterion( classification, regression, anchors, annot, # imgs=imgs, obj_list=params.obj_list # uncomment this to debug ) loss = cls_loss + reg_loss if loss == 0 or not torch.isfinite(loss): continue loss.backward() # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1) optimizer.step() epoch_loss.append(float(loss)) progress_bar.set_description( 'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}' .format(step, epoch + 1, opt.num_epochs, iter + 1, num_iter_per_epoch, cls_loss.item(), reg_loss.item(), loss.item())) writer.add_scalars('Loss', {'train': loss}, step) writer.add_scalars('Regression_loss', {'train': reg_loss}, step) writer.add_scalars('Classfication_loss', {'train': cls_loss}, step) # log learning_rate current_lr = optimizer.param_groups[0]['lr'] writer.add_scalar('learning_rate', current_lr, step) step += 1 except Exception as e: print(traceback.format_exc()) print(e) continue scheduler.step(np.mean(epoch_loss)) if step % opt.save_interval == 0 and step > 0: save_checkpoint( model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth') if epoch % opt.val_interval == 0: model.eval() loss_regression_ls = [] loss_classification_ls = [] for iter, data in enumerate(val_generator): with torch.no_grad(): imgs = data['img'] annot = data['annot'] if params.num_gpus > 0: annot = annot.cuda() _, regression, classification, anchors = model(imgs) cls_loss, reg_loss = criterion(classification, regression, anchors, annot) loss = cls_loss + reg_loss if loss == 0 or not torch.isfinite(loss): continue loss_classification_ls.append(cls_loss.item()) loss_regression_ls.append(reg_loss.item()) cls_loss = np.mean(loss_classification_ls) reg_loss = np.mean(loss_regression_ls) loss = cls_loss + reg_loss print( 'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}' .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss, loss.mean())) writer.add_scalars('Total_loss', {'val': loss}, step) writer.add_scalars('Regression_loss', {'val': reg_loss}, step) writer.add_scalars('Classfication_loss', {'val': cls_loss}, step) if loss + opt.es_min_delta < best_loss: best_loss = loss best_epoch = epoch save_checkpoint( model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth' ) # onnx export is not tested. 
# dummy_input = torch.rand(opt.batch_size, 3, 512, 512) # if torch.cuda.is_available(): # dummy_input = dummy_input.cuda() # if isinstance(model, nn.DataParallel): # model.module.backbone_net.model.set_swish(memory_efficient=False) # # torch.onnx.export(model.module, dummy_input, # os.path.join(opt.saved_path, 'signatrix_efficientdet_coco.onnx'), # verbose=False) # model.module.backbone_net.model.set_swish(memory_efficient=True) # else: # model.backbone_net.model.set_swish(memory_efficient=False) # # torch.onnx.export(model, dummy_input, # os.path.join(opt.saved_path, 'signatrix_efficientdet_coco.onnx'), # verbose=False) # model.backbone_net.model.set_swish(memory_efficient=True) # Early stopping if epoch - best_epoch > opt.es_patience > 0: print( 'Stop training at epoch {}. The lowest loss achieved is {}' .format(epoch, loss)) break writer.close() except KeyboardInterrupt: save_checkpoint( model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')