def test(opt):
    model = SSD(backbone=ResNet())
    checkpoint = torch.load(opt.pretrained_model)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()

    dboxes = generate_dboxes()
    test_set = CocoDataset(opt.data_path, 2017, "val", SSDTransformer(dboxes, (300, 300), val=True))
    encoder = Encoder(dboxes)

    # Start from an empty output folder
    if os.path.isdir(opt.output):
        shutil.rmtree(opt.output)
    os.makedirs(opt.output)

    for img, img_id, img_size, _, _ in test_set:
        if img is None:
            continue
        if torch.cuda.is_available():
            img = img.cuda()
        with torch.no_grad():
            ploc, plabel = model(img.unsqueeze(dim=0))
            result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0]
            loc, label, prob = [r.cpu().numpy() for r in result]
            # Keep only detections above the confidence threshold
            best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
            loc = loc[best]
            label = label[best]
            prob = prob[best]
            if len(loc) > 0:
                path = test_set.coco.loadImgs(img_id)[0]["file_name"]
                output_img = cv2.imread(os.path.join(opt.data_path, "val2017", path))
                height, width, _ = output_img.shape
                # Boxes are normalized to [0, 1]; rescale to pixel coordinates
                loc[:, 0::2] *= width
                loc[:, 1::2] *= height
                loc = loc.astype(np.int32)
                for box, lb, pr in zip(loc, label, prob):
                    category = test_set.label_info[lb]
                    color = colors[lb]
                    xmin, ymin, xmax, ymax = box
                    cv2.rectangle(output_img, (xmin, ymin), (xmax, ymax), color, 2)
                    text_size = cv2.getTextSize(category + " : %.2f" % pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
                    cv2.rectangle(output_img, (xmin, ymin),
                                  (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1)
                    cv2.putText(output_img, category + " : %.2f" % pr,
                                (xmin, ymin + text_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
                cv2.imwrite("{}/{}_prediction.jpg".format(opt.output, path[:-4]), output_img)

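# A minimal, hypothetical driver for the dataset-level test() above. The flag
# names are assumptions inferred from the attributes the function reads
# (pretrained_model, data_path, output, nms_threshold, cls_threshold), and the
# default values are placeholders, not values from the original script.
import argparse


def get_args():
    parser = argparse.ArgumentParser("Run SSD inference over the COCO 2017 val split")
    parser.add_argument("--pretrained-model", type=str, default="trained_models/SSD.pth")
    parser.add_argument("--data-path", type=str, default="coco")
    parser.add_argument("--output", type=str, default="predictions")
    parser.add_argument("--nms-threshold", type=float, default=0.5)
    parser.add_argument("--cls-threshold", type=float, default=0.3)
    return parser.parse_args()


if __name__ == "__main__":
    test(get_args())
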
def test(opt):
    model = SSD(backbone=ResNet())
    checkpoint = torch.load(opt.pretrained_model)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()

    dboxes = generate_dboxes()
    transformer = SSDTransformer(dboxes, (300, 300), val=True)
    img = Image.open(opt.input).convert("RGB")
    # Dummy boxes/labels: only the transformed image is needed at inference time
    img, _, _, _ = transformer(img, None, torch.zeros(1, 4), torch.zeros(1))
    encoder = Encoder(dboxes)

    if torch.cuda.is_available():
        img = img.cuda()
    with torch.no_grad():
        ploc, plabel = model(img.unsqueeze(dim=0))
        result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0]
        loc, label, prob = [r.cpu().numpy() for r in result]
        # Keep only detections above the confidence threshold
        best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
        loc = loc[best]
        label = label[best]
        prob = prob[best]
        output_img = cv2.imread(opt.input)
        if len(loc) > 0:
            height, width, _ = output_img.shape
            # Boxes are normalized to [0, 1]; rescale to pixel coordinates
            loc[:, 0::2] *= width
            loc[:, 1::2] *= height
            loc = loc.astype(np.int32)
            for box, lb, pr in zip(loc, label, prob):
                category = coco_classes[lb]
                color = colors[lb]
                xmin, ymin, xmax, ymax = box
                cv2.rectangle(output_img, (xmin, ymin), (xmax, ymax), color, 2)
                text_size = cv2.getTextSize(category + " : %.2f" % pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
                cv2.rectangle(output_img, (xmin, ymin),
                              (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1)
                cv2.putText(output_img, category + " : %.2f" % pr,
                            (xmin, ymin + text_size[1] + 4),
                            cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
        if opt.output is None:
            output = "{}_prediction.jpg".format(opt.input[:-4])
        else:
            output = opt.output
        cv2.imwrite(output, output_img)

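# Hypothetical invocation of the single-image test() above; the Namespace
# fields mirror the attributes the function reads, and the file paths are
# placeholders.
from argparse import Namespace

opt = Namespace(
    input="demo/street.jpg",                    # placeholder input image
    pretrained_model="trained_models/SSD.pth",  # placeholder checkpoint path
    output=None,                                # None -> "<input>_prediction.jpg"
    nms_threshold=0.5,
    cls_threshold=0.3,
)
test(opt)
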
def train(train_loop_func, logger, args):
    # Check that GPUs are actually available
    use_cuda = not args.no_cuda
    train_samples = 118287

    # Setup multi-GPU if necessary
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='smddp', init_method='env://')
        args.N_gpu = torch.distributed.get_world_size()
    else:
        args.N_gpu = 1

    if args.seed is None:
        args.seed = np.random.randint(1e4)
    if args.distributed:
        args.seed = (args.seed + torch.distributed.get_rank()) % 2**32
    print("Using seed = {}".format(args.seed))
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(seed=args.seed)

    # Setup data, defaults
    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)
    cocoGt = get_coco_ground_truth(args)

    train_loader = get_train_loader(args, args.seed - 2**31)

    val_dataset = get_val_dataset(args)
    val_dataloader = get_val_dataloader(val_dataset, args)

    ssd300 = SSD300(backbone=ResNet(args.backbone, args.backbone_path))
    args.learning_rate = args.learning_rate * args.N_gpu * (args.batch_size / 32)
    start_epoch = 0
    iteration = 0
    loss_func = Loss(dboxes)

    if use_cuda:
        ssd300.cuda()
        loss_func.cuda()

    optimizer = torch.optim.SGD(tencent_trick(ssd300), lr=args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = MultiStepLR(optimizer=optimizer, milestones=args.multistep, gamma=0.1)
    if args.amp:
        ssd300, optimizer = amp.initialize(ssd300, optimizer, opt_level='O2')

    if args.distributed:
        ssd300 = DDP(ssd300)

    if args.checkpoint is not None:
        if os.path.isfile(args.checkpoint):
            load_checkpoint(ssd300.module if args.distributed else ssd300, args.checkpoint)
            checkpoint = torch.load(args.checkpoint,
                                    map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device()))
            start_epoch = checkpoint['epoch']
            iteration = checkpoint['iteration']
            scheduler.load_state_dict(checkpoint['scheduler'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('Provided checkpoint is not path to a file')
            return

    inv_map = {v: k for k, v in val_dataset.label_map.items()}

    total_time = 0

    if args.mode == 'evaluation':
        acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args)
        if args.local_rank == 0:
            print('Model precision {} mAP'.format(acc))
        return

    mean, std = generate_mean_std(args)

    for epoch in range(start_epoch, args.epochs):
        start_epoch_time = time.time()
        scheduler.step()
        iteration = train_loop_func(ssd300, loss_func, epoch, optimizer, train_loader, val_dataloader,
                                    encoder, iteration, logger, args, mean, std)
        end_epoch_time = time.time() - start_epoch_time
        total_time += end_epoch_time

        if torch.distributed.get_rank() == 0:
            throughput = train_samples / end_epoch_time
            logger.update_epoch_time(epoch, end_epoch_time)
            logger.update_throughput_speed(epoch, throughput)

        if epoch in args.evaluation:
            acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args)

        if args.save and args.local_rank == 0:
            print("saving model...")
            obj = {'epoch': epoch + 1,
                   'iteration': iteration,
                   'optimizer': optimizer.state_dict(),
                   'scheduler': scheduler.state_dict(),
                   'label_map': val_dataset.label_info}
            if args.distributed:
                obj['model'] = ssd300.module.state_dict()
            else:
                obj['model'] = ssd300.state_dict()
            save_path = os.path.join(args.save, f'epoch_{epoch}.pt')
            torch.save(obj, save_path)
            logger.log('model path', save_path)
        train_loader.reset()

    if torch.distributed.get_rank() == 0:
        DLLogger.log((), {'Total training time': '%.2f' % total_time + ' secs'})
        logger.log_summary()

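# The linear learning-rate scaling applied above (base LR * N_gpu * batch_size / 32)
# can be checked in isolation; the numbers below are illustrative placeholders,
# not defaults from the training script.
def scaled_lr(base_lr, n_gpu, per_gpu_batch, reference_batch=32):
    # The effective LR grows with the global batch size relative to a batch of 32.
    return base_lr * n_gpu * (per_gpu_batch / reference_batch)


print(scaled_lr(base_lr=1e-3, n_gpu=8, per_gpu_batch=64))  # 0.016
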
def main(opt):
    if torch.cuda.is_available():
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        num_gpus = torch.distributed.get_world_size()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
        num_gpus = 1

    train_params = {"batch_size": opt.batch_size * num_gpus,
                    "shuffle": True,
                    "drop_last": False,
                    "num_workers": opt.num_workers,
                    "collate_fn": collate_fn}
    test_params = {"batch_size": opt.batch_size * num_gpus,
                   "shuffle": False,
                   "drop_last": False,
                   "num_workers": opt.num_workers,
                   "collate_fn": collate_fn}

    if opt.model == "ssd":
        dboxes = generate_dboxes(model="ssd")
        model = SSD(backbone=ResNet(), num_classes=len(coco_classes))
    else:
        dboxes = generate_dboxes(model="ssdlite")
        model = SSDLite(backbone=MobileNetV2(), num_classes=len(coco_classes))

    train_set = CocoDataset(opt.data_path, 2017, "train", SSDTransformer(dboxes, (300, 300), val=False))
    train_loader = DataLoader(train_set, **train_params)
    test_set = CocoDataset(opt.data_path, 2017, "val", SSDTransformer(dboxes, (300, 300), val=True))
    test_loader = DataLoader(test_set, **test_params)

    encoder = Encoder(dboxes)

    # Scale the base LR linearly with the global batch size (reference batch of 32)
    opt.lr = opt.lr * num_gpus * (opt.batch_size / 32)
    criterion = Loss(dboxes)
    optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=opt.momentum,
                                weight_decay=opt.weight_decay, nesterov=True)
    scheduler = MultiStepLR(optimizer=optimizer, milestones=opt.multistep, gamma=0.1)

    if torch.cuda.is_available():
        model.cuda()
        criterion.cuda()

        if opt.amp:
            from apex import amp
            from apex.parallel import DistributedDataParallel as DDP
            model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        else:
            from torch.nn.parallel import DistributedDataParallel as DDP
        # It is recommended to use DistributedDataParallel, instead of DataParallel
        # to do multi-GPU training, even if there is only a single node.
        model = DDP(model)

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.save_folder):
        os.makedirs(opt.save_folder)
    checkpoint_path = os.path.join(opt.save_folder, "SSD.pth")

    writer = SummaryWriter(opt.log_path)

    if os.path.isfile(checkpoint_path):
        # Resume from the most recent checkpoint if one exists
        checkpoint = torch.load(checkpoint_path)
        first_epoch = checkpoint["epoch"] + 1
        model.module.load_state_dict(checkpoint["model_state_dict"])
        scheduler.load_state_dict(checkpoint["scheduler"])
        optimizer.load_state_dict(checkpoint["optimizer"])
    else:
        first_epoch = 0

    for epoch in range(first_epoch, opt.epochs):
        train(model, train_loader, epoch, writer, criterion, optimizer, scheduler, opt.amp)
        evaluate(model, test_loader, epoch, writer, encoder, opt.nms_threshold)

        checkpoint = {"epoch": epoch,
                      "model_state_dict": model.module.state_dict(),
                      "optimizer": optimizer.state_dict(),
                      "scheduler": scheduler.state_dict()}
        torch.save(checkpoint, checkpoint_path)

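# main() initializes the process group with init_method='env://', so it expects
# a launcher (e.g. `python -m torch.distributed.launch --nproc_per_node=<num_gpus> <script>.py`)
# to populate the rendezvous environment variables. A minimal sketch of what a
# single-process run would need; the address and port values are placeholders.
import os

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
os.environ.setdefault("RANK", "0")
os.environ.setdefault("WORLD_SIZE", "1")
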
def build_predictor(model_file, backbone='resnet50'):
    ssd300 = SSD300(backbone=ResNet(backbone))
    load_checkpoint(ssd300, model_file)
    return ssd300

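# A short usage sketch for build_predictor(); the checkpoint path is a
# placeholder, and the dummy forward pass assumes the 300x300 input size and
# (ploc, plabel) outputs used by the surrounding scripts.
import torch

ssd300 = build_predictor("checkpoints/epoch_64.pt", backbone="resnet50")
ssd300.eval()

# Dummy batch of one 300x300 RGB image.
dummy = torch.randn(1, 3, 300, 300)
with torch.no_grad():
    ploc, plabel = ssd300(dummy)  # per-default-box localizations and class scores
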
def train(train_loop_func, logger, args):
    # Check that GPUs are actually available
    use_cuda = not args.no_cuda

    # Setup multi-GPU if necessary
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.N_gpu = torch.distributed.get_world_size()
    else:
        args.N_gpu = 1

    if args.seed is None:
        args.seed = np.random.randint(1e4)
    if args.distributed:
        args.seed = (args.seed + torch.distributed.get_rank()) % 2**32
    print("Using seed = {}".format(args.seed))
    torch.manual_seed(args.seed)
    np.random.seed(seed=args.seed)

    # Setup data, defaults
    dboxes = dboxes300_coco()
    encoder = Encoder(dboxes)
    cocoGt = get_coco_ground_truth(args)

    train_loader = get_train_loader(args, args.seed - 2**31)

    val_dataset = get_val_dataset(args)
    val_dataloader = get_val_dataloader(val_dataset, args)

    ssd300 = SSD300(backbone=ResNet(args.backbone, args.backbone_path))
    # args.learning_rate = args.learning_rate * args.N_gpu * (args.batch_size / 32)
    print(f"Actual starting LR: {args.learning_rate}")
    start_epoch = 0
    iteration = 0
    loss_func = Loss(dboxes)

    if use_cuda:
        ssd300.cuda()
        loss_func.cuda()

    # optimizer = torch.optim.SGD(tencent_trick(ssd300), lr=args.learning_rate,
    #                             momentum=args.momentum, weight_decay=args.weight_decay, nesterov=True)
    optimizer = torch.optim.AdamW(tencent_trick(ssd300), lr=args.learning_rate,
                                  betas=(0.8, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=True)

    # scheduler = MultiStepLR(optimizer=optimizer, milestones=args.multistep, gamma=0.1)
    # scheduler = CosineAnnealingWarmRestarts(optimizer=optimizer, T_0=20, T_mult=1, eta_min=1e-6)
    scheduler = CosineAnnealingLR(optimizer=optimizer, T_max=args.epochs, eta_min=1e-6)
    # scheduler = OneCycleLR(optimizer, max_lr=0.003, epochs=41, steps_per_epoch=173)
    # scheduler = CyclicLR(optimizer, base_lr=args.learning_rate, max_lr=2*args.learning_rate,
    #                      step_size_up=173*3, step_size_down=173*10)

    if args.amp:
        ssd300, optimizer = amp.initialize(ssd300, optimizer, opt_level='O2')

    if args.distributed:
        ssd300 = DDP(ssd300)

    if args.checkpoint is not None:
        if os.path.isfile(args.checkpoint):
            load_checkpoint(ssd300.module if args.distributed else ssd300, args.checkpoint)
            checkpoint = torch.load(args.checkpoint,
                                    map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device()))
            start_epoch = checkpoint['epoch']
            iteration = checkpoint['iteration']
            scheduler.load_state_dict(checkpoint['scheduler'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        else:
            print('Provided checkpoint is not path to a file')
            return

    inv_map = {v: k for k, v in val_dataset.label_map.items()}

    total_time = 0

    if args.mode == 'evaluation':
        acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args)
        if args.local_rank == 0:
            print('Model precision {} mAP'.format(acc))
        return

    mean, std = generate_mean_std(args)

    for epoch in range(start_epoch, args.epochs):
        start_epoch_time = time.time()
        # scheduler.step()
        iteration = train_loop_func(ssd300, loss_func, epoch, optimizer, scheduler, train_loader,
                                    val_dataloader, encoder, iteration, logger, args, mean, std)
        end_epoch_time = time.time() - start_epoch_time
        total_time += end_epoch_time

        # https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
        scheduler.step()

        if args.local_rank == 0:
            logger.update_epoch_time(epoch, end_epoch_time)

        if epoch in args.evaluation:
            acc = evaluate(ssd300, val_dataloader, cocoGt, encoder, inv_map, args)
            if args.local_rank == 0:
                logger.update_epoch(epoch, acc)

        if args.save and args.local_rank == 0:
            print("saving model...")
            obj = {'epoch': epoch + 1,
                   'iteration': iteration,
                   'optimizer': optimizer.state_dict(),
                   'scheduler': scheduler.state_dict(),
                   'label_map': val_dataset.label_info}
            if args.distributed:
                obj['model'] = ssd300.module.state_dict()
            else:
                obj['model'] = ssd300.state_dict()
            torch.save(obj, './models/epoch_{}.pt'.format(epoch))
        train_loader.reset()

    print('total training time: {}'.format(total_time))

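# The CosineAnnealingLR schedule chosen above can be inspected on its own. A
# stand-alone sketch with a dummy parameter; the base LR and epoch count are
# placeholders, and the scheduler is stepped once per epoch as in the loop above.
import torch
from torch.optim.lr_scheduler import CosineAnnealingLR

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.AdamW(params, lr=1e-3)
scheduler = CosineAnnealingLR(optimizer, T_max=65, eta_min=1e-6)

for epoch in range(65):
    # ... one epoch of training would run here ...
    scheduler.step()
    if epoch % 16 == 0:
        print(epoch, optimizer.param_groups[0]["lr"])  # LR decays from 1e-3 toward 1e-6
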
def test(opt):
    model = SSD(backbone=ResNet())
    checkpoint = torch.load(opt.pretrained_model)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()

    dboxes = generate_dboxes()
    transformer = SSDTransformer(dboxes, (300, 300), val=True)

    cap = cv2.VideoCapture(opt.input)
    if opt.output is None:
        output = "{}_prediction.mp4".format(opt.input[:-4])
    else:
        output = opt.output
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    out = cv2.VideoWriter(output, cv2.VideoWriter_fourcc(*"MJPG"),
                          int(cap.get(cv2.CAP_PROP_FPS)), (width, height))

    encoder = Encoder(dboxes)

    while cap.isOpened():
        flag, frame = cap.read()
        output_frame = np.copy(frame)
        if flag:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        else:
            break
        frame = Image.fromarray(frame)
        # Dummy boxes/labels: only the transformed frame is needed at inference time
        frame, _, _, _ = transformer(frame, None, torch.zeros(1, 4), torch.zeros(1))
        if torch.cuda.is_available():
            frame = frame.cuda()
        with torch.no_grad():
            ploc, plabel = model(frame.unsqueeze(dim=0))
            result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0]
            loc, label, prob = [r.cpu().numpy() for r in result]
            # Keep only detections above the confidence threshold
            best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
            loc = loc[best]
            label = label[best]
            prob = prob[best]
            if len(loc) > 0:
                # Boxes are normalized to [0, 1]; rescale to pixel coordinates
                loc[:, 0::2] *= width
                loc[:, 1::2] *= height
                loc = loc.astype(np.int32)
                for box, lb, pr in zip(loc, label, prob):
                    category = coco_classes[lb]
                    color = colors[lb]
                    xmin, ymin, xmax, ymax = box
                    cv2.rectangle(output_frame, (xmin, ymin), (xmax, ymax), color, 2)
                    text_size = cv2.getTextSize(category + " : %.2f" % pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
                    cv2.rectangle(output_frame, (xmin, ymin),
                                  (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1)
                    cv2.putText(output_frame, category + " : %.2f" % pr,
                                (xmin, ymin + text_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
        out.write(output_frame)
    cap.release()
    out.release()

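# The box rescaling used in the drawing loops above assumes the decoder returns
# corner coordinates normalized to [0, 1]. A tiny numpy illustration with
# made-up values:
import numpy as np

loc = np.array([[0.10, 0.20, 0.55, 0.80]])  # xmin, ymin, xmax, ymax in [0, 1]
width, height = 1280, 720
loc[:, 0::2] *= width    # scale the x coordinates (xmin, xmax)
loc[:, 1::2] *= height   # scale the y coordinates (ymin, ymax)
print(loc.astype(np.int32))  # [[128 144 704 576]]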