def test(opt):
    """Run a trained SSD over the COCO val2017 split and save annotated images.

    For every sample that yields at least one detection above
    ``opt.cls_threshold``, the source image is re-read from
    ``<opt.data_path>/val2017``, boxes and captions are drawn on it, and the
    result is written to ``<opt.output>/<stem>_prediction.jpg``.

    Args:
        opt: Options object. Reads ``pretrained_model``, ``data_path``,
            ``output``, ``nms_threshold`` and ``cls_threshold``.

    Note:
        ``opt.output`` is deleted and recreated on every run.
    """
    model = SSD(backbone=ResNet())
    # map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only
    # host; load_state_dict copies the tensors into the model afterwards.
    checkpoint = torch.load(opt.pretrained_model, map_location="cpu")
    model.load_state_dict(checkpoint["model_state_dict"])
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model.cuda()
    model.eval()

    dboxes = generate_dboxes()
    test_set = CocoDataset(opt.data_path, 2017, "val",
                           SSDTransformer(dboxes, (300, 300), val=True))
    encoder = Encoder(dboxes)

    # Start from an empty output directory.
    if os.path.isdir(opt.output):
        shutil.rmtree(opt.output)
    os.makedirs(opt.output)

    for img, img_id, _, _, _ in test_set:
        if img is None:
            continue
        if use_cuda:
            img = img.cuda()

        with torch.no_grad():
            ploc, plabel = model(img.unsqueeze(dim=0))
            # The trailing 20 is forwarded unchanged to decode_batch
            # (presumably the max number of detections -- confirm in Encoder).
            result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0]
        loc, label, prob = [r.cpu().numpy() for r in result]

        # Keep only detections whose score clears the threshold.
        best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
        loc, label, prob = loc[best], label[best], prob[best]
        if len(loc) == 0:
            continue

        path = test_set.coco.loadImgs(img_id)[0]["file_name"]
        output_img = cv2.imread(os.path.join(opt.data_path, "val2017", path))
        if output_img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            continue

        # Scale the box coordinates by the image width (x) and height (y).
        height, width, _ = output_img.shape
        loc[:, 0::2] *= width
        loc[:, 1::2] *= height
        loc = loc.astype(np.int32)

        for box, lb, pr in zip(loc, label, prob):
            category = test_set.label_info[lb]
            color = colors[lb]
            caption = category + " : %.2f" % pr
            # tolist() yields plain Python ints for the OpenCV point tuples.
            xmin, ymin, xmax, ymax = box.tolist()
            cv2.rectangle(output_img, (xmin, ymin), (xmax, ymax), color, 2)
            text_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
            # Filled background strip so the white caption stays readable.
            cv2.rectangle(output_img, (xmin, ymin),
                          (xmin + text_size[0] + 3, ymin + text_size[1] + 4),
                          color, -1)
            cv2.putText(output_img, caption, (xmin, ymin + text_size[1] + 4),
                        cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

        # Write once per image; splitext copes with any extension length.
        stem = os.path.splitext(path)[0]
        cv2.imwrite("{}/{}_prediction.jpg".format(opt.output, stem), output_img)
def test(opt):
    """Detect objects in a single image and save an annotated copy.

    Args:
        opt: Options object. Reads ``pretrained_model``, ``input`` (image
            path), ``output`` (destination path or ``None``),
            ``nms_threshold`` and ``cls_threshold``.

    Raises:
        IOError: If OpenCV cannot read ``opt.input``.

    When ``opt.output`` is ``None`` the result is written next to the input
    as ``<input stem>_prediction.jpg``.
    """
    import os  # local import: only needed to derive the default output name

    model = SSD(backbone=ResNet())
    # map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only
    # host; load_state_dict copies the tensors into the model afterwards.
    checkpoint = torch.load(opt.pretrained_model, map_location="cpu")
    model.load_state_dict(checkpoint["model_state_dict"])
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model.cuda()
    model.eval()

    dboxes = generate_dboxes()
    transformer = SSDTransformer(dboxes, (300, 300), val=True)
    img = Image.open(opt.input).convert("RGB")
    # The transformer expects box/label arguments; dummy tensors are passed
    # because only the transformed image is used here.
    img, _, _, _ = transformer(img, None, torch.zeros(1, 4), torch.zeros(1))
    encoder = Encoder(dboxes)
    if use_cuda:
        img = img.cuda()

    with torch.no_grad():
        ploc, plabel = model(img.unsqueeze(dim=0))
        result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0]
    loc, label, prob = [r.cpu().numpy() for r in result]

    # Keep only detections whose score clears the threshold.
    best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
    loc, label, prob = loc[best], label[best], prob[best]

    output_img = cv2.imread(opt.input)
    if output_img is None:
        # cv2.imread returns None instead of raising on unreadable input.
        raise IOError("Could not read image: {}".format(opt.input))

    if len(loc) > 0:
        # Scale the box coordinates by the image width (x) and height (y).
        height, width, _ = output_img.shape
        loc[:, 0::2] *= width
        loc[:, 1::2] *= height
        loc = loc.astype(np.int32)
        for box, lb, pr in zip(loc, label, prob):
            category = coco_classes[lb]
            color = colors[lb]
            caption = category + " : %.2f" % pr
            # tolist() yields plain Python ints for the OpenCV point tuples.
            xmin, ymin, xmax, ymax = box.tolist()
            cv2.rectangle(output_img, (xmin, ymin), (xmax, ymax), color, 2)
            text_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
            # Filled background strip so the white caption stays readable.
            cv2.rectangle(output_img, (xmin, ymin),
                          (xmin + text_size[0] + 3, ymin + text_size[1] + 4),
                          color, -1)
            cv2.putText(output_img, caption, (xmin, ymin + text_size[1] + 4),
                        cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    if opt.output is None:
        # splitext handles extensions of any length (".jpeg", ".png", ...).
        output = "{}_prediction.jpg".format(os.path.splitext(opt.input)[0])
    else:
        output = opt.output
    cv2.imwrite(output, output_img)
draw.rectangle(xy=[l + 1. for l in box_location], outline=box_color) # Text (class label) text_size = font.getsize(box_label_name.upper()) text_location = [box_location[0] + 2., box_location[1] - text_size[1]] textbox_location = [box_location[0], box_location[1] - text_size[1], box_location[0] + text_size[0] + 4.,box_location[1]] draw.rectangle(xy=textbox_location, fill=box_color) draw.text(xy=text_location, text=box_label_name.upper(), fill='white') annotated_image.save(args.image_save_path+'p_'+args.test_image.split('/')[-1]) del draw if __name__ == '__main__': args = get_infer_argument() set_cuda_dev(args.ngpu) print('Arguments for inference : ', args) # Load model checkpoint model = SSD('test', args) checkpoint = args.trained_model # '*.pth' _, model, _ = load_checkpoint(model, args.trained_model_path+checkpoint) model = model.cuda() model.eval() with torch.no_grad(): img_example = args.test_image # With absolute path (ex. /media/dataset/VOC2007/JPEGImages/000001.jpg) original_image = Image.open(img_example, mode='r') original_image = original_image.convert('RGB') detect_image(original_image, args) print('Detect image finished!')
def main(opt):
    """Train SSD or SSDLite on COCO 2017, checkpointing after every epoch.

    With CUDA available the process joins an NCCL process group and the
    model is wrapped in DistributedDataParallel (apex's variant when
    ``opt.amp`` is set). Without CUDA the bare model is trained.

    Args:
        opt: Options object. Reads ``batch_size``, ``num_workers``, ``model``,
            ``data_path``, ``lr``, ``momentum``, ``weight_decay``,
            ``multistep``, ``amp``, ``log_path``, ``save_folder``, ``epochs``
            and ``nms_threshold``. ``opt.lr`` is overwritten in place with
            the scaled learning rate.

    Note:
        ``opt.log_path`` is deleted and recreated on every run. Training
        resumes from ``<opt.save_folder>/SSD.pth`` when that file exists.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        num_gpus = torch.distributed.get_world_size()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)
        num_gpus = 1

    train_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": False,
        "num_workers": opt.num_workers,
        "collate_fn": collate_fn,
    }
    test_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": False,
        "drop_last": False,
        "num_workers": opt.num_workers,
        "collate_fn": collate_fn,
    }

    if opt.model == "ssd":
        dboxes = generate_dboxes(model="ssd")
        model = SSD(backbone=ResNet(), num_classes=len(coco_classes))
    else:
        dboxes = generate_dboxes(model="ssdlite")
        model = SSDLite(backbone=MobileNetV2(), num_classes=len(coco_classes))

    train_set = CocoDataset(opt.data_path, 2017, "train",
                            SSDTransformer(dboxes, (300, 300), val=False))
    train_loader = DataLoader(train_set, **train_params)
    test_set = CocoDataset(opt.data_path, 2017, "val",
                           SSDTransformer(dboxes, (300, 300), val=True))
    test_loader = DataLoader(test_set, **test_params)

    encoder = Encoder(dboxes)

    # Scale the learning rate with the process count and with the batch size
    # relative to a reference batch of 32.
    opt.lr = opt.lr * num_gpus * (opt.batch_size / 32)
    criterion = Loss(dboxes)

    optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr,
                                momentum=opt.momentum,
                                weight_decay=opt.weight_decay,
                                nesterov=True)
    scheduler = MultiStepLR(optimizer=optimizer, milestones=opt.multistep,
                            gamma=0.1)

    if use_cuda:
        model.cuda()
        criterion.cuda()

        if opt.amp:
            from apex import amp
            from apex.parallel import DistributedDataParallel as DDP
            model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        else:
            from torch.nn.parallel import DistributedDataParallel as DDP
        # It is recommended to use DistributedDataParallel, instead of DataParallel
        # to do multi-GPU training, even if there is only a single node.
        model = DDP(model)

    # On CPU the model is never wrapped, so ``model.module`` does not exist.
    # Resolve the underlying network once and use it for all state_dict I/O.
    bare_model = getattr(model, "module", model)

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.save_folder):
        os.makedirs(opt.save_folder)
    checkpoint_path = os.path.join(opt.save_folder, "SSD.pth")

    writer = SummaryWriter(opt.log_path)

    if os.path.isfile(checkpoint_path):
        # Load onto CPU first so the checkpoint is usable regardless of the
        # device it was saved from; load_state_dict moves the tensors.
        checkpoint = torch.load(checkpoint_path, map_location="cpu")
        first_epoch = checkpoint["epoch"] + 1
        bare_model.load_state_dict(checkpoint["model_state_dict"])
        scheduler.load_state_dict(checkpoint["scheduler"])
        optimizer.load_state_dict(checkpoint["optimizer"])
    else:
        first_epoch = 0

    for epoch in range(first_epoch, opt.epochs):
        train(model, train_loader, epoch, writer, criterion, optimizer,
              scheduler, opt.amp)
        evaluate(model, test_loader, epoch, writer, encoder, opt.nms_threshold)

        checkpoint = {
            "epoch": epoch,
            "model_state_dict": bare_model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "scheduler": scheduler.state_dict(),
        }
        torch.save(checkpoint, checkpoint_path)
def main(opt):
    """Train SSD on OIDataset, checkpointing after every epoch.

    Args:
        opt: Options object. Reads ``batch_size``, ``num_workers``, ``lr``,
            ``momentum``, ``weight_decay``, ``multistep``, ``log_path``,
            ``save_folder``, ``epochs`` and ``nms_threshold``. ``opt.lr`` is
            overwritten in place with the scaled learning rate.

    Note:
        ``opt.log_path`` is deleted and recreated on every run. Training
        resumes from ``<opt.save_folder>/SSD.pth`` when that file exists.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    train_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": False,
        "num_workers": opt.num_workers,
        "collate_fn": collate_fn,
    }
    eval_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": False,
        "num_workers": opt.num_workers,
        "collate_fn": collate_fn,
    }

    dboxes = generate_dboxes()
    model = SSD()
    train_set = OIDataset(SimpleTransformer(dboxes), train=True)
    train_loader = DataLoader(train_set, **train_params)
    val_set = OIDataset(SimpleTransformer(dboxes, eval=True), validation=True)
    val_loader = DataLoader(val_set, **eval_params)

    encoder = Encoder(dboxes)

    # Scale the learning rate relative to a reference batch size of 32.
    opt.lr = opt.lr * (opt.batch_size / 32)
    criterion = Loss(dboxes)

    optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr,
                                momentum=opt.momentum,
                                weight_decay=opt.weight_decay,
                                nesterov=True)
    scheduler = MultiStepLR(optimizer=optimizer, milestones=opt.multistep,
                            gamma=0.1)

    if use_cuda:
        model.cuda()
        criterion.cuda()

    # Wrap unconditionally so ``model.module`` below is valid on CPU-only
    # hosts too; DataParallel simply forwards to the wrapped module when no
    # GPU is present.
    model = torch.nn.DataParallel(model)

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.save_folder):
        os.makedirs(opt.save_folder)
    checkpoint_path = os.path.join(opt.save_folder, "SSD.pth")

    writer = SummaryWriter(opt.log_path)

    if os.path.isfile(checkpoint_path):
        # Load onto CPU first so the checkpoint is usable regardless of the
        # device it was saved from; load_state_dict moves the tensors.
        checkpoint = torch.load(checkpoint_path, map_location="cpu")
        first_epoch = checkpoint["epoch"] + 1
        model.module.load_state_dict(checkpoint["model_state_dict"])
        scheduler.load_state_dict(checkpoint["scheduler"])
        optimizer.load_state_dict(checkpoint["optimizer"])
    else:
        first_epoch = 0

    for epoch in range(first_epoch, opt.epochs):
        train(model, train_loader, epoch, writer, criterion, optimizer, scheduler)
        evaluate(model, val_loader, encoder, opt.nms_threshold)

        checkpoint = {
            "epoch": epoch,
            "model_state_dict": model.module.state_dict(),
            "optimizer": optimizer.state_dict(),
            "scheduler": scheduler.state_dict(),
        }
        torch.save(checkpoint, checkpoint_path)
def evaluate_test_dataset():
    """Evaluate the module-level ``model`` on the OIDataset test split.

    Relies on the globals ``transformer``, ``model`` and ``encoder`` that the
    ``__main__`` section below sets up. The value 0.45 is passed to
    ``evaluate`` in the position where the training script passes its NMS
    threshold.
    """
    test_params = {
        "batch_size": 4,
        "shuffle": True,
        "drop_last": False,
        "num_workers": 4,
        "collate_fn": collate_fn,
    }
    test_set = OIDataset(transformer, test=True)
    test_loader = DataLoader(test_set, **test_params)
    evaluate(model, test_loader, encoder, 0.45)


if __name__ == "__main__":
    model = SSD()
    # map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only
    # host; load_state_dict copies the tensors into the model afterwards.
    checkpoint = torch.load(model_path, map_location="cpu")
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()

    dboxes = generate_dboxes()
    transformer = SimpleTransformer(dboxes, eval=True)
    encoder = Encoder(dboxes)

    # evaluate_test_dataset()

    for image in tqdm(os.listdir(input_folder)):
        # os.path.join works whether or not input_folder ends with a separator.
        draw_prediction_one(os.path.join(input_folder, image))
def test(opt):
    """Run SSD detection on every frame of a video and write an annotated copy.

    Args:
        opt: Options object. Reads ``pretrained_model``, ``input`` (video
            path), ``output`` (destination path or ``None``),
            ``nms_threshold`` and ``cls_threshold``.

    When ``opt.output`` is ``None`` the result is written next to the input
    as ``<input stem>_prediction.mp4``. The capture and writer handles are
    released even if processing a frame raises.
    """
    import os  # local import: only needed to derive the default output name

    model = SSD(backbone=ResNet())
    # map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only
    # host; load_state_dict copies the tensors into the model afterwards.
    checkpoint = torch.load(opt.pretrained_model, map_location="cpu")
    model.load_state_dict(checkpoint["model_state_dict"])
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model.cuda()
    model.eval()

    dboxes = generate_dboxes()
    transformer = SSDTransformer(dboxes, (300, 300), val=True)
    encoder = Encoder(dboxes)

    cap = cv2.VideoCapture(opt.input)
    if opt.output is None:
        # splitext handles extensions of any length.
        output = "{}_prediction.mp4".format(os.path.splitext(opt.input)[0])
    else:
        output = opt.output
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    out = cv2.VideoWriter(output, cv2.VideoWriter_fourcc(*"MJPG"),
                          int(cap.get(cv2.CAP_PROP_FPS)), (width, height))

    try:
        while cap.isOpened():
            flag, frame = cap.read()
            if not flag:
                # End of stream or read failure: nothing to copy or annotate.
                break
            # Keep an untouched BGR copy to draw on; the model gets RGB.
            output_frame = np.copy(frame)
            frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            # Dummy box/label tensors: only the transformed image is used.
            frame, _, _, _ = transformer(frame, None, torch.zeros(1, 4),
                                         torch.zeros(1))
            if use_cuda:
                frame = frame.cuda()

            with torch.no_grad():
                ploc, plabel = model(frame.unsqueeze(dim=0))
                result = encoder.decode_batch(ploc, plabel,
                                              opt.nms_threshold, 20)[0]
            loc, label, prob = [r.cpu().numpy() for r in result]

            # Keep only detections whose score clears the threshold.
            best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
            loc, label, prob = loc[best], label[best], prob[best]

            if len(loc) > 0:
                # Scale the box coordinates by frame width (x) and height (y).
                loc[:, 0::2] *= width
                loc[:, 1::2] *= height
                loc = loc.astype(np.int32)
                for box, lb, pr in zip(loc, label, prob):
                    category = coco_classes[lb]
                    color = colors[lb]
                    caption = category + " : %.2f" % pr
                    # tolist() yields plain Python ints for OpenCV points.
                    xmin, ymin, xmax, ymax = box.tolist()
                    cv2.rectangle(output_frame, (xmin, ymin), (xmax, ymax),
                                  color, 2)
                    text_size = cv2.getTextSize(
                        caption, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
                    # Filled strip so the white caption stays readable.
                    cv2.rectangle(
                        output_frame, (xmin, ymin),
                        (xmin + text_size[0] + 3, ymin + text_size[1] + 4),
                        color, -1)
                    cv2.putText(output_frame, caption,
                                (xmin, ymin + text_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

            # Every frame is written, with or without detections.
            out.write(output_frame)
    finally:
        cap.release()
        out.release()
def main(agrs):
    """Train an SSD model on VOC data, plotting losses and saving each epoch.

    Args:
        agrs: Parsed options. The parameter name is a misspelling of
            ``args`` and is kept so existing callers keep working. Reads
            ``resume``, ``trained_model``, ``overlap_threshold``,
            ``negpos_ratio``, ``alpha``, ``dataroot``, ``datayears``,
            ``datanames``, ``discard_difficult``, ``use_augment``,
            ``batch_size``, ``iterations``, ``lr_decay`` (comma-separated
            iteration counts), ``clip_grad`` and ``model_save_name``.
    """
    # The body used to read a module-level ``args`` and silently ignore the
    # parameter; bind the name locally so the passed-in options are used.
    args = agrs

    start_epoch = 0

    # Initialize model or load trained checkpoint
    if args.resume:
        start_epoch, model, optimizer = load_checkpoint(args.trained_model)
    else:
        model = SSD('train', args)
        optimizer = init_optimizer(model, args)

    # Move to default device and set 'train' mode
    model = model.cuda()
    model.train()

    # Create multibox loss
    criterion = MultiBoxLoss(PriorBox().forward().cuda(),
                             args.overlap_threshold,
                             args.negpos_ratio,
                             args.alpha)

    # VOC dataloaders
    train_dataset = VOCxx('train', args.dataroot, args.datayears,
                          args.datanames,
                          discard_difficult=args.discard_difficult,
                          use_augment=args.use_augment)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True,
                              collate_fn=train_dataset.collate_fn,
                              num_workers=1, pin_memory=True)

    # Convert iteration-based settings to epochs. Clamp to at least one
    # iteration per epoch so a dataset smaller than a batch does not divide
    # by zero.
    iters_per_epoch = max(1, len(train_dataset) // args.batch_size)
    epochs = args.iterations // iters_per_epoch
    decay_iters = [int(it) for it in args.lr_decay.split(',')]
    decay_lr_at = [it // iters_per_epoch for it in decay_iters]

    print('total length of dataset : ', len(train_dataset))
    print('total epochs : ', epochs)
    print('decay lr at : ', decay_lr_at)

    # Epochs
    loc_losses, conf_losses = [], []
    for epoch in range(start_epoch, epochs):
        # Decay learning rate at particular epochs
        if epoch in decay_lr_at:
            optimizer = adjust_lr(optimizer)

        for i, (images, targets) in enumerate(train_loader):
            # Move to default device
            images = images.cuda()
            targets = [t.cuda() for t in targets]

            # Forward prop
            preds = model(images)

            # Loss
            loc_loss, conf_loss = criterion(preds, targets)
            loss = loc_loss + conf_loss

            # Backward prop
            optimizer.zero_grad()
            loss.backward()

            # Clip gradients if necessary
            if args.clip_grad:
                clip_gradient(model.parameters(), args.clip_grad)

            # Update model
            optimizer.step()

            # Print status and record the losses for plotting
            if i % 200 == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Loss : {loss:.4f}\t'.format(epoch, i, len(train_loader),
                                                   loss=loss.item()))
                loc_losses.append(loc_loss.item())
                conf_losses.append(conf_loss.item())

        # Plot losses
        plot_losses(loc_losses, 'regression', args.model_save_name)
        plot_losses(conf_losses, 'classification', args.model_save_name)

        # Save checkpoint
        save_checkpoint(epoch, model, optimizer, args.model_save_name)