def eval(net, test_num=10000):
    """Run VOC-style detection evaluation of `net` over the eval dataset.

    Args:
        net: DataParallel-wrapped detector; `net.module` is handed to
            SSDBoxCoder to build the box coder.
        test_num: unused; kept for backward compatibility with callers.

    Returns:
        The AP results produced by `voc_eval` (IoU 0.5, 11-point metric off).

    Side effects: switches the model to eval() while scoring and back to
    train() before returning.
    """
    net.eval()

    def transform(img, boxes, labels):
        # Resize to the square network input, then tensorize + caffe-style
        # BGR / 0-255 normalization (see module-level caffe_normalize).
        img, boxes = resize(img, boxes, size=(opt.img_size, opt.img_size))
        img = transforms.Compose([transforms.ToTensor(), caffe_normalize])(img)
        return img, boxes, labels

    dataset = ListDataset(root=opt.eval_img_root,
                          list_file=opt.eval_img_list,
                          transform=transform)
    box_coder = SSDBoxCoder(net.module)

    pred_boxes, pred_labels, pred_scores = [], [], []
    gt_boxes, gt_labels = [], []

    # len()/indexing instead of the former dataset.__len__()/__getitem__ calls.
    for i in tqdm(range(len(dataset))):
        inputs, box_targets, label_targets = dataset[i]
        gt_boxes.append(box_targets)
        gt_labels.append(label_targets)
        inputs = inputs.unsqueeze(0)
        with torch.no_grad():
            loc_preds, cls_preds = net(Variable(inputs.cuda()))
        box_preds, label_preds, score_preds = box_coder.decode(
            loc_preds.cpu().data.squeeze(),
            F.softmax(cls_preds.squeeze(), dim=1).cpu().data,
            score_thresh=0.1)
        pred_boxes.append(box_preds)
        pred_labels.append(label_preds)
        pred_scores.append(score_preds)

    aps = voc_eval(pred_boxes, pred_labels, pred_scores,
                   gt_boxes, gt_labels,
                   gt_difficults=None, iou_thresh=0.5, use_07_metric=False)
    net.train()
    return aps
def __init__(self, load_path):
    """Build the detector: DSOD backbone on GPU(s), weights from `load_path`.

    Args:
        load_path: path to a checkpoint dict containing a 'net' state dict.
    """
    self.img = None
    print('Loading model..')
    model = DSOD(num_classes=10)
    model.cuda()
    device_ids = range(torch.cuda.device_count())
    model = torch.nn.DataParallel(model, device_ids=device_ids)
    # strict=False: tolerate missing/unexpected keys in the checkpoint.
    checkpoint = torch.load(load_path)
    model.load_state_dict(checkpoint['net'], False)
    model.eval()
    self.net = model
    self.box_coder = SSDBoxCoder(self.net.module)
# --- top-level data-pipeline / visualization setup for KAIST training ---
img_size = net.input_size

# Photometric augmentation on the raw PIL images (RGB jitter + LWIR contrast).
preprocess = Compose([
    ColorJitter(0.5, 0.5, 0.3),
    ColorJitterLWIR(contrast=0.5)
])
# preprocess.add( [ SynthFail('bug1.png', (512, 640), 'T-') ] )
# preprocess.add( [ FaultTolerant([0.5, 0.5]) ] )

# Geometric + tensor transforms; Normalize tags pick the RGB ('R') vs
# thermal ('T') stream.
transforms = Compose([
    RandomHorizontalFlip(),
    RandomResizedCrop(img_size, scale=(0.25, 2.0), ratio=(0.8, 1.2)),
    ToTensor(),
    Normalize([0.3465, 0.3219, 0.2842], [0.2358, 0.2265, 0.2274], 'R'),
    Normalize([0.1598], [0.0813], 'T')
])

# trainset = KAISTPed('train-all-04.txt', SSDBoxCoder(net), img_transform=preprocess, co_transform=transforms)
# Sequence variant: 3 frames per sample, batched with the custom SeqCollate.
trainset = KAISTPedSeq('train-all-02.txt', SSDBoxCoder(net), 3,
                       img_transform=preprocess, co_transform=transforms)
trainloader = torch.utils.data.DataLoader(trainset,
                                          collate_fn=SeqCollate,
                                          batch_size=2,
                                          shuffle=True,
                                          num_workers=32)

# Inverse transforms: normalized tensors back to PIL images at the original
# KAIST resolution, for visualization.
ori_size = (512, 640)
tensor2image = Compose([
    UnNormalize((0.3465, 0.3219, 0.2842), (0.2358, 0.2265, 0.2274)),
    ToPILImage('RGB'),
    Resize(ori_size)
])
tensor2lwir = Compose([
    UnNormalize([0.1598], [0.0813]),
    ToPILImage('L'),
    Resize(ori_size)
])

coder = SSDBoxCoder(net)
fig, ax = plt.subplots(figsize=(6, 5))
# ################################################################################################
# ### Compute mean/std for CLS/REG
def main(**kwargs):
    """Train DSOD (21 classes) on the configured dataset.

    Args:
        **kwargs: forwarded to `opt._parse` to override config values.

    Resumes from `opt.load_path` when set; every `opt.save_state_every`
    epochs a numbered checkpoint is written, and the best-loss model is
    saved as 'dsod.pth'.
    """
    opt._parse(kwargs)
    vis = Visualizer(env=opt.env)

    # Model
    print('==> Building model..')
    net = DSOD(num_classes=21)

    # Dataset
    print('==> Preparing dataset..')
    box_coder = SSDBoxCoder(net)
    trainset = ListDataset(root=opt.train_img_root,
                           list_file=opt.train_img_list,
                           transform=Transform(box_coder, True))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batch_size,
                                              shuffle=True,
                                              num_workers=opt.num_worker,
                                              pin_memory=True)

    net.cuda()
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

    criterion = SSDLoss(num_classes=21)
    optimizer = optim.SGD(net.parameters(), lr=opt.lr,
                          momentum=0.9, weight_decay=1e-4)

    best_loss = 1e100  # lowest running epoch loss seen so far
    start_epoch = 0
    if opt.load_path is not None:
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(opt.load_path)
        net.load_state_dict(checkpoint['net'])
        best_loss = checkpoint['map']  # NOTE: checkpoint stores loss under 'map'
        start_epoch = checkpoint['epoch'] + 1
        print('start_epoch = ', start_epoch, 'best_loss = ', best_loss)

    for epoch in range(start_epoch, start_epoch + 100):
        print('\nEpoch: ', epoch)
        net.train()
        train_loss = 0
        optimizer.zero_grad()
        ix = 0  # backward passes accumulated since the last optimizer step
        for batch_idx, (inputs, loc_targets, cls_targets) in tqdm(enumerate(trainloader)):
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())

            loc_preds, cls_preds = net(inputs)
            ix += 1
            loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
            loss.backward()
            train_loss += loss.item()  # .item() instead of deprecated .data.item()
            current_loss = train_loss / (1 + batch_idx)

            # Gradient accumulation: step once every opt.iter_size batches,
            # averaging the gradients accumulated since the previous step.
            if (batch_idx + 1) % opt.iter_size == 0:
                for p in net.parameters():
                    if p.grad is not None:  # frozen params carry no grad
                        p.grad.data.div_(ix)
                ix = 0
                optimizer.step()
                optimizer.zero_grad()

            if (batch_idx + 1) % opt.plot_every == 0:
                vis.plot('loss', current_loss)
                # img = predict(net, box_coder, os.path.join(opt.train_img_root, trainset.fnames[batch_idx]))
                # vis.img('predict', np.array(img).transpose(2, 0, 1))

        print('current_loss: ', current_loss, 'best_loss: ', best_loss)

        # Step decay: multiply the LR by 0.1 every 20 epochs.
        if (epoch + 1) % 20 == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1

        if (epoch + 1) % opt.save_state_every == 0:
            state = {
                'net': net.state_dict(),
                'map': current_loss,
                'epoch': epoch,
            }
            torch.save(state, opt.checkpoint + '%s.pth' % epoch)

        if current_loss < best_loss:
            best_loss = current_loss
            print('saving model at epoch: ', epoch)
            state = {
                'net': net.state_dict(),
                'map': best_loss,
                'epoch': epoch,
            }
            torch.save(state, opt.checkpoint + 'dsod.pth')
[ColorJitter(0.5, 0.5, 0.3), ColorJitterLWIR(contrast=0.5)]) # preprocess.add( [ SynthFail('bug1.png', (512, 640), 'T-') ] ) # preprocess.add( [ FaultTolerant([0.5, 0.5]) ] ) transforms = Compose([ \ RandomHorizontalFlip(), \ RandomResizedCrop( img_size, scale=(0.25, 2.0), ratio=(0.8, 1.2)), \ ToTensor(), \ Normalize([0.3465,0.3219,0.2842], [0.2358,0.2265,0.2274], 'R'), \ Normalize([0.1598], [0.0813], 'T') ]) # trainset = KAISTPed('train-all-04.txt', SSDBoxCoder(net), img_transform=preprocess, co_transform=transforms) trainset = KAISTPedSeq('train-all-02.txt', SSDBoxCoder(net), 3, img_transform=preprocess, co_transform=transforms) trainloader = torch.utils.data.DataLoader(trainset, collate_fn=SeqCollate, batch_size=2, shuffle=True, num_workers=32) ori_size = (512, 640) tensor2image = Compose([ UnNormalize((0.3465, 0.3219, 0.2842), (0.2358, 0.2265, 0.2274)), ToPILImage('RGB'), Resize(ori_size)
def caffe_normalize(x):
    """RGB [0,1] tensor -> caffe-style input: BGR, 0-255, mean-subtracted."""
    return transforms.Compose([
        transforms.Lambda(lambda x: 255 * x[[2, 1, 0]]),
        transforms.Normalize([104, 117, 123], (1, 1, 1)),
        # make it the same as caffe
        # bgr and 0-255
    ])(x)


def transform(img, boxes, labels):
    """Resize to the network input size, tensorize and caffe-normalize."""
    img, boxes = resize(img, boxes, size=(opt.img_size, opt.img_size))
    img = transforms.Compose([
        transforms.ToTensor(),
        caffe_normalize
    ])(img)
    return img, boxes, labels


dataset = ListDataset(root=opt.eval_img_root,
                      list_file=opt.eval_img_list,
                      transform=transform)
box_coder = SSDBoxCoder(net.module)

nums_img = len(dataset)
# randrange fixes the off-by-one of randint(0, nums_img): randint's upper
# bound is inclusive, so it could return nums_img and index past the end.
idx = random.randrange(nums_img)
inputs, _, _ = dataset[idx]
inputs = inputs.unsqueeze(0)
with torch.no_grad():
    loc_preds, cls_preds = net(Variable(inputs.cuda()))
boxes, labels, scores = box_coder.decode(
    loc_preds.cpu().data.squeeze(),
    F.softmax(cls_preds.squeeze(), dim=1).cpu().data,
    score_thresh=0.5)

# Scale the boxes from network-input coordinates back to the original image.
img = Image.open(opt.eval_img_root + dataset.fnames[idx])
sw = float(img.size[0]) / float(opt.img_size)
sh = float(img.size[1]) / float(opt.img_size)
boxes = boxes.type(torch.FloatTensor) * torch.tensor([sw, sh, sw, sh])
draw = ImageDraw.Draw(img)
class Detect:
    """Single-image detector wrapper around a trained DSOD checkpoint."""

    def __init__(self, load_path):
        """Load the DSOD model (10 classes) from `load_path` onto the GPU(s)."""
        self.img = None
        print('Loading model..')
        self.net = DSOD(num_classes=10)
        self.net.cuda()
        self.net = torch.nn.DataParallel(
            self.net, device_ids=range(torch.cuda.device_count()))
        # strict=False: tolerate missing/unexpected keys in the checkpoint.
        self.net.load_state_dict(torch.load(load_path)['net'], False)
        self.net.eval()
        self.box_coder = SSDBoxCoder(self.net.module)

    def caffe_normalize(self, x):
        """RGB [0,1] tensor -> caffe-style input: BGR, 0-255, mean-subtracted."""
        return transforms.Compose([
            transforms.Lambda(lambda x: 255 * x[[2, 1, 0]]),
            transforms.Normalize([104, 117, 123], (1, 1, 1)),
            # make it the same as caffe
            # bgr and 0-255
        ])(x)

    def transform(self, img, boxes):
        """Resize to the network input size and caffe-normalize the image."""
        img, boxes = resize(img, boxes, size=(opt.img_size, opt.img_size))
        img = transforms.Compose([transforms.ToTensor(),
                                  self.caffe_normalize])(img)
        return img, boxes

    def py_cpu_nms(self, dets, score, thresh):
        """Pure-numpy non-maximum suppression.

        Args:
            dets: (N, 4) array of [x1, y1, x2, y2] boxes.
            score: (N,) array of confidences.
            thresh: IoU threshold above which a lower-scored box is dropped.

        Returns:
            List of kept indices into `dets`, highest score first.
        """
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        # +1: boxes are treated as inclusive pixel ranges.
        areas = (y2 - y1 + 1) * (x2 - x1 + 1)
        keep = []
        index = score.argsort()[::-1]  # candidate indices, descending score
        while index.size > 0:
            i = index[0]  # highest remaining score is always kept
            keep.append(i)
            # Intersection of box i with every remaining candidate.
            x11 = np.maximum(x1[i], x1[index[1:]])
            y11 = np.maximum(y1[i], y1[index[1:]])
            x22 = np.minimum(x2[i], x2[index[1:]])
            y22 = np.minimum(y2[i], y2[index[1:]])
            w = np.maximum(0, x22 - x11 + 1)
            h = np.maximum(0, y22 - y11 + 1)
            overlaps = w * h
            ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
            # Keep only candidates that overlap box i no more than thresh.
            idx = np.where(ious <= thresh)[0]
            index = index[idx + 1]  # +1: idx is relative to index[1:]
        return keep

    def detect(self, img):
        """Detect the single most confident object in the image at `img`.

        Args:
            img: path (or file object) accepted by PIL.Image.open.

        Returns:
            (x1, y1, x2, y2) in original-image coordinates, or None when
            nothing is detected.
        """
        print('Processing img..')
        self.img = Image.open(img)
        if self.img.mode != 'RGB':
            self.img = self.img.convert('RGB')
        boxes = None
        inputs, boxes = self.transform(self.img, boxes)
        inputs = inputs.unsqueeze(0)
        with torch.no_grad():
            loc_preds, cls_preds = self.net(Variable(inputs.cuda()))
        box_preds, label_preds, score_preds = self.box_coder.decode(
            loc_preds.cpu().data.squeeze(),
            F.softmax(cls_preds.squeeze(), dim=1).cpu().data,
            score_thresh=0.1)
        score = score_preds.numpy()
        # Guard: with no detections above threshold, np.argmax below would
        # raise on an empty array.
        if score.size == 0:
            print('no detection')
            return None
        keep = self.py_cpu_nms(box_preds.numpy(), score, thresh=0.3)
        box_preds = box_preds.numpy()[keep]
        score_preds = score_preds.numpy()[keep]
        label_preds = label_preds.numpy()[keep]
        # Scale back from network-input coordinates to the original image.
        sw = float(self.img.size[0]) / float(opt.img_size)
        sh = float(self.img.size[1]) / float(opt.img_size)
        boxes = box_preds * np.array([sw, sh, sw, sh])
        index = np.argmax(score_preds)  # report only the top-scoring box
        x1 = max(0, int(boxes[index][0]))
        y1 = max(0, int(boxes[index][1]))
        x2 = max(0, int(boxes[index][2]))
        y2 = max(0, int(boxes[index][3]))
        if x1 >= x2 or y1 >= y2:
            print('no detection')
            return None
        return x1, y1, x2, y2