def eval(net, test_num=10000):
    """Run VOC-style evaluation of `net` over the whole eval dataset.

    Args:
        net: DataParallel-wrapped detector (`net.module` is the raw model
            used to build the box coder).
        test_num: unused here; kept for backward-compatible interface.

    Returns:
        The dict produced by `voc_eval` (per-class APs plus 'map').
    """
    net.eval()

    def transform(img, boxes, labels):
        # Square resize, then caffe-style BGR/0-255 normalization
        # (caffe_normalize is defined at module level).
        img, boxes = resize(img, boxes, size=(opt.img_size, opt.img_size))
        img = transforms.Compose([transforms.ToTensor(), caffe_normalize])(img)
        return img, boxes, labels

    dataset = ListDataset(root=opt.eval_img_root,
                          list_file=opt.eval_img_list,
                          transform=transform)
    box_coder = SSDBoxCoder(net.module)

    pred_boxes = []
    pred_labels = []
    pred_scores = []
    gt_boxes = []
    gt_labels = []

    # Idiomatic len()/indexing instead of calling __len__/__getitem__ directly.
    nums_img = len(dataset)
    for i in tqdm(range(nums_img)):
        inputs, box_targets, label_targets = dataset[i]
        gt_boxes.append(box_targets)
        gt_labels.append(label_targets)
        inputs = inputs.unsqueeze(0)
        with torch.no_grad():
            loc_preds, cls_preds = net(Variable(inputs.cuda()))
        box_preds, label_preds, score_preds = box_coder.decode(
            loc_preds.cpu().data.squeeze(),
            F.softmax(cls_preds.squeeze(), dim=1).cpu().data,
            score_thresh=0.1)
        pred_boxes.append(box_preds)
        pred_labels.append(label_preds)
        pred_scores.append(score_preds)

    aps = voc_eval(pred_boxes, pred_labels, pred_scores,
                   gt_boxes, gt_labels, gt_difficults=None,
                   iou_thresh=0.5, use_07_metric=False)
    net.train()
    return aps
def prepare_data(ssd_box_coder, batch_size, n_workers, img_size, data_paths,
                 drop_last=False):
    """Build source (VisDA) and target (COCO) datasets, loaders and GT boxes.

    WARNING: the return value is ``namedtuple('Data', locals().keys())(**locals())``
    — every local variable name in this function (including the arguments)
    becomes a field of the returned tuple and is therefore part of the public
    interface. Do not rename locals without updating all call sites.

    Args:
        ssd_box_coder: box coder passed through to the train/test transforms.
        batch_size: batch size for every DataLoader built here.
        n_workers: worker count for the training loaders (test loaders use 0).
        img_size: square input size forwarded to the transforms.
        data_paths: dict with keys 'vda_root', 'vda_list_train',
            'vda_list_test', 'coco_root', 'coco_list_train',
            'coco_root_test', 'coco_list_test'.
        drop_last: forwarded to all loaders except target_full_train_loader.
    """
    # Source domain (VisDA) train/test with encoded targets.
    trainset = ListDataset(
        root=data_paths['vda_root'],
        list_file=data_paths['vda_list_train'],
        transform=partial(
            transform_train, ssd_box_coder=ssd_box_coder, img_size=img_size
        )
    )
    testset = ListDataset(
        root=data_paths['vda_root'],
        list_file=data_paths['vda_list_test'],
        transform=partial(
            transform_test, ssd_box_coder=ssd_box_coder, img_size=img_size
        )
    )
    # Target domain (COCO): `coco_trainset` uses the target transform (no box
    # coder); `coco_trainset_full` uses the full source-style train transform.
    coco_trainset = ListDataset(
        root=data_paths['coco_root'],
        list_file=data_paths['coco_list_train'],
        transform=partial(transform_train_target, img_size=img_size)
    )
    coco_trainset_full = ListDataset(
        root=data_paths['coco_root'],
        list_file=data_paths['coco_list_train'],
        transform=partial(
            transform_train, ssd_box_coder=ssd_box_coder, img_size=img_size
        )
    )
    coco_testset = ListDataset(
        root=data_paths['coco_root_test'],
        list_file=data_paths['coco_list_test'],
        transform=partial(
            transform_test, ssd_box_coder=ssd_box_coder, img_size=img_size
        )
    )
    source_train_loader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True,
        num_workers=n_workers, drop_last=drop_last
    )
    target_train_loader = torch.utils.data.DataLoader(
        coco_trainset, batch_size=batch_size, shuffle=True,
        num_workers=n_workers, drop_last=drop_last
    )
    # NOTE: intentionally (?) does not forward drop_last — TODO confirm.
    target_full_train_loader = torch.utils.data.DataLoader(
        coco_trainset_full, batch_size=batch_size, shuffle=True,
        num_workers=n_workers
    )
    # Test loaders run single-process (num_workers=0), no shuffling.
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False,
        num_workers=0, drop_last=drop_last
    )
    cocoloader = torch.utils.data.DataLoader(
        coco_testset, batch_size=batch_size, shuffle=False,
        num_workers=0, drop_last=drop_last
    )
    # Raw ground-truth boxes/labels for both test sets (used by evaluation).
    vda_test_boxes_labels, coco_test_boxes_labels = get_test_bboxes(
        data_paths['vda_list_test'],
        data_paths['vda_root'],
        data_paths['coco_list_test'],
        data_paths['coco_root_test'],
    )
    # Pack every local into an ad-hoc namedtuple (see WARNING above).
    return namedtuple('Data', locals().keys())(**locals())
def box_label_list(root_dir, list_file):
    """Materialize (resized_boxes, labels) pairs for every sample in `list_file`.

    Relies on a module-level `img_size` for the resize target
    (assumed defined at module scope — TODO confirm).
    """
    def _boxes_and_labels(img, boxes, labels):
        # Drop the image itself; keep only boxes rescaled to the square size.
        _, scaled_boxes = resize(img, boxes, size=(img_size, img_size))
        return scaled_boxes, labels

    dataset = ListDataset(root=root_dir, list_file=list_file,
                          transform=_boxes_and_labels)
    return list(dataset)
def eval(net):
    """Evaluate `net` on the VOC07 test list with the VOC-2007 11-point metric.

    Reads per-object "difficult" flags so voc_eval can exclude them.

    Returns:
        The dict produced by `voc_eval` (per-class APs plus 'map').
    """
    net.eval()

    def transform(img, boxes, labels):
        # Square resize + ImageNet normalization for inference.
        img, boxes = resize(img, boxes, size=(img_size, img_size))
        img = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])(img)
        return img, boxes, labels

    dataset = ListDataset(root=args.data_root,
                          list_file=args.voc07_test,
                          transform=transform)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                             shuffle=False, num_workers=8)
    box_coder = SSDBoxCoder(net)

    pred_boxes = []
    pred_labels = []
    pred_scores = []
    gt_boxes = []
    gt_labels = []

    with open('torchcv/datasets/voc/voc07_test_difficult.txt') as f:
        gt_difficults = []
        for line in f:  # iterate lazily instead of f.readlines()
            line = line.strip().split()
            d = np.array([int(x) for x in line[1:]])
            gt_difficults.append(d)

    for i, (inputs, box_targets, label_targets) in enumerate(dataloader):
        print('%d/%d' % (i, len(dataloader)))
        gt_boxes.append(box_targets.squeeze(0))
        gt_labels.append(label_targets.squeeze(0))
        # FIX: `volatile=True` was removed in PyTorch 0.4; torch.no_grad()
        # is the supported way to disable autograd during inference
        # (already used by the other eval helpers in this file).
        with torch.no_grad():
            loc_preds, cls_preds = net(Variable(inputs.cuda()))
        box_preds, label_preds, score_preds = box_coder.decode(
            loc_preds.cpu().data.squeeze(),
            F.softmax(cls_preds.squeeze(), dim=1).cpu().data,
            score_thresh=0.01)
        pred_boxes.append(box_preds)
        pred_labels.append(label_preds)
        pred_scores.append(score_preds)

    aps = voc_eval(pred_boxes, pred_labels, pred_scores,
                   gt_boxes, gt_labels, gt_difficults,
                   iou_thresh=0.5, use_07_metric=True)
    net.train()
    return aps
def evaluate(net, img_dir, list_file, img_size, test_code):
    """Evaluate `net` on the images listed in `list_file`.

    Args:
        net: detector; moved to CUDA and switched to eval mode here.
        img_dir: root directory of the images.
        list_file: annotation list file.
        img_size: square input size used by the eval transform.
        test_code: when truthy, shrink the dataset to 1 image (smoke test).

    Returns:
        The dict produced by `voc_eval` (per-class APs plus 'map').
    """
    net.cuda()
    net.eval()

    def transform(img, boxes, labels):
        # Square resize + ImageNet normalization for inference.
        img, boxes = resize(img, boxes, size=(img_size, img_size))
        img = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])(img)
        return img, boxes, labels

    print('Loading dataset..')
    dataset = ListDataset(root=img_dir, list_file=list_file,
                          transform=transform)
    if test_code:
        dataset.num_imgs = 1
    dl = torch.utils.data.DataLoader(dataset, batch_size=1,
                                     shuffle=False, num_workers=2)
    box_coder = SSDBoxCoder(net)

    pred_boxes = []
    pred_labels = []
    pred_scores = []
    gt_boxes = []
    gt_labels = []

    tqdm_dl = tqdm(dl, desc="Evaluate", ncols=0)
    for i, (inputs, box_targets, label_targets) in enumerate(tqdm_dl):
        gt_boxes.append(box_targets.squeeze(0))
        gt_labels.append(label_targets.squeeze(0))
        # FIX: `volatile=True` was removed in PyTorch 0.4; use torch.no_grad()
        # to disable autograd during inference instead.
        with torch.no_grad():
            loc_preds, cls_preds = net(Variable(inputs.cuda()))
        box_preds, label_preds, score_preds = box_coder.decode(
            loc_preds.cpu().data.squeeze(),
            F.softmax(cls_preds.squeeze(), dim=1).cpu().data,
            score_thresh=0.01)
        pred_boxes.append(box_preds)
        pred_labels.append(label_preds)
        pred_scores.append(score_preds)

    ap_map_dict = voc_eval(pred_boxes, pred_labels, pred_scores,
                           gt_boxes, gt_labels,
                           iou_thresh=0.5, use_07_metric=False)
    return ap_map_dict
return img, boxes, labels def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=(img_size, img_size)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) enc_boxes, enc_labels = box_coder.encode(boxes, labels) return img, enc_boxes, enc_labels trainset = ListDataset( root='/scratch2/mytmp/render_detection_visda', list_file='/scratch2/mytmp/render_detection_result/listdataset/visda18' '-detection-train.txt', transform=transform_train) testset = ListDataset( root='/scratch2/mytmp/render_detection_visda', list_file='/scratch2/mytmp/render_detection_result/listdataset/visda18' '-detection-test.txt', transform=transform_test) cocoset = ListDataset( root='/scratch2/data/coco/train2014', list_file= '/scratch2/mytmp/render_detection_result/listdataset/coco-train-short.txt', transform=transform_test)
net.eval()
print('Preparing dataset..')
img_size = 512


def transform(img, boxes, labels):
    # Fixed square resize + ImageNet normalization for evaluation.
    img, boxes = resize(img, boxes, size=(img_size, img_size))
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])(img)
    return img, boxes, labels


dataset = ListDataset(root='/home/claude.cy/.data/all_images',
                      list_file='torchcv/datasets/voc/voc07_test.txt',
                      transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                         shuffle=False, num_workers=2)
box_coder = SSDBoxCoder(net)

# Accumulators for predictions and ground truth over the whole test set.
pred_boxes = []
pred_labels = []
pred_scores = []
gt_boxes = []
gt_labels = []

# Per-object "difficult" flags; the parsing loop continues past this chunk.
with open('torchcv/datasets/voc/voc07_test_difficult.txt') as f:
    gt_difficults = []
print('Loading image..')


def caffe_normalize(x):
    """Convert a [0,1] RGB tensor to caffe-style BGR with 0-255 mean subtraction."""
    return transforms.Compose([
        transforms.Lambda(lambda x: 255 * x[[2, 1, 0]]),
        transforms.Normalize([104, 117, 123], (1, 1, 1)),
        # make it the same as caffe: bgr and 0-255
    ])(x)


def transform(img, boxes, labels):
    # Square resize followed by caffe-style normalization.
    img, boxes = resize(img, boxes, size=(opt.img_size, opt.img_size))
    img = transforms.Compose([transforms.ToTensor(), caffe_normalize])(img)
    return img, boxes, labels


dataset = ListDataset(root=opt.eval_img_root,
                      list_file=opt.eval_img_list,
                      transform=transform)
box_coder = SSDBoxCoder(net.module)

# FIX: random.randint is inclusive on BOTH ends, so randint(0, nums_img)
# could return nums_img and index one past the end of the dataset.
nums_img = len(dataset)
idx = random.randint(0, nums_img - 1)
inputs, _, _ = dataset[idx]
inputs = inputs.unsqueeze(0)
with torch.no_grad():
    loc_preds, cls_preds = net(Variable(inputs.cuda()))
boxes, labels, scores = box_coder.decode(
    loc_preds.cpu().data.squeeze(),
    F.softmax(cls_preds.squeeze(), dim=1).cpu().data,
    score_thresh=0.5)

img = Image.open(opt.eval_img_root + dataset.fnames[idx])
# Rescale predicted boxes from network-input coordinates back to the
# original image size.
sw = float(img.size[0]) / float(opt.img_size)
sh = float(img.size[1]) / float(opt.img_size)
boxes = boxes.type(torch.FloatTensor) * torch.tensor([sw, sh, sw, sh])
print('Preparing dataset..')
img_size = 512


def transform(img, boxes, labels):
    # Fixed square resize + ImageNet normalization for evaluation.
    img, boxes = resize(img, boxes, size=(img_size, img_size))
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])(img)
    return img, boxes, labels


# `args.root` appears to be a pathlib.Path (joined with `/`) — TODO confirm.
dataset = ListDataset(root=args.root / 'voc_all_images',
                      list_file='torchcv/datasets/voc/voc07_test.txt',
                      transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                         shuffle=False, num_workers=2)
box_coder = FPNSSDBoxCoder()

# Accumulators for predictions and ground truth over the whole test set.
pred_boxes = []
pred_labels = []
pred_scores = []
gt_boxes = []
gt_labels = []

# Per-object "difficult" flags; the parsing loop continues past this chunk.
with open('torchcv/datasets/voc/voc07_test_difficult.txt') as f:
    gt_difficults = []
fill=(123, 116, 103)) img, boxes, labels = random_crop(img, boxes, labels) img, boxes = resize(img, boxes, size=(img_size, img_size), random_interpolation=True) img, boxes = random_flip(img, boxes) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels trainset = ListDataset(root='/home/liukuang/data/kitti/training/image_2', \ list_file='torchcv/datasets/kitti/kitti12_train.txt', \ transform=transform_train) def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=(img_size, img_size)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels testset = ListDataset(root='/home/liukuang/data/kitti/training/image_2', \ list_file='torchcv/datasets/kitti/kitti12_val.txt', \ transform=transform_test)
return transforms.Compose([ transforms.Lambda(lambda x: 255 * x[[2, 1, 0]]), transforms.Normalize([104, 117, 123], (1, 1, 1)), # make it the same as caffe # bgr and 0-255 ])(x) def transform(img, boxes, labels): img, boxes = resize(img, boxes, size=(opt.img_size, opt.img_size)) img = transforms.Compose([transforms.ToTensor(), caffe_normalize])(img) return img, boxes, labels dataset = ListDataset(root=opt.eval_img_root, list_file=opt.eval_img_list, transform=transform) box_coder = SSDBoxCoder(net.module) pred_boxes = [] pred_labels = [] pred_scores = [] gt_boxes = [] gt_labels = [] #with open('torchcv/datasets/voc/voc07_test_difficult.txt') as f: # gt_difficults = [] # for line in f.readlines(): # line = line.strip().split() # d = np.array([int(x) for x in line[1:]]) # gt_difficults.append(d)
def main(**kwargs):
    """Train a DSOD detector, plotting loss to visdom and checkpointing.

    Overrides in ``kwargs`` are merged into the global ``opt`` config.
    Gradients are accumulated over ``opt.iter_size`` mini-batches before
    each optimizer step; the best (lowest) running loss triggers a save.
    """
    opt._parse(kwargs)
    vis = Visualizer(env=opt.env)

    # Model
    print('==> Building model..')
    net = DSOD(num_classes=21)  # 20 VOC classes + background

    # Dataset
    print('==> Preparing dataset..')
    box_coder = SSDBoxCoder(net)
    trainset = ListDataset(root=opt.train_img_root,
                           list_file=opt.train_img_list,
                           transform=Transform(box_coder, True))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batch_size,
                                              shuffle=True,
                                              num_workers=opt.num_worker,
                                              pin_memory=True)
    net.cuda()
    net = torch.nn.DataParallel(net,
                                device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True
    criterion = SSDLoss(num_classes=21)
    optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9,
                          weight_decay=1e-4)
    best_map_ = 0
    best_loss = 1e100
    start_epoch = 0
    if opt.load_path is not None:
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(opt.load_path)
        net.load_state_dict(checkpoint['net'])
        # NOTE(review): the checkpoint's 'map' slot actually holds a loss
        # value (see the save sites below) — confirm before reusing elsewhere.
        best_loss = checkpoint['map']
        start_epoch = checkpoint['epoch'] + 1
        print('start_epoch = ', start_epoch, 'best_loss = ', best_loss)

    for epoch in range(start_epoch, start_epoch + 100):
        print('\nEpoch: ', epoch)
        net.train()
        train_loss = 0
        optimizer.zero_grad()
        ix = 0  # batches whose gradients are currently accumulated
        for batch_idx, (inputs, loc_targets, cls_targets) in tqdm(enumerate(trainloader)):
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())
            loc_preds, cls_preds = net(inputs)
            ix += 1
            loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
            loss.backward()
            train_loss += loss.data.item()
            current_loss = train_loss / (1 + batch_idx)
            if (batch_idx + 1) % (opt.iter_size) == 0:
                # Gradient accumulation: average the summed gradients over
                # the `ix` accumulated batches before stepping.
                for name, p in net.named_parameters():
                    p.grad.data.div_(ix)
                ix = 0
                optimizer.step()
                optimizer.zero_grad()
            if (batch_idx + 1) % opt.plot_every == 0:
                vis.plot('loss', current_loss)
                # img = predict(net, box_coder, os.path.join(opt.train_img_root, trainset.fnames[batch_idx]))
                # vis.img('predict', np.array(img).transpose(2, 0, 1))
                # if os.path.exists(opt.debug_file):
                #     import ipdb
                #     ipdb.set_trace()
        print('current_loss: ', current_loss, 'best_loss: ', best_loss)
        if (epoch + 1) % 20 == 0:
            # Step learning-rate decay every 20 epochs.
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1
        if (epoch + 1) % opt.save_state_every == 0:
            state = {
                'net': net.state_dict(),
                'map': current_loss,  # NOTE(review): loss stored under 'map'
                'epoch': epoch,
            }
            torch.save(state, opt.checkpoint + '%s.pth' % epoch)
        if current_loss < best_loss:
            best_loss = current_loss
            print('saving model at epoch: ', epoch)
            state = {
                'net': net.state_dict(),
                'map': best_loss,  # NOTE(review): loss stored under 'map'
                'epoch': epoch,
            }
            torch.save(state, opt.checkpoint + 'dsod.pth')
img, boxes, labels = random_crop(img, boxes, labels) img, boxes = resize(img, boxes, size=(img_size, img_size), random_interpolation=True) img, boxes = random_flip(img, boxes) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels trainset = ListDataset( root='/data/deva/recaptcha_data/labelled', list_file=['/data/deva/recaptcha_data/labelled/bbox_dataset_train.txt'], transform=transform_train) def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=(img_size, img_size)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels testset = ListDataset( root='/data/deva/recaptcha_data/labelled',
img, boxes = random_paste(img, boxes, max_ratio=4, fill=(123,116,103)) img, boxes, labels = random_crop(img, boxes, labels) img, boxes = resize(img, boxes, size=(img_size,img_size), random_interpolation=True) img, boxes = random_flip(img, boxes) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225)) ])(img) # print ("labels: ") # print (labels.size()) boxes, labels = box_coder.encode(boxes, labels) # print (labels.size()) return img, boxes, labels trainset = ListDataset(root='/home/ysdu/hardwareDisk/ysduDir/voc/voc_all_images', list_file=['torchcv/datasets/voc/voc07_trainval.txt', 'torchcv/datasets/voc/voc12_trainval.txt'], transform=transform_train) def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=(img_size,img_size)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels testset = ListDataset(root='/home/ysdu/hardwareDisk/ysduDir/voc/voc_all_images', list_file='torchcv/datasets/voc/voc07_test.txt', transform=transform_test)
# Shared tensor conversion + ImageNet normalization used by both transforms.
_to_normalized_tensor = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])


def transform_train(img, boxes, labels):
    """Training augmentation: distort, optional paste, crop, resize, flip,
    then normalize and encode targets with the module-level box_coder."""
    img = random_distort(img)
    if random.random() < 0.5:
        img, boxes = random_paste(img, boxes, max_ratio=4,
                                  fill=(123, 116, 103))
    img, boxes, labels = random_crop(img, boxes, labels)
    img, boxes = resize(img, boxes, size=(img_size, img_size),
                        random_interpolation=True)
    img, boxes = random_flip(img, boxes)
    img = _to_normalized_tensor(img)
    boxes, labels = box_coder.encode(boxes, labels)
    return img, boxes, labels


trainset = ListDataset(root='/search/odin/liukuang/data/voc_all_images',
                       list_file=['torchcv/datasets/voc/voc07_trainval.txt',
                                  'torchcv/datasets/voc/voc12_trainval.txt'],
                       transform=transform_train)


def transform_test(img, boxes, labels):
    """Deterministic eval transform: square resize, normalize, encode."""
    img, boxes = resize(img, boxes, size=(img_size, img_size))
    img = _to_normalized_tensor(img)
    boxes, labels = box_coder.encode(boxes, labels)
    return img, boxes, labels


testset = ListDataset(root='/search/odin/liukuang/data/voc_all_images',
                      list_file='torchcv/datasets/voc/voc07_test.txt',
                      transform=transform_test)
def main(**kwargs):
    """Train DSOD on VOC07+12 trainval, evaluating mAP every epoch and
    checkpointing whenever mAP improves.

    Overrides in ``kwargs`` are merged into the global ``opt`` config.
    """
    opt._parse(kwargs)
    vis = Visualizer(env=opt.env)

    # Model
    print('==> Building model..')
    net = DSOD(num_classes=21)  # 20 VOC classes + background
    start_epoch = 0  # start from epoch 0 or last epoch
    if opt.load_path is not None:
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(opt.load_path)
        net.load_state_dict(checkpoint['net'])

    # Dataset
    print('==> Preparing dataset..')
    box_coder = SSDBoxCoder(net)
    trainset = ListDataset(root=opt.data_root,
                           list_file=[opt.voc07_trainval, opt.voc12_trainval],
                           transform=Transform(box_coder, True))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batch_size,
                                              shuffle=True,
                                              num_workers=8)
    net.cuda()
    net = torch.nn.DataParallel(net,
                                device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True
    criterion = SSDLoss(num_classes=21)
    optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9,
                          weight_decay=5e-4)
    best_map_ = 0
    for epoch in range(start_epoch, start_epoch + 200):
        print('\nEpoch: %d' % epoch)
        net.train()
        train_loss = 0
        for batch_idx, (inputs, loc_targets, cls_targets) in tqdm(enumerate(trainloader)):
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())
            optimizer.zero_grad()
            loc_preds, cls_preds = net(inputs)
            loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
            loss.backward()
            optimizer.step()
            # NOTE(review): `loss.data[0]` is the pre-PyTorch-0.4 API;
            # modern code uses `loss.item()` — confirm target torch version.
            train_loss += loss.data[0]
            if (batch_idx + 1) % opt.plot_every == 0:
                vis.plot('loss', train_loss / (batch_idx + 1))
                # Visualize a prediction on a recent training image.
                img = predict(
                    net, box_coder,
                    os.path.join(opt.data_root, trainset.fnames[batch_idx]))
                vis.img('predict', np.array(img).transpose(2, 0, 1))
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
        aps = eval(net.module, test_num=epoch * 100 + 100)
        map_ = aps['map']
        if map_ > best_map_:
            print('Saving..')
            state = {
                'net': net.state_dict(),
                # NOTE(review): this stores the PREVIOUS best (best_map_ is
                # updated only after the dict is built) — confirm intent.
                'map': best_map_,
                'epoch': epoch,
            }
            best_map_ = map_
            if not os.path.isdir(os.path.dirname(opt.checkpoint)):
                os.mkdir(os.path.dirname(opt.checkpoint))
            torch.save(state,
                       opt.checkpoint + '/%s.pth' % best_map_)
for ii in range(len(labels_tmp)): if labels_tmp[ii] != 0: new_boxes.append(boxes_tmp[ii]) new_labels.append(labels_tmp[ii]) else: att_box.append(boxes_tmp[ii]) boxes = torch.from_numpy(np.array(new_boxes)) labels = torch.from_numpy(np.array(new_labels)) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels, att_box #boxes_tmp, labels_tmp trainset = ListDataset(root='/home/user/Mayank/box-attention/data/VOC/VOCdevkit/VOC2012/JPEGImages', \ list_file='torchcv/datasets/voc/voc12_trainval.txt', \ transform=transform_train) def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=(img_size,img_size)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels testset = ListDataset(root='/home/user/Mayank/box-attention/data/VOC/VOCdevkit/VOC2012/JPEGImages', \ list_file='torchcv/datasets/voc/voc12_test.txt', \ transform=transform_test)
return img, boxes, labels def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=img_size, max_size=img_size) img = pad(img, (img_size, img_size)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels trainset = ListDataset(root='/home/eee/ug/15085073/Projects/GRID/data/images', list_file='torchcv/datasets/grid/training.txt', transform=transform_train) testset = ListDataset(root='/home/eee/ug/15085073/Projects/GRID/data/images', list_file='torchcv/datasets/grid/validation.txt', transform=transform_test) trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=8) testloader = torch.utils.data.DataLoader(testset, batch_size=8, shuffle=False, num_workers=8) # Model
def main(**kwargs):
    """Train DSOD on VOC07+12 trainval with gradient accumulation.

    The lowest-loss weights are cached to /tmp/dsod.pth each epoch;
    every ``opt.eval_every`` epochs those weights are evaluated, the best
    mAP is checkpointed, and on regression the best weights are restored
    with a 10x learning-rate decay.
    """
    opt._parse(kwargs)
    vis = Visualizer(env=opt.env)

    # Model
    print('==> Building model..')
    net = DSOD(num_classes=21)  # 20 VOC classes + background
    start_epoch = 0  # start from epoch 0 or last epoch

    # Dataset
    print('==> Preparing dataset..')
    box_coder = SSDBoxCoder(net)
    trainset = ListDataset(root=opt.data_root,
                           list_file=[opt.voc07_trainval, opt.voc12_trainval],
                           transform=Transform(box_coder, True))
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=opt.batch_size,
                                              shuffle=True,
                                              num_workers=8,
                                              pin_memory=True)
    net.cuda()
    net = torch.nn.DataParallel(net,
                                device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True
    if opt.load_path is not None:
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(opt.load_path)
        net.module.load_state_dict(checkpoint['net'])
    criterion = SSDLoss(num_classes=21)
    optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9,
                          weight_decay=5e-4)
    best_map_ = 0
    best_loss = 1e100
    for epoch in range(start_epoch, start_epoch + 200):
        print('\nEpoch: %d' % epoch)
        net.train()
        train_loss = 0
        optimizer.zero_grad()
        ix = 0  # batches whose gradients are currently accumulated
        for batch_idx, (inputs, loc_targets, cls_targets) in tqdm(enumerate(trainloader)):
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())
            loc_preds, cls_preds = net(inputs)
            ix += 1
            loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
            loss.backward()
            # NOTE(review): `loss.data[0]` is the pre-PyTorch-0.4 API;
            # modern code uses `loss.item()` — confirm target torch version.
            train_loss += loss.data[0]
            if (batch_idx + 1) % (opt.iter_size) == 0:
                # if True:
                # Gradient accumulation: average gradients over `ix` batches.
                for name, p in net.named_parameters():
                    p.grad.data.div_(ix)
                ix = 0
                optimizer.step()
                optimizer.zero_grad()
            if (batch_idx + 1) % opt.plot_every == 0:
                vis.plot('loss', train_loss / (batch_idx + 1))
                img = predict(
                    net, box_coder,
                    os.path.join(opt.data_root, trainset.fnames[batch_idx]))
                vis.img('predict', np.array(img).transpose(2, 0, 1))
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
        # if (epoch+1)%10 == 0 :
        #     state = {
        #         'net': net.module.state_dict(),
        #         # 'map': best_map_,
        #         'epoch': epoch,
        #     }
        #     torch.save(state, opt.checkpoint + '/%s.pth' % epoch)
        # if (epoch+1) % 30 == 0:
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] *= 0.1
        current_loss = train_loss / (1 + batch_idx)
        if current_loss < best_loss:
            best_loss = current_loss
            torch.save(net.module.state_dict(), '/tmp/dsod.pth')
        if (epoch + 1) % opt.eval_every == 0:
            net.module.load_state_dict(torch.load('/tmp/dsod.pth'))
            aps = eval(net.module)
            map_ = aps['map']
            if map_ > best_map_:
                print('Saving..')
                state = {
                    'net': net.module.state_dict(),
                    # NOTE(review): stores the PREVIOUS best (best_map_ is
                    # updated just below) — confirm intent.
                    'map': best_map_,
                    'epoch': epoch,
                }
                best_map_ = map_
                if not os.path.isdir(os.path.dirname(opt.checkpoint)):
                    os.mkdir(os.path.dirname(opt.checkpoint))
                best_path = opt.checkpoint + '/%s.pth' % best_map_
                torch.save(state, best_path)
            else:
                # NOTE(review): if the FIRST evaluation fails to beat
                # best_map_, `best_path` is unbound here (NameError) — verify.
                net.module.load_state_dict(torch.load(best_path)['net'])
                for param_group in optimizer.param_groups:
                    param_group['lr'] *= 0.1
            vis.log(
                dict(epoch=(epoch + 1), map=map_,
                     loss=train_loss / (batch_idx + 1)))
img, boxes, labels = random_crop(img, boxes, labels) img, boxes = resize(img, boxes, size=(img_size, img_size), random_interpolation=True) img, boxes = random_flip(img, boxes) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels trainset = ListDataset(root=args.data_root, list_file=[args.voc07_trainval, args.voc12_trainval], transform=transform_train) def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=(img_size, img_size)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
from PIL import Image
from torchcv.datasets import ListDataset
from torchcv.visualizations import vis_image
from torchcv.transforms import resize, random_flip, random_crop, random_paste
import torchvision.transforms as transforms


def transform(img, boxes, labels):
    """Augment one sample: paste onto a larger canvas, crop, resize, flip."""
    img, boxes = random_paste(img, boxes, max_ratio=4, fill=(123, 116, 103))
    img, boxes, labels = random_crop(img, boxes, labels)
    img, boxes = resize(img, boxes, size=600, random_interpolation=True)
    img, boxes = random_flip(img, boxes)
    img = transforms.ToTensor()(img)
    return img, boxes, labels


# Smoke-test the augmentation pipeline: show the first COCO val image
# with its (augmented) boxes.
dataset = ListDataset(root='/mnt/hgfs/D/mscoco/2017/val2017',
                      list_file='torchcv/datasets/mscoco/coco17_val.txt',
                      transform=transform)
img, boxes, labels = dataset[0]
vis_image(img, boxes)
img, boxes = resize(img, boxes, size=(img_size, img_size), random_interpolation=True) img, boxes = random_flip(img, boxes) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels trainset = ListDataset( root='/home/lyan/Documents/sample_uvb/all_imgs', list_file=[ '/home/lyan/Documents/torchcv/torchcv/datasets/uvb/uvb_train.txt' ], transform=transform_train) def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=(img_size, img_size)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels testset = ListDataset(
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels def val_transform(img, boxes, labels): img, boxes = resize(img, boxes, size=(IMG_SIZE, IMG_SIZE)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels trn_ds = ListDataset(root=img_dir, list_file=trn_labels_fpath, transform=trn_transform, test_code=args.test_code) val_ds = ListDataset(root=img_dir_test, list_file=val_labels_fpath, transform=val_transform, test_code=args.test_code) trn_dl = torch.utils.data.DataLoader(trn_ds, batch_size=BATCH_SIZE, shuffle=shuffle, num_workers=NUM_WORKERS) val_dl = torch.utils.data.DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS) def calculate_loss(batch_idx, batch, volatile=False):
img, boxes = resize(img, boxes, size=(img_size, img_size), random_interpolation=True) img, boxes = random_flip(img, boxes) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels trainset = ListDataset(root=args.root / 'voc_all_images', list_file=[ 'torchcv/datasets/voc/voc07_trainval.txt', 'torchcv/datasets/voc/voc12_trainval.txt' ], transform=transform_train) def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=(img_size, img_size)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels testset = ListDataset(root=args.root / 'voc_all_images',
return img, boxes, labels def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=img_size, max_size=img_size) img = pad(img, (img_size, img_size)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels trainset = ListDataset(root='/home/liukuang/data/coco/train2017', list_file='torchcv/datasets/mscoco/coco17_train.txt', transform=transform_train) testset = ListDataset(root='/home/liukuang/data/coco/val2017', list_file='torchcv/datasets/mscoco/coco17_val.txt', transform=transform_test) trainloader = torch.utils.data.DataLoader(trainset, batch_size=1, shuffle=True, num_workers=8) testloader = torch.utils.data.DataLoader(testset, batch_size=8, shuffle=False, num_workers=8) # Model
net.eval()
print('Preparing dataset..')
img_size = 300


def transform(img, boxes, labels):
    # Fixed square resize + ImageNet normalization for evaluation.
    img, boxes = resize(img, boxes, size=(img_size, img_size))
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])(img)
    return img, boxes, labels


dataset = ListDataset(root='/search/odin/liukuang/data/voc_all_images/',
                      list_file='torchcv/datasets/voc/voc07_test.txt',
                      transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                         shuffle=False, num_workers=2)
box_coder = SSDBoxCoder()

# Accumulators for predictions and ground truth over the whole test set.
pred_boxes = []
pred_labels = []
pred_scores = []
gt_boxes = []
gt_labels = []

# Per-object "difficult" flags; the parsing loop continues past this chunk.
with open('torchcv/datasets/voc/voc07_test_difficult.txt') as f:
    gt_difficults = []
img, boxes = resize(img, boxes, size=(img_size, img_size), random_interpolation=True) img, boxes = random_flip(img, boxes) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels trainset = ListDataset(root='data/coco/images/train2017', list_file=[ 'torchcv/datasets/mscoco/coco17_train.txt', ], transform=transform_train) def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=(img_size, img_size)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels testset = ListDataset(root='/search/odin/liukuang/data/voc_all_images',
img, boxes, labels = random_crop(img, boxes, labels) img, boxes = resize(img, boxes, size=(img_w, img_h), random_interpolation=True) img, boxes = random_flip(img, boxes) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) # transforms.Normalize([0.5]*3,[0.5]*3) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels trainset = ListDataset(root=path_img, \ list_file='torchcv/datasets/kitti/kitti12_train2.txt', \ transform=transform_train) def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=(img_w, img_h)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) # transforms.Normalize([0.5]*3,[0.5]*3) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels testset = ListDataset(root=path_img, \ list_file='torchcv/datasets/kitti/kitti12_val2.txt', \
net.eval()
print('Preparing dataset..')
img_size = 512


def transform(img, boxes, labels):
    # Fixed square resize + ImageNet normalization for evaluation.
    img, boxes = resize(img, boxes, size=(img_size, img_size))
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])(img)
    return img, boxes, labels


dataset = ListDataset(root='/home/ysdu/hardwareDisk/ysduDir/voc/voc_all_images',
                      list_file='torchcv/datasets/voc/voc07_test.txt',
                      transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                         shuffle=False, num_workers=2)
box_coder = FPNSSDBoxCoder()

# Accumulators for predictions and ground truth over the whole test set.
pred_boxes = []
pred_labels = []
pred_scores = []
gt_boxes = []
gt_labels = []

# Per-object "difficult" flags; the parsing loop continues past this chunk.
with open('torchcv/datasets/voc/voc07_test_difficult.txt') as f:
    gt_difficults = []
img, boxes, labels = random_crop(img, boxes, labels) img, boxes = resize(img, boxes, size=(img_size, img_size), random_interpolation=True) img, boxes = random_flip(img, boxes) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels trainset = ListDataset(root=IMGS_ROOT, list_file=[LIST_FILE], transform=transform_train) def transform_test(img, boxes, labels): img, boxes = resize(img, boxes, size=(img_size, img_size)) img = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])(img) boxes, labels = box_coder.encode(boxes, labels) return img, boxes, labels testset = ListDataset(root=IMGS_ROOT, list_file=LIST_FILE,