def test_custom(video_path, video_name):
    DEBUG = False
    set_type = 'test'
    devkit_path = video_path + 'test'
    devkit_annopath = os.path.join(video_path, 'test', 'Annotations')
    devkit_imgpath = os.path.join(video_path, 'test', 'JPEGImages')
    devkit_imgsetpath = os.path.join(video_path, 'test', 'ImageSets', 'Main')
    frame_list = []

    # load nets: one SSD for the gauge, one for the waterline/mark
    num_classes_gauge = len(labelmap_gauge) + 1  # +1 for background
    net = build_ssd('test', 300, num_classes_gauge)  # initialize SSD
    net.load_state_dict(torch.load(args.trained_model_gauge))
    net.eval()

    num_classes_waterline = len(labelmap_waterline) + 1  # +1 for background
    net1 = build_ssd('test', 300, num_classes_waterline)  # initialize SSD
    net1.load_state_dict(torch.load(args.trained_model_waterline))
    net1.eval()
    print('Finished loading model!')

    # load data
    dataset1 = customDetection(
        video_path, [(video_name, set_type)], None,
        customAnnotationTransform(class_to_ind=dict(
            zip(CUSTOM_CLASSES_GAUGE, range(len(CUSTOM_CLASSES_GAUGE))))))
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True

    # evaluation: detect the gauge in every frame
    test_net(args.save_folder, net, args.cuda, dataset1,
             BaseTransform(net.size, (104, 117, 123)),
             thresh=args.visual_threshold, labelmap=labelmap_gauge)

    rootPath = os.path.join(video_path, video_name)
    rootPath_temp = os.path.join(video_path, 'test')

    # keep only the highest-scoring gauge box per frame
    imgList_gauge = {}
    with open(os.path.join(args.save_folder, 'result_gauge.txt'), 'r') as f:
        for line in f:
            name, score, x1, y1, x2, y2 = line.split(" ")
            if name not in imgList_gauge or float(score) > imgList_gauge[name]['score']:
                imgList_gauge[name] = {'score': float(score),
                                       'x1': float(x1), 'y1': float(y1),
                                       'x2': float(x2), 'y2': float(y2)}

    img_path = os.path.join(rootPath, 'JPEGImages', '%s.jpg')
    devkit_imgpath = os.path.join(get_output_dir(devkit_imgpath), '%s.jpg')
    devkit_imgsetpath = os.path.join(get_output_dir(devkit_imgsetpath), '%s.txt')
    devkit_annopath = os.path.join(get_output_dir(devkit_annopath), '%s.xml')

    # crop every frame to its gauge box and build a temporary VOC-style test set
    with open(devkit_imgsetpath % 'test', 'w') as f:
        for name, img in imgList_gauge.items():
            image = cv2.imread(img_path % name)
            (h, w, c) = image.shape
            x1 = max(math.floor(img['x1']), 0)
            y1 = max(math.floor(img['y1']), 0)
            x2 = min(math.floor(img['x2']), w)
            y2 = min(math.floor(img['y2']), h)
            if DEBUG:
                cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 5)
                image = cv2.resize(image, (512, 512))
                cv2.imshow('w1', image)
                cv2.waitKey()
            else:
                image = image[y1:y2, x1:x2]
                # imwrite expects (flag, value) pairs, not a bare [100]
                cv2.imwrite(devkit_imgpath % name, image,
                            [cv2.IMWRITE_JPEG_QUALITY, 100])
                f.write(name + '\n')
                with open(devkit_annopath % name, 'w') as f_a:
                    f_a.write(xmlData(name, x2 - x1, y2 - y1, 'waterline'))

    dataset2 = customDetection(
        video_path, [('test', set_type)], None,
        customAnnotationTransform(class_to_ind=dict(
            zip(CUSTOM_CLASSES_WATERLINE,
                range(len(CUSTOM_CLASSES_WATERLINE))))))
    if args.cuda:
        net1 = net1.cuda()
        cudnn.benchmark = True

    # evaluation: detect the waterline and mark inside the gauge crops
    # (net1.size, not net.size, although both nets are built at 300)
    test_net(args.save_folder, net1, args.cuda, dataset2,
             BaseTransform(net1.size, (104, 117, 123)),
             thresh=args.visual_threshold, labelmap=labelmap_waterline)

    # keep only the highest-scoring waterline and mark boxes per frame
    imgList_waterline = {}
    with open(os.path.join(args.save_folder, 'result_waterline.txt'), 'r') as f:
        for line in f:
            name, score, x1, y1, x2, y2 = line.split(" ")
            if name not in imgList_waterline or float(score) > imgList_waterline[name]['score']:
                imgList_waterline[name] = {'score': float(score),
                                           'x1': float(x1), 'y1': float(y1),
                                           'x2': float(x2), 'y2': float(y2)}

    imgList_mark = {}
    with open(os.path.join(args.save_folder, 'result_mark.txt'), 'r') as f:
        for line in f:
            name, score, x1, y1, x2, y2 = line.split(" ")
            if name not in imgList_mark or float(score) > imgList_mark[name]['score']:
                imgList_mark[name] = {'score': float(score),
                                      'x1': float(x1), 'y1': float(y1),
                                      'x2': float(x2), 'y2': float(y2)}

    # cv2.namedWindow('w2', 1)
    use_origin = True
    is_ok = False
    if not use_origin:
        img_path = os.path.join(rootPath_temp, 'JPEGImages', '%s.jpg')
    count = 0
    for name in imgList_gauge:
        img_gauge = imgList_gauge[name]
        if name in imgList_waterline and name in imgList_mark:
            img_waterline = imgList_waterline[name]
            img_mark = imgList_mark[name]
        else:
            continue
        if not use_origin:
            # draw waterline/mark boxes on the cropped test image
            image = cv2.imread(img_path % name)
            (h, w, c) = image.shape
            x1_w = max(math.floor(img_waterline['x1']), 0)
            y1_w = max(math.floor(img_waterline['y1']), 0)
            x2_w = min(math.floor(img_waterline['x2']), w)
            y2_w = min(math.floor(img_waterline['y2']), h)
            x1_m = max(math.floor(img_mark['x1']), 0)
            y1_m = max(math.floor(img_mark['y1']), 0)
            x2_m = min(math.floor(img_mark['x2']), w)
            y2_m = min(math.floor(img_mark['y2']), h)
            cv2.rectangle(image, (x1_w, y1_w), (x2_w, y2_w), (255, 0, 0), 5)
            cv2.rectangle(image, (x1_m, y1_m), (x2_m, y2_m), (0, 255, 0), 5)
            image = cv2.resize(image, (512, 512))
            cv2.imshow('w2', image)
            cv2.waitKey()
        else:
            # draw all boxes on the original frame, offsetting the
            # waterline/mark boxes by the gauge crop origin (x1_g, y1_g)
            image = cv2.imread(img_path % name)
            (h, w, c) = image.shape
            x1_g = math.floor(img_gauge['x1'])
            y1_g = math.floor(img_gauge['y1'])
            x2_g = math.floor(img_gauge['x2'])
            y2_g = math.floor(img_gauge['y2'])
            x1_w = max(math.floor(img_waterline['x1']), 0)
            y1_w = max(math.floor(img_waterline['y1']), 0)
            x2_w = min(math.floor(img_waterline['x2']), w)
            y2_w = min(math.floor(img_waterline['y2']), h)
            x1_m = max(math.floor(img_mark['x1']), 0)
            y1_m = max(math.floor(img_mark['y1']), 0)
            x2_m = min(math.floor(img_mark['x2']), w)
            y2_m = min(math.floor(img_mark['y2']), h)
            # the waterline center sitting below the mark center counts as OK
            is_ok = False
            if (y1_w + y2_w) > (y1_m + y2_m):
                count += 1
                is_ok = True
            cv2.rectangle(image, (x1_g, y1_g), (x2_g, y2_g), (255, 0, 0), 5)
            cv2.rectangle(image, (x1_g + x1_w, y1_g + y1_w),
                          (x1_g + x2_w, y1_g + y2_w), (0, 255, 0), 5)
            cv2.rectangle(image, (x1_g + x1_m, y1_g + y1_m),
                          (x1_g + x2_m, y1_g + y2_m), (0, 0, 255), 5)
            image = cv2.resize(image, (512, 512))
            # cv2.putText(image, 'gauge: %.2f' % img_gauge['score'], (10, 40),
            #             cv2.FONT_HERSHEY_COMPLEX, 1.2, (255, 0, 0), 2)
            # cv2.putText(image, 'waterline: %.2f' % img_waterline['score'], (10, 80),
            #             cv2.FONT_HERSHEY_COMPLEX, 1.2, (0, 255, 0), 2)
            # cv2.putText(image, 'mark: %.2f' % img_mark['score'], (10, 120),
            #             cv2.FONT_HERSHEY_COMPLEX, 1.2, (0, 0, 255), 2)
            cv2.putText(image, 'OK' if is_ok else 'Warning', (10, 40),
                        cv2.FONT_HERSHEY_COMPLEX, 1.2,
                        (0, 255, 0) if is_ok else (0, 0, 255), 2)
            frame_list.append(image)
            # cv2.imshow('w2', image)
            # cv2.waitKey()
    print('correct count:', count)
    return frame_list
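
# The three result-file loops in test_custom share one pattern: keep only the
# highest-scoring box per frame name. A helper in that shape could replace all
# three loops; this is a sketch only, and load_best_detections is a
# hypothetical name, not an existing function in this codebase.
def load_best_detections(result_path):
    """Parse 'name score x1 y1 x2 y2' lines, keeping the best box per name."""
    best = {}
    with open(result_path, 'r') as f:
        for line in f:
            name, score, x1, y1, x2, y2 = line.split(" ")
            if name not in best or float(score) > best[name]['score']:
                best[name] = {'score': float(score), 'x1': float(x1),
                              'y1': float(y1), 'x2': float(x2), 'y2': float(y2)}
    return best


# Possible caller-side use of the frames test_custom returns (cv2.VideoWriter
# is the standard OpenCV API; the output path and FPS here are illustrative):
#     frames = test_custom(video_path, video_name)
#     if frames:
#         h, w = frames[0].shape[:2]
#         writer = cv2.VideoWriter('result.avi',
#                                  cv2.VideoWriter_fourcc(*'XVID'), 25, (w, h))
#         for fr in frames:
#             writer.write(fr)
#         writer.release()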
def train():
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because "
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(cfg['min_dim'],
                                                          MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(cfg['min_dim'],
                                                         MEANS))
    elif args.dataset == 'CUSTOM':
        if args.dataset_root == VOC_ROOT or args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = custom
        dataset = customDetection(root=args.dataset_root,
                                  transform=SSDAugmentation(cfg['min_dim'],
                                                            MEANS))

    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        pass
        # resnet_weights = torch.load(args.save_folder + args.basenet)
        # print('Loading base network...')
        # ssd_net.resnet.load_state_dict(resnet_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # MultiBoxLoss(num_classes, overlap_thresh, prior_for_matching, bkg_label,
    #              neg_mining, neg_pos, neg_overlap, encode_target, use_gpu)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Epoch size:', epoch_size)
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data; restart the iterator when the epoch is exhausted
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        if args.cuda:
            images = Variable(images.cuda())
            # volatile was removed in PyTorch 0.4 (which .item() below requires);
            # targets need no gradients either way
            targets = [Variable(ann.cuda()) for ann in targets]
        else:
            images = Variable(images)
            targets = [Variable(ann) for ann in targets]

        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()),
                  end=' ')

        if args.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            # periodic checkpoint, named after the active dataset
            torch.save(ssd_net.state_dict(),
                       args.save_folder + '/ssd300_' + args.dataset + '_' +
                       repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(),
               args.save_folder + args.dataset + '.pth')
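
# train() calls adjust_learning_rate and weights_init, which are not defined in
# this section. The definitions below are a sketch assuming the standard
# ssd.pytorch helpers (step LR decay and Xavier init); drop them if the full
# file already defines its own versions.
import torch.nn as nn
import torch.nn.init as init


def adjust_learning_rate(optimizer, gamma, step):
    """Set the LR to the initial LR decayed by gamma at each lr_steps boundary."""
    lr = args.lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def weights_init(m):
    """Xavier-initialize conv weights and zero the biases."""
    if isinstance(m, nn.Conv2d):
        init.xavier_uniform_(m.weight.data)
        m.bias.data.zero_()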
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset)


def evaluate_detections(box_list, output_dir, dataset):
    write_voc_results_file(box_list, dataset)
    do_python_eval(output_dir)


if __name__ == '__main__':
    # load net
    num_classes = len(labelmap) + 1  # +1 for background
    net = build_ssd('test', 300, num_classes)  # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data
    dataset = customDetection(args.custom_root, [('shenhe', set_type)],
                              BaseTransform(300, dataset_mean),
                              customAnnotationTransform())
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    test_net(args.save_folder, net, args.cuda, dataset,