def evaluate_detections(self, detections, rois):
    """Write detection/proposal results to disk and render per-image overlays.

    Args:
        detections: per-class list of per-image detection arrays; index 0 is
            the background class and is skipped.
        rois: per-image region proposals.
    """
    # BUG FIX: the originals opened the pickle files inline in pickle.dump()
    # and never closed them; use context managers so handles are flushed and
    # closed even on error.
    with open(os.path.join(self._result_path, 'detections.pickle'), 'wb') as f:
        pickle.dump(detections[1:], f)
    with open(os.path.join(self._result_path, 'rois.pickle'), 'wb') as f:
        pickle.dump(rois, f)
    print(' => results written to %s' % self._result_path)
    for index, filename in enumerate(self.image_filenames):
        im = cv2.imread(filename)
        # OpenCV decodes BGR; the drawing helpers expect RGB
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, [det[index] for det in detections[1:]],
                   self.classes[1:],
                   filename.replace('.jpg', '_objects.png'), 1)
        self.draw_region_proposals(im, rois[index], 0.5,
                                   filename.replace('.jpg', '_proposals.png'))
def main(tempFileList, fileOp):
    """Run the RFCN-DCN terror-detection model over a list of image paths.

    Args:
        tempFileList: iterable of image file paths to process.
        fileOp: open file handle forwarded to show_boxes for result logging.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    out_dir = os.path.join(
        cur_path,
        'demo/output/terror-det-rg-data-output/terror-det-v0.9-test/JPEGImages')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    # set up class names (num_classes counts the background class as well)
    num_classes = 7
    classes = ['tibetan flag', 'guns', 'knives', 'not terror',
               'islamic flag', 'isis flag']
    # load demo data
    image_names = tempFileList
    data = []
    for im_name in image_names:
        print(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                           dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        cur_path + '/demo/models/' + 'rfcn_voc', 10, process=True)
    # modify by zxt
    predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up so the timed runs below exclude lazy initialization cost
    for j in xrange(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        print("begining process %s" % (im_name))
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in xrange(len(data_batch.data))]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        # visualize
        im = cv2.imread(im_name)
        im_result = show_boxes(fileOp, im_name, im, dets_nms, classes, 1)
        # BUG FIX: out_dir has no trailing separator, so the original
        # `out_dir + basename` wrote files *next to* the output directory
        # (".../JPEGImagesfoo.jpg"); join the path properly.
        cv2.imwrite(os.path.join(out_dir, im_name.split('/')[-1]), im_result)
    print('done')
def main():
    """Run the RFCN(-DCN) COCO demo on the two bundled sample images and
    display the detections."""
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only \
        else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    # set up class names (80 COCO foreground classes; num_classes adds background)
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
               'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
               'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
               'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
               'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
               'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
               'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
               'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
               'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
               'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
               'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
    # load demo data
    image_names = ['COCO_test2015_000000000891.jpg',
                   'COCO_test2015_000000001669.jpg']
    data = []
    for im_name in image_names:
        # BUG FIX: the message mixed a %-style '%s' placeholder with
        # str.format, so a failing assert printed the literal
        # "%s does not exist" with no filename.
        assert os.path.exists(cur_path + '/../demo/' + im_name), \
            '{} does not exist'.format('../demo/' + im_name)
        im = cv2.imread(cur_path + '/../demo/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                           dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        0, process=True)
    predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up so the timed loop below excludes lazy initialization cost
    for j in xrange(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in xrange(len(data_batch.data))]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)
    print('done')
def main():
    """Run the RFCN(-DCN) COCO demo (Python-3 variant) on the two bundled
    sample images and display the detections."""
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only \
        else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    # set up class names (80 COCO foreground classes; num_classes adds background)
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
               'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
               'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
               'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
               'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
               'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
               'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
               'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
               'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
               'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
               'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
    # load demo data
    image_names = ['COCO_test2015_000000000891.jpg',
                   'COCO_test2015_000000001669.jpg']
    data = []
    for im_name in image_names:
        # BUG FIX: the message mixed a %-style '%s' placeholder with
        # str.format, so a failing assert printed the literal
        # "%s does not exist" with no filename.
        assert os.path.exists(cur_path + '/../demo/' + im_name), \
            '{} does not exist'.format('../demo/' + im_name)
        im = cv2.imread(cur_path + '/../demo/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                           dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in range(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in range(len(data))]
    provide_label = [None for i in range(len(data))]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        0, process=True)
    predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up so the timed loop below excludes lazy initialization cost
    for j in range(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in range(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in range(len(data_batch.data))]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)
    print('done')
def process_one_batch_images_fun(isUrlFlag=False, one_batch_images_list=None,
                                 init_model_param=None, fileOp=None,
                                 vis=False):
    """Detect terror-related classes in one batch of images (paths or URLs).

    Args:
        isUrlFlag: True when entries of one_batch_images_list are URLs.
        one_batch_images_list: image paths/URLs to process.
        init_model_param: [sym, arg_params, aux_params] of a loaded model.
        fileOp: file handle forwarded to show_boxes for result logging.
        vis: whether show_boxes should visualize the detections.
    """
    num_classes = 11  # 0 is background
    classes = ['islamic flag', 'isis flag', 'tibetan flag', 'knives_true',
               'guns_true', 'knives_false', 'knives_kitchen', 'guns_anime',
               'guns_tools', 'not terror']
    image_names = one_batch_images_list
    if len(image_names) <= 0:
        return
    all_can_read_image = []
    data = []
    for im_name in image_names:
        im = readImage_fun(isUrlFlag=isUrlFlag, imagePath=im_name)
        # skip images that could not be decoded (readImage_fun returns a
        # 0-d value on failure)
        if np.shape(im) == ():
            print("ReadImageError : %s" % (im_name))
            continue
        all_can_read_image.append(im_name)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                           dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})
    # BUG FIX: if no image in the batch was readable, the Predictor below
    # would be built over empty shape lists and fail; bail out early instead.
    if not data:
        return
    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    predictor = Predictor(init_model_param[0], data_names, label_names,
                          context=[mx.gpu(int(args.gpuId))],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=init_model_param[1],
                          aux_params=init_model_param[2])
    # BUG FIX: NMS was hard-coded to device 0 while the predictor runs on
    # args.gpuId; keep both on the same GPU.
    nms = gpu_nms_wrapper(config.TEST.NMS, int(args.gpuId))
    for idx, im_name in enumerate(all_can_read_image):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in xrange(len(data_batch.data))]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > args.threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        show_boxes(isUrlFlag=isUrlFlag, im_name=im_name, dets=dets_nms,
                   classes=classes, scale=1, vis=vis, fileOp=fileOp,
                   flag=args.outputFileFlag)
    print('process one batch images done')
def inference_rcnn_AICity(cfg, dataset, image_set, root_path, dataset_path,
                          ctx, prefix, epoch, vis, ignore_cache, shuffle,
                          has_rpn, proposal, thresh, logger=None,
                          output_path=None):
    """Run detection over an AICity test set and dump per-image results.

    For every image, appends lines of '<class> x1 y1 x2 y2 score' to
    data/output/<image-basename>.txt and shows the annotated frame.

    Args:
        cfg: experiment configuration object.
        dataset / image_set / root_path / dataset_path: imdb construction args.
        ctx: list of MXNet device contexts.
        prefix, epoch: checkpoint to load.
        vis, ignore_cache, shuffle, has_rpn, proposal, thresh: test options.
        logger: required logging.Logger.
        output_path: imdb result path.
    """
    if not logger:
        assert False, 'require a logger'
    # print cfg
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))
    # load symbol and testing data
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path,
                             result_path=output_path)
        roidb = imdb.gt_roidb_Shuo()
    else:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol_rfcn(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path,
                             result_path=output_path)
        gt_roidb = imdb.gt_roidb_Shuo()
        roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb)
    print('len(roidb): {}'.format(len(roidb)))
    # get test data iter
    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle,
                           has_rpn=has_rpn)
    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    print('inferring: {} epoch: {}'.format(prefix, epoch))
    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict, is_train=False)
    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = None
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                 max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        max_data_shape.append(
            ('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))
    # create predictor
    predictor = Predictor(sym, data_names, label_names, context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(cfg.TEST.NMS, 0)
    # start detection
    print('test_data.size {}'.format(test_data.size))
    print('test_data: {}'.format(test_data))
    print('data_names: {}'.format(data_names))
    print('test_data.provide_data: {}'.format(test_data.provide_data))
    print('test_data.provide_label: {}'.format(test_data.provide_label))
    nnn = 0
    classes = ['Car', 'SUV', 'SmallTruck', 'MediumTruck', 'LargeTruck',
               'Pedestrian', 'Bus', 'Van', 'GroupOfPeople', 'Bicycle',
               'Motorcycle', 'TrafficSignal-Green', 'TrafficSignal-Yellow',
               'TrafficSignal-Red']
    for im_info, data_batch in test_data:
        print(nnn)
        image_name = roidb[nnn]['image']
        tic()
        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, data_dict_all = im_detect(
            predictor, data_batch, data_names, scales, cfg)
        boxes = boxes_all[0].astype('f')
        scores = scores_all[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if cfg.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            threshold = 0.2  # confidence threshold between 0 and 1
            cls_dets = cls_dets[cls_dets[:, -1] > threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(image_name, toc()))
        # visualize
        im = cv2.imread(image_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)
        nnn = nnn + 1
        # BUG FIX: the original used image_name.split('.')[0], which keeps the
        # directory components of the full path, so the output file was opened
        # under a (usually missing) nested directory instead of data/output/.
        # The unused basename variable in the original shows this was intended.
        base_name = os.path.splitext(os.path.basename(image_name))[0]
        if not os.path.exists(os.path.join('data', 'output')):
            os.makedirs(os.path.join('data', 'output'))
        output_file = os.path.join('data', 'output', base_name + '.txt')
        # append mode preserved from the original; the with-statement fixes
        # the leaked file handle.
        with open(output_file, 'a') as thefile:
            for cls_idx, cls_name in enumerate(classes):
                cls_dets = dets_nms[cls_idx]
                for x_small, y_small, x_large, y_large, prob in cls_dets:
                    # clamp so x2 > x1 and y2 > y1 by at least 0.01
                    thefile.write(cls_name + ' ' + str(x_small) + ' ' +
                                  str(y_small) + ' ' +
                                  str(max(x_small + 0.01, x_large)) + ' ' +
                                  str(max(y_small + 0.01, y_large)) + ' ' +
                                  str(prob) + '\n')
def main():
    """Detect COCO classes on every frame of each .mp4 under video_path,
    writing an annotated output video and a per-video pickle of detections."""
    # get symbol
    pprint.pprint(config)
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn" if not args.rfcn_only \
        else "resnet_v1_101_fpn_rcnn"
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    # set up class names (80 COCO foreground classes; num_classes adds background)
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
               'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
               'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
               'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
               'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
               'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
               'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
               'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
               'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
               'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
               'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
    # find all videos
    video_path = "../../tmp"  # "../../aic2018/track1/track1_videos"
    video_files = sorted([x for x in os.listdir(video_path)
                          if x.endswith(".mp4")])
    save_path = "../../tmp/output"  # "../../aic2018/track1/output"
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    print("processing {} videos...".format(len(video_files)))
    pbar = tqdm(total=len(video_files))
    for vf in video_files:
        vid = imageio.get_reader(os.path.join(video_path, vf), 'ffmpeg')
        # probe only the first frame to size the predictor
        data = []
        for idx, im in enumerate(vid):
            if idx == 0:
                target_size = config.SCALES[0][0]
                max_size = config.SCALES[0][1]
                im, im_scale = resize(im, target_size, max_size,
                                      stride=config.network.IMAGE_STRIDE)
                im_tensor = transform(im, config.network.PIXEL_MEANS)
                im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3],
                                     im_scale]], dtype=np.float32)
                data.append({'data': im_tensor, 'im_info': im_info})
            else:
                break
        # get predictor
        data_names = ['data', 'im_info']
        label_names = []
        data = [[mx.nd.array(data[i][name]) for name in data_names]
                for i in xrange(len(data))]
        max_data_shape = [[('data', (1, 3,
                                     max([v[0] for v in config.SCALES]),
                                     max([v[1] for v in config.SCALES])))]]
        provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                        for i in xrange(len(data))]
        provide_label = [None for i in xrange(len(data))]
        arg_params, aux_params = load_param(
            cur_path + '/../model/demo_model/' +
            ('fpn_dcn_coco' if not args.rfcn_only else 'fpn_coco'),
            0, process=True)
        predictor = Predictor(sym, data_names, label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)
        print("successfully load model")
        vout = []
        # write annotated frames to a video alongside the pickle dump
        writer = skvideo.io.FFmpegWriter(
            os.path.join(save_path, vf.replace(".mp4", "_out.mp4")),
            outputdict={'-vcodec': 'libx264', '-b': '300000000'})
        for frame_idx, im in enumerate(vid):
            im_original = im.copy()
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im, target_size, max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3],
                                 im_scale]], dtype=np.float32)
            data_idx = [{"data": im_tensor, "im_info": im_info}]
            data_idx = [[mx.nd.array(data_idx[i][name])
                         for name in data_names]
                        for i in xrange(len(data_idx))]
            # BUG FIX: the batch index previously reused the stale 'idx' from
            # the shape-probing loop above; use the current frame index.
            data_batch = mx.io.DataBatch(
                data=[data_idx[0]], label=[], pad=0, index=frame_idx,
                provide_data=[[(k, v.shape)
                               for k, v in zip(data_names, data_idx[0])]],
                provide_label=[None])
            scales = [data_batch.data[i][1].asnumpy()[0, 2]
                      for i in xrange(len(data_batch.data))]
            tic()
            scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                 data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            num_dets = 0
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                    else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.65, :]
                dets_nms.append(cls_dets)
                num_dets += cls_dets.shape[0]
            print('testing {} the {} th frame at {:.4f}s, detections {}'.format(
                vf, frame_idx, toc(), num_dets))
            # save results
            save_im, outputs = show_boxes(im_original, dets_nms, classes, 1,
                                          False)
            writer.writeFrame(save_im)
            for out in outputs:
                vout.append([frame_idx] + out)
        # save the whole video detection into pickle file
        writer.close()
        with open(os.path.join(save_path, vf.replace(".mp4", "_detect.pkl")),
                  "wb") as f:
            pickle.dump(vout, f, protocol=2)
        pbar.update(1)
    pbar.close()
    print('done')
def main():
    """Detect vehicles on every frame of each .mp4 under video_path with an
    FPN-DCN model trained on DETRAC, and pickle per-video detections."""
    # get symbol
    pprint.pprint(config)
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn"
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    # set up class names (num_classes counts the background class as well)
    num_classes = 5
    classes = ["car", "bus", "van", "others"]
    # load demo images: only the first one is used, to size the predictor
    im_path = '../../aic2018/track1/images/'
    # NOTE(review): the original condition repeated x.startswith("9_1") twice;
    # kept a single test — confirm whether a second prefix was intended.
    image_names = [x for x in os.listdir(im_path)
                   if x.endswith(".jpg") and x.startswith("9_1")
                   and not x.endswith("_bbox.jpg")]
    data = []
    for idx, im_name in enumerate(image_names[:1]):
        if idx == 0:
            # BUG FIX: the message mixed a %-style '%s' placeholder with
            # str.format, so a failing assert printed the literal
            # "%s does not exist" with no filename.
            assert os.path.exists(im_path + im_name), (
                '{} does not exist'.format(im_path + im_name))
            im = cv2.imread(im_path + im_name,
                            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im, target_size, max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3],
                                 im_scale]], dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})
        else:
            data.append({'data': None, 'im_info': None})
    print(data)
    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    # only entry 0 holds tensors, so replicate it for every slot
    data = [[mx.nd.array(data[0][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[0])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    # load parameters
    arg_params, aux_params = load_param(cur_path + '/../model/' + 'fpn_detrac',
                                        1, process=True)
    predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    print("successfully load model")
    # find all videos
    video_path = "../../tmp"
    video_files = [x for x in os.listdir(video_path) if x.endswith(".mp4")]
    save_path = "../../tmp/output"
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    print("processing {} videos...".format(len(video_files)))
    pbar = tqdm(total=len(video_files))
    for vf in video_files:
        vid = imageio.get_reader(os.path.join(video_path, vf), 'ffmpeg')
        vout = []
        for frame_idx, im in enumerate(vid):
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im, target_size, max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3],
                                 im_scale]], dtype=np.float32)
            data_idx = [{"data": im_tensor, "im_info": im_info}]
            data_idx = [[mx.nd.array(data_idx[i][name])
                         for name in data_names]
                        for i in xrange(len(data_idx))]
            # BUG FIX: the batch index previously reused the stale 'idx' from
            # the image pre-processing loop above; use the frame index.
            data_batch = mx.io.DataBatch(
                data=[data_idx[0]], label=[], pad=0, index=frame_idx,
                provide_data=[[(k, v.shape)
                               for k, v in zip(data_names, data_idx[0])]],
                provide_label=[None])
            scales = [data_batch.data[i][1].asnumpy()[0, 2]
                      for i in xrange(len(data_batch.data))]
            tic()
            scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                 data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                    else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)
            # BUG FIX: the log printed len(dets_nms), which is the number of
            # classes, not the number of detections; sum the per-class counts.
            print('testing {} the {} th frame at {:.4f}s, detections {}'.format(
                vf, frame_idx, toc(), sum(d.shape[0] for d in dets_nms)))
            # save results
            save_im, outputs = show_boxes(im, dets_nms, classes, 1)
            for out in outputs:
                vout.append([frame_idx] + out)
        # save the whole video detection into pickle file
        with open(os.path.join(save_path, vf.replace(".mp4", ".pkl")),
                  "wb") as f:
            pickle.dump(vout, f, protocol=2)
        pbar.update(1)
    pbar.close()
    print('done')
def process_one_batch_images_fun(image=None, init_model_param=None,
                                 fileOp=None, vis=None):
    """Run RFCN-DCN detection on a single decoded image.

    Args:
        image: decoded image array (fed directly into resize()).
        init_model_param: [sym, arg_params, aux_params] of a loaded model.
        fileOp: file handle forwarded to show_boxes for result logging.
        vis: visualization flag forwarded to show_boxes.
    """
    num_classes = RFCN_DCN_CONFIG['num_classes']  # 0 is background
    classes = RFCN_DCN_CONFIG['num_classes_name_list']
    im_name = image
    all_can_read_image = []
    data = []
    # BUG FIX: this append was commented out in the original, so the
    # detection loop below iterated over an empty list and the function
    # silently produced no output at all.
    all_can_read_image.append(im_name)
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(im_name, target_size, max_size,
                          stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                       dtype=np.float32)
    data.append({'data': im_tensor, 'im_info': im_info})
    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    predictor = Predictor(init_model_param[0], data_names, label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=init_model_param[1],
                          aux_params=init_model_param[2])
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    for idx, im_name in enumerate(all_can_read_image):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in xrange(len(data_batch.data))]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            # NOTE(review): min_threshold is not defined in this function;
            # presumably a module-level constant — confirm it exists before
            # relying on this path.
            cls_dets = cls_dets[cls_dets[:, -1] > min_threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        show_boxes(im_name=im_name, dets=dets_nms, classes=classes, scale=1,
                   vis=vis, fileOp=fileOp, flag=1)
    print('process one batch images done')