def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn_online_train'
    model = '/../model/rfcn_fgfa_flownet_vid_original'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    num_classes = 31
    classes = ['__background__', 'airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle', 'dog', 'domestic_cat',
               'elephant', 'fox', 'giant_panda', 'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit', 'red_panda',
               'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
               'watercraft', 'whale', 'zebra']

    # load demo data
    # other snippets tried: ILSVRC2015_val_00007016, 00000004, 00044006,
    # 00007010, 00016002; ILSVRC2015_val_00010001: zebra;
    # 37002: cat and fox (motion blur); ILSVRC2015_val_00095000: fox (defocus)
    snippet_name = 'ILSVRC2015_val_00177001'
    image_names = glob.glob(cur_path + '/../demo/' + snippet_name + '/*.JPEG')
    image_names.sort()
    output_dir = cur_path + '/../demo/test_'
    output_dir_ginst = cur_path + '/../demo/test_ginst_'
    output_dir_linst = cur_path + '/../demo/test_linst_'
    if cfg.TEST.SEQ_NMS:
        output_dir += 'SEQ_NMS_'
        output_dir_ginst += 'SEQ_NMS_'
    output_dir += snippet_name + '/'
    output_dir_ginst += snippet_name + '/'
    output_dir_linst += snippet_name + '/'

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({'data': im_tensor, 'im_info': im_info,
                     'data_cache': im_tensor, 'feat_cache': im_tensor})

    # get predictor
    print('get-predictor')
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []
    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', (19, cfg.network.FGFA_FEAT_DIM,
                                       np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                       np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int)))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    # add parameters for instance cls & regression
    arg_params['rfcn_ibbox_bias'] = arg_params['rfcn_bbox_bias'].copy()      # deep copy
    arg_params['rfcn_ibbox_weight'] = arg_params['rfcn_bbox_weight'].copy()  # deep copy
    max_inst = cfg.TEST.NUM_INSTANCES

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    all_boxes_inst = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    ginst_mem = []          # list for instance class
    sim_array_global = []   # similarity array list
    ginst_ID = 0

    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = True
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):
        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if idx != len(data) - 1:
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)  # put 19 data & feat list into data_batch
                # get box result [[scores, pred_boxes, rois, data_dict, iscores, ipred_boxes, cropped_embed]]
                pred_result = im_detect_all(aggr_predictors, data_batch, data_names, scales, cfg)

                data_batch.data[0][-2] = None  # 19 frames of data possess much memory, so clear it
                data_batch.provide_data[0][-2] = ('data_cache', None)  # also clear shape info of data
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                ginst_ID_prev = ginst_ID
                ginst_ID, out_im, out_im2, out_im_linst = process_link_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms, all_boxes,
                    all_boxes_inst, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales,
                    ginst_mem, sim_array_global, ginst_ID)
                ginst_ID_now = ginst_ID
                init_inst_params(ginst_mem, ginst_ID_prev, ginst_ID_now, max_inst,
                                 aggr_predictors, arg_params)

                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    if cfg.TEST.DISPLAY[0]:
                        save_image(output_dir, file_idx, out_im)
                    if cfg.TEST.DISPLAY[1]:
                        save_image(output_dir_ginst, file_idx, out_im2)
                    if cfg.TEST.DISPLAY[2]:
                        save_image(output_dir_linst, file_idx, out_im_linst)
                # testing by metric
                print('testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1)))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect_all(aggr_predictors, data_batch, data_names, scales, cfg)

                ginst_ID_prev = ginst_ID
                ginst_ID, out_im, out_im2, out_im_linst = process_link_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms, all_boxes,
                    all_boxes_inst, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales,
                    ginst_mem, sim_array_global, ginst_ID)
                ginst_ID_now = ginst_ID
                init_inst_params(ginst_mem, ginst_ID_prev, ginst_ID_now, max_inst,
                                 aggr_predictors, arg_params)

                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    if cfg.TEST.DISPLAY[0]:
                        save_image(output_dir, file_idx, out_im)
                    if cfg.TEST.DISPLAY[1]:
                        save_image(output_dir_ginst, file_idx, out_im2)
                    if cfg.TEST.DISPLAY[2]:
                        save_image(output_dir_linst, file_idx, out_im_linst)
                print('testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1)))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print('done')
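
# --- illustration: the sliding-window schedule shared by every variant below ---
# All of these demo loops use one buffering protocol: a deque of length 2*K+1
# (K = cfg.TEST.KEY_FRAME_INTERVAL) is pre-padded with K copies of the first
# frame, filled as frames stream in, and flushed by replaying the last frame
# K+1 times, so that every frame is detected exactly once while sitting at the
# window center. A minimal, self-contained sketch of just that schedule
# (integer frame IDs stand in for images/features; no MXNet needed):
def window_schedule(frames, K):
    """Yield (center_frame, window_contents) once per input frame."""
    from collections import deque
    window = deque(maxlen=2 * K + 1)
    for _ in range(K):                      # front padding with the first frame
        window.append(frames[0])
    schedule = []
    for idx, f in enumerate(frames):
        if idx != len(frames) - 1:
            window.append(f)
            if len(window) == 2 * K + 1:    # buffer full: window[K] is current
                schedule.append((window[K], list(window)))
        else:
            for _ in range(K + 1):          # back padding with the last frame
                window.append(f)
                schedule.append((window[K], list(window)))
    return schedule

# window_schedule(list(range(6)), K=2) centers each of the six frames exactly once.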
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_fgfa_flownet_vid'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    num_classes = 31
    classes = ['__background__', 'airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle', 'dog', 'domestic_cat',
               'elephant', 'fox', 'giant_panda', 'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit', 'red_panda',
               'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
               'watercraft', 'whale', 'zebra']

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    image_names.sort()  # keep frames in temporal order
    output_dir = cur_path + '/../demo/rfcn_fgfa/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({'data': im_tensor, 'im_info': im_info,
                     'data_cache': im_tensor, 'feat_cache': im_tensor})

    # get predictor
    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []
    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', (19, cfg.network.FGFA_FEAT_DIM,
                                       np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                       np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int)))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]

    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):
        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if idx != len(data) - 1:
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)
                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print 'done'
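
# --- illustration: the dets layout consumed by nms() in the SEQ_NMS block ---
# In the SEQ_NMS branch above, all_boxes[class][frame] holds an (N, 5) float
# array of [x1, y1, x2, y2, score] rows, and nms(dets) returns the indices of
# the rows that survive suppression. The helper below is a plain-numpy sketch
# of that greedy IoU suppression for reference; it is an illustrative
# stand-in, not the project's py_nms_wrapper implementation.
import numpy as np

def greedy_nms(dets, iou_thresh):
    """Return indices of detections kept by greedy IoU suppression."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]              # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the current best box against all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= iou_thresh]    # keep only weak overlaps
    return keep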
def predict(self, images, feat_output, aggr_feat_output):
    model = self.model
    all_frame_interval = self.all_frame_interval
    feat_sym = self.feat_sym
    aggr_sym = self.aggr_sym
    num_classes = self.num_classes
    classes = self.classes
    max_per_image = self.max_per_image

    output_dir = cur_path + '/../demo/rfcn_fgfa_{}/'.format(self.index)
    self.index += 1
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im in images:
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({'data': im_tensor, 'im_info': im_info,
                     'data_cache': im_tensor, 'feat_cache': im_tensor})

    # get predictor
    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []
    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (11, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', (11, cfg.network.FGFA_FEAT_DIM,
                                       np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                       np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int)))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]

    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):
        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if idx != len(data) - 1:
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result, aggr_feat = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)
                assert len(aggr_feat) == 1

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result, aggr_feat = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)
                assert len(aggr_feat) == 1
                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print 'done'
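
# --- illustration: the per-frame preprocessing contract of predict() ---
# predict() takes raw BGR frames and feeds them through the same resize /
# transform helpers as main(): scale the short side toward cfg.SCALES[0][0]
# while keeping the long side within cfg.SCALES[0][1], subtract per-channel
# pixel means, and reorder to 1x3xHxW, with im_info carrying (H, W, scale).
# A plain cv2/numpy sketch of that contract (semantics inferred from the
# helpers used above; the real resize() also pads to cfg.network.IMAGE_STRIDE,
# which is omitted here):
import cv2
import numpy as np

def preprocess_frame(im, target_size, max_size, pixel_means):
    h, w = im.shape[:2]
    scale = float(target_size) / min(h, w)
    if round(scale * max(h, w)) > max_size:             # cap the long side
        scale = float(max_size) / max(h, w)
    im = cv2.resize(im, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
    tensor = im.astype(np.float32) - np.asarray(pixel_means, dtype=np.float32)
    tensor = tensor.transpose(2, 0, 1)[np.newaxis, :]   # HWC -> 1x3xHxW
    im_info = np.array([[tensor.shape[2], tensor.shape[3], scale]], dtype=np.float32)
    return tensor, im_info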
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/data2/output/fgfa_rfcn/jrdb/resnet_v1_101_flownet_jrdb/VID_train_15frames/fgfa_rfcn_vid'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    classes = ['__background__', 'p']
    num_classes = len(classes)

    data_loader = DataLoader(args.input + '/*', cfg)

    output_dir_tmp = '%s_%s_epoc_%d' % (args.dataset, os.path.basename(args.input), args.epoc)
    output_dir = os.path.join('/data2', 'output', output_dir_tmp)
    print(output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)

    # get predictor
    print 'get-predictor'
    label_names = []
    t1 = time.time()
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', (19, cfg.network.FGFA_FEAT_DIM,
                                       np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                       np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int)))]]
    provide_data, provide_label = data_loader.get_provided_data_and_label()

    arg_params, aux_params = load_param(model, args.epoc, process=True)

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_idx_ = data_loader.get_data_by_index(idx)
    len_data = data_loader.get_len_data()
    data_batch = mx.io.DataBatch(data=[data_idx_], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data_idx_)]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len_data)] for _ in range(num_classes)]

    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = args.threshold
    for idx, element in enumerate(data_loader):  # loop through the list of images
        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if idx != len_data - 1:
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)
                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len_data):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            # fetch the frame through the loader; `data` is not defined in this variant
            frame = data_loader.get_data_by_index(idx)
            out_im = draw_all_detection(frame[0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print 'done'
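
# --- illustration: the interface this variant assumes of DataLoader ---
# The DataLoader class used above is not defined in this file. From its call
# sites, it must expose the surface sketched below; this stub only documents
# that assumed contract and is not the real implementation.
class DataLoaderStub(object):
    def __init__(self, pattern, cfg):
        self.image_names = sorted(glob.glob(pattern))
        self.cfg = cfg

    def get_len_data(self):
        return len(self.image_names)

    def get_data_by_index(self, idx):
        # expected: [data, im_info, data_cache, feat_cache] as mx.nd.array,
        # the same per-frame layout main() builds by hand in the other variants
        raise NotImplementedError

    def get_provided_data_and_label(self):
        # expected: (provide_data, provide_label) lists for the Predictor
        raise NotImplementedError

    def __iter__(self):
        for i in xrange(self.get_len_data()):
            yield self.get_data_by_index(i)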
def predict(self, images, feat_output, aggr_feat_output):
    model = self.model
    all_frame_interval = self.all_frame_interval
    feat_sym = self.feat_sym
    aggr_sym = self.aggr_sym

    feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)

    # get predictor
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []
    t1 = time.time()
    interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    data = VideoLoader(images)
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (interval, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', (interval, cfg.network.FGFA_FEAT_DIM,
                                       np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                       np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int)))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[0])] for _ in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):
        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if idx != len(data) - 1:
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                if feat_output is not None:
                    feat_output.append(feat.asnumpy()[0][:1024])
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                if feat_output is not None:
                    feat_output.append(feat.asnumpy()[0][:1024])
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result, aggr_feat = im_detect(aggr_predictors, data_batch, data_names,
                                                   scales, cfg, aggr_feats=True)
                assert len(aggr_feat) == 1
                if aggr_feat_output is not None:
                    aggr_feat_output.append(aggr_feat[0].asnumpy()[0])

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                print '\r(main) Testing FGFA R-FCN: {} / {}'.format(file_idx, len(images)),
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            if feat_output is not None:
                feat_output.append(feat.asnumpy()[0][:1024])
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result, aggr_feat = im_detect(aggr_predictors, data_batch, data_names,
                                                   scales, cfg, aggr_feats=True)
                assert len(aggr_feat) == 1
                if aggr_feat_output is not None:
                    aggr_feat_output.append(aggr_feat[0].asnumpy()[0])
                print '\r(end) Testing FGFA R-FCN: {} / {}'.format(file_idx, len(images)),
                file_idx += 1
                end_counter += 1

    print  # finish the \r progress line
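
# --- usage note for the feature-collecting predict() above ---
# feat_output and aggr_feat_output are plain lists supplied by the caller.
# Per frame, feat_output receives the first 1024 channels of the per-frame
# feature map (in FGFA the per-frame output concatenates the task feature with
# embedding channels used only for the adaptive aggregation weights, so the
# slice keeps the task feature -- an inference from how FGFA_FEAT_DIM is used,
# not stated in this file), and aggr_feat_output receives the aggregated map
# at the window center. Hypothetical caller (`FGFAPredictor` is an assumed
# wrapper name for the class that owns predict()):
#
#   feats, aggr_feats = [], []
#   detector = FGFAPredictor(...)                # hypothetical construction
#   detector.predict(frames, feats, aggr_feats)
#   # feats[i]:      numpy array, per-frame feature of frame i
#   # aggr_feats[i]: numpy array, aggregated feature for detection i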
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    # superseded checkpoint path, kept for reference:
    # model = '/data/output/fgfa_rfcn/vis_drone/resnet_v1_101_flownet_vis_drone_rfcn_end2end_ohem/VID_train_vid/fgfa_rfcn_vid'
    model = '/data2/output/fgfa_rfcn/jrdb/resnet_v1_101_flownet_jrdb/VID_train_15frames_cut/fgfa_rfcn_vid'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names (the VisDrone label set is superseded by the
    # two-class set below, so it is kept commented out)
    # classes = ['__background__',  # always index 0
    #            'ignored regions', 'pedestrian', 'people', 'bicycle', 'car',
    #            'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor',
    #            'others']
    classes = ['__background__', 'p']
    num_classes = len(classes)

    # load demo data
    image_names = glob.glob(args.input + '/*')
    image_names.sort()
    print 'num of images', len(image_names)

    output_dir = '/data2/demo_new2/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({'data': im_tensor, 'im_info': im_info,
                     'data_cache': im_tensor, 'feat_cache': im_tensor})

    # get predictor
    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []
    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', (19, cfg.network.FGFA_FEAT_DIM,
                                       np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                       np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int)))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(model, 190, process=True)
    # debug: dump the loaded parameters
    print arg_params
    print aux_params

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]

    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 0.4
    for idx, element in enumerate(data):
        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if idx != len(data) - 1:
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)
                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print 'done'