Example #1
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn_online_train'
    model = '/../model/rfcn_fgfa_flownet_vid_original'

    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
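    # the aggregation window covers KEY_FRAME_INTERVAL frames on each side of the key frame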
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
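    # eval() resolves 'module.Class', where the module and class share the name in
    # cfg.symbol; one instance builds the per-frame feature symbol, the other the
    # aggregation symbol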

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    num_classes = 31
    classes = ['__background__','airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox',
               'giant_panda', 'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit',
               'red_panda', 'sheep', 'snake', 'squirrel',
               'tiger', 'train', 'turtle', 'watercraft',
               'whale', 'zebra']

    # load demo data

    snippet_name = 'ILSVRC2015_val_00177001'
    # other snippets to try: 'ILSVRC2015_val_00007016', 'ILSVRC2015_val_00000004',
    # 'ILSVRC2015_val_00044006', 'ILSVRC2015_val_00007010', 'ILSVRC2015_val_00016002'
    # ILSVRC2015_val_00010001: zebra
    # 37002: cat and fox, motion blur
    # ILSVRC2015_val_00095000: fox, defocus
    image_names = glob.glob(cur_path + '/../demo/' + snippet_name + '/*.JPEG')
    image_names.sort()
    output_dir = cur_path + '/../demo/test_'  # rfcn_fgfa_online_train_
    output_dir_ginst = cur_path + '/../demo/test_ginst_'  # rfcn_fgfa_online_train_
    output_dir_linst = cur_path + '/../demo/test_linst_'
    if cfg.TEST.SEQ_NMS:
        output_dir += 'SEQ_NMS_'
        output_dir_ginst += 'SEQ_NMS_'
    output_dir += snippet_name + '/'
    output_dir_ginst += snippet_name + '/'
    output_dir_linst += snippet_name + '/'
    # create the output directories up front, as the other demos do
    for directory in (output_dir, output_dir_ginst, output_dir_linst):
        if not os.path.exists(directory):
            os.makedirs(directory)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)

        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({'data': im_tensor, 'im_info': im_info,  'data_cache': im_tensor,    'feat_cache': im_tensor})
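        # data_cache/feat_cache hold per-frame placeholders for now; prepare_data()
        # later swaps in the full window of cached frames and features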



    # get predictor

    print('get-predictor')
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', ((19, cfg.network.FGFA_FEAT_DIM,
                                                np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                                np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int))))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    #add parameters for instance cls & regression
    arg_params['rfcn_ibbox_bias'] = arg_params['rfcn_bbox_bias'].copy()  #deep copy
    arg_params['rfcn_ibbox_weight'] = arg_params['rfcn_bbox_weight'].copy()  #deep copy
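    # the instance bbox branch starts as a copy of the trained rfcn_bbox weights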
    max_inst = cfg.TEST.NUM_INSTANCES


    feat_predictors = Predictor(feat_sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)


    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))]
                 for _ in range(num_classes)]
    all_boxes_inst = [[[] for _ in range(len(data))]
                      for _ in range(num_classes)]

    ginst_mem = []  # memory of global (video-level) instances
    sim_array_global = []  # per-frame similarity arrays
    ginst_ID = 0

    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # pad the front of the window with cfg.TEST.KEY_FRAME_INTERVAL copies of the first frame
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = True
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):

        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if idx != len(data) - 1:
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch)  # load the buffered window of frames and features into data_batch
                pred_result = im_detect_all(aggr_predictors, data_batch, data_names, scales, cfg) # get box result [[scores, pred_boxes, rois, data_dict, iscores, ipred_boxes, cropped_embed]]

                data_batch.data[0][-2] = None  # the cached window of frames holds a lot of memory, so release it
                data_batch.provide_data[0][-2] = ('data_cache', None)  # also clear its shape info
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                ginst_ID_prev = ginst_ID
                ginst_ID, out_im, out_im2, out_im_linst = process_link_pred_result(classes, pred_result, num_classes, thresh, cfg, nms, all_boxes,
                                                    all_boxes_inst, file_idx, max_per_image, vis,
                                                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales, ginst_mem, sim_array_global, ginst_ID)
                #out_im2 = process_pred_result_rois(pred_result, cfg.TEST.RPN_NMS_THRESH, cfg, nms, all_rois, file_idx, max_per_image,
                #                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                ginst_ID_now = ginst_ID
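                # presumably initializes predictor parameters for the instances
                # created in this frame (IDs ginst_ID_prev .. ginst_ID_now - 1)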
                init_inst_params(ginst_mem, ginst_ID_prev, ginst_ID_now, max_inst, aggr_predictors, arg_params)

                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    if cfg.TEST.DISPLAY[0]:
                        save_image(output_dir, file_idx, out_im)
                    if cfg.TEST.DISPLAY[1]:
                        save_image(output_dir_ginst, file_idx, out_im2)
                    if cfg.TEST.DISPLAY[2]:
                        save_image(output_dir_linst, file_idx, out_im_linst)
                #testing by metric


                print('testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1)))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################

            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect_all(aggr_predictors, data_batch, data_names, scales, cfg)

                ginst_ID_prev = ginst_ID
                ginst_ID, out_im, out_im2, out_im_linst = process_link_pred_result(classes, pred_result, num_classes, thresh, cfg, nms, all_boxes,
                                                      all_boxes_inst, file_idx, max_per_image, vis,
                                                      data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales,
                                                      ginst_mem, sim_array_global, ginst_ID)
                # out_im2 = process_pred_result_rois(pred_result, cfg.TEST.RPN_NMS_THRESH, cfg, nms, all_rois, file_idx, max_per_image,
                #                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                ginst_ID_now = ginst_ID
                init_inst_params(ginst_mem, ginst_ID_prev, ginst_ID_now, max_inst, aggr_predictors, arg_params)

                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    if cfg.TEST.DISPLAY[0]:
                        save_image(output_dir, file_idx, out_im)
                    if cfg.TEST.DISPLAY[1]:
                        save_image(output_dir_ginst, file_idx, out_im2)
                    if cfg.TEST.DISPLAY[2]:
                        save_image(output_dir_linst, file_idx, out_im_linst)
                print('testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1)))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print('done')
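
# minimal entry-point sketch (assumes cfg, cur_path, and all imports are set up
# at module scope by the surrounding demo script)
if __name__ == '__main__':
    main()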
Example #2
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_fgfa_flownet_vid'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    num_classes = 31
    classes = ['__background__','airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox',
               'giant_panda', 'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit',
               'red_panda', 'sheep', 'snake', 'squirrel',
               'tiger', 'train', 'turtle', 'watercraft',
               'whale', 'zebra']

    # load demo data

    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn_fgfa/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)

        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({'data': im_tensor, 'im_info': im_info,  'data_cache': im_tensor,    'feat_cache': im_tensor})



    # get predictor

    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', ((19, cfg.network.FGFA_FEAT_DIM,
                                                np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                                np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int))))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)


    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))]
                 for _ in range(num_classes)]
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # pad the front of the window with cfg.TEST.KEY_FRAME_INTERVAL copies of the first frame
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):

        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if idx != len(data) - 1:

            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)
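                # the cached window is dropped from the batch so its memory can be freed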

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms, all_boxes, file_idx, max_per_image, vis,
                                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time()-t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx)+'.JPEG', total_time /(file_idx+1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################

            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms, all_boxes, file_idx, max_per_image, vis,
                                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)

                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx)+'.JPEG', total_time / (file_idx+1))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
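        # seq_nms links high-scoring detections into sequences across frames
        # before the final per-frame NMS below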
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print 'done'
    def predict(self, images, feat_output, aggr_feat_output):

        model = self.model
        all_frame_interval = self.all_frame_interval
        feat_sym = self.feat_sym
        aggr_sym = self.aggr_sym
        num_classes = self.num_classes
        classes = self.classes
        max_per_image = self.max_per_image

        output_dir = cur_path + '/../demo/rfcn_fgfa_{}/'.format(self.index)
        self.index += 1
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        data = []
        for im in images:
            target_size = cfg.SCALES[0][0]
            max_size = cfg.SCALES[0][1]
            im, im_scale = resize(im,
                                  target_size,
                                  max_size,
                                  stride=cfg.network.IMAGE_STRIDE)
            im_tensor = transform(im, cfg.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)

            feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
            data.append({
                'data': im_tensor,
                'im_info': im_info,
                'data_cache': im_tensor,
                'feat_cache': im_tensor
            })

        # get predictor

        print 'get-predictor'
        data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
        label_names = []

        t1 = time.time()
        data = [[mx.nd.array(data[i][name]) for name in data_names]
                for i in xrange(len(data))]
        max_data_shape = [[
            ('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                      max([v[1] for v in cfg.SCALES]))),
            ('data_cache', (11, 3, max([v[0] for v in cfg.SCALES]),
                            max([v[1] for v in cfg.SCALES]))),
            ('feat_cache',
             ((11, cfg.network.FGFA_FEAT_DIM,
               np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(
                   np.int),
               np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(
                   np.int))))
        ]]
        provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                        for i in xrange(len(data))]
        provide_label = [None for _ in xrange(len(data))]

        arg_params, aux_params = load_param(cur_path + model, 0, process=True)

        feat_predictors = Predictor(feat_sym,
                                    data_names,
                                    label_names,
                                    context=[mx.gpu(0)],
                                    max_data_shapes=max_data_shape,
                                    provide_data=provide_data,
                                    provide_label=provide_label,
                                    arg_params=arg_params,
                                    aux_params=aux_params)
        aggr_predictors = Predictor(aggr_sym,
                                    data_names,
                                    label_names,
                                    context=[mx.gpu(0)],
                                    max_data_shapes=max_data_shape,
                                    provide_data=provide_data,
                                    provide_label=provide_label,
                                    arg_params=arg_params,
                                    aux_params=aux_params)
        nms = py_nms_wrapper(cfg.TEST.NMS)

        # First frame of the video
        idx = 0
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        all_boxes = [[[] for _ in range(len(data))]
                     for _ in range(num_classes)]
        data_list = deque(maxlen=all_frame_interval)
        feat_list = deque(maxlen=all_frame_interval)
        image, feat = get_resnet_output(feat_predictors, data_batch,
                                        data_names)
        # pad the front of the window with cfg.TEST.KEY_FRAME_INTERVAL copies of the first frame
        while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
            data_list.append(image)
            feat_list.append(feat)

        vis = False
        file_idx = 0
        thresh = 1e-3
        for idx, element in enumerate(data):

            data_batch = mx.io.DataBatch(
                data=[element],
                label=[],
                pad=0,
                index=idx,
                provide_data=[[(k, v.shape)
                               for k, v in zip(data_names, element)]],
                provide_label=[None])
            scales = [
                data_batch.data[i][1].asnumpy()[0, 2]
                for i in xrange(len(data_batch.data))
            ]

            if (idx != len(data) - 1):

                if len(data_list) < all_frame_interval - 1:
                    image, feat = get_resnet_output(feat_predictors,
                                                    data_batch, data_names)
                    data_list.append(image)
                    feat_list.append(feat)

                else:
                    #################################################
                    # main part of the loop
                    #################################################
                    image, feat = get_resnet_output(feat_predictors,
                                                    data_batch, data_names)
                    data_list.append(image)
                    feat_list.append(feat)

                    prepare_data(data_list, feat_list, data_batch)
                    pred_result, aggr_feat = im_detect(aggr_predictors,
                                                       data_batch, data_names,
                                                       scales, cfg)
                    assert len(aggr_feat) == 1
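                    # exactly one aggregated feature map is expected per key frame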

                    data_batch.data[0][-2] = None
                    data_batch.provide_data[0][-2] = ('data_cache', None)
                    data_batch.data[0][-1] = None
                    data_batch.provide_data[0][-1] = ('feat_cache', None)

                    out_im = process_pred_result(
                        classes, pred_result, num_classes, thresh, cfg, nms,
                        all_boxes, file_idx, max_per_image, vis,
                        data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(),
                        scales)
                    total_time = time.time() - t1
                    if not cfg.TEST.SEQ_NMS:
                        save_image(output_dir, file_idx, out_im)
                    print 'testing {} {:.4f}s'.format(
                        str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                    file_idx += 1

            else:
                #################################################
                # end part of a video                           #
                #################################################

                end_counter = 0
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                    data_list.append(image)
                    feat_list.append(feat)
                    prepare_data(data_list, feat_list, data_batch)
                    pred_result, aggr_feat = im_detect(aggr_predictors,
                                                       data_batch, data_names,
                                                       scales, cfg)
                    assert len(aggr_feat) == 1

                    out_im = process_pred_result(
                        classes, pred_result, num_classes, thresh, cfg, nms,
                        all_boxes, file_idx, max_per_image, vis,
                        data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(),
                        scales)

                    total_time = time.time() - t1
                    if not cfg.TEST.SEQ_NMS:
                        save_image(output_dir, file_idx, out_im)
                    print 'testing {} {:.4f}s'.format(
                        str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                    file_idx += 1
                    end_counter += 1

        if (cfg.TEST.SEQ_NMS):
            video = [all_boxes[j][:] for j in range(1, num_classes)]
            dets_all = seq_nms(video)
            for cls_ind, dets_cls in enumerate(dets_all):
                for frame_ind, dets in enumerate(dets_cls):
                    keep = nms(dets)
                    all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
            for idx in range(len(data)):
                boxes_this_image = [[]] + [
                    all_boxes[j][idx] for j in range(1, num_classes)
                ]
                out_im = draw_all_detection(data[idx][0].asnumpy(),
                                            boxes_this_image, classes,
                                            scales[0], cfg)
                save_image(output_dir, idx, out_im)

        print 'done'
Example #4
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/data2/output/fgfa_rfcn/jrdb/resnet_v1_101_flownet_jrdb/VID_train_15frames/fgfa_rfcn_vid'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names

    classes = ['__background__', 'p']
    num_classes = len(classes)
    data_loader = DataLoader(args.input + '/*', cfg)

    output_dir_tmp = '%s_%s_epoc_%d' % (
        args.dataset, os.path.basename(args.input), args.epoc)
    output_dir = os.path.join("/data2", "output", output_dir_tmp)
    print(output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
    # for im_name in image_names:
    #     im_info, im_tensor = read_data(im_name)
    #     data.append({'data': im_tensor, 'im_info': im_info,  'data_cache': im_tensor,    'feat_cache': im_tensor})
    #     ### data (1,3, 562,1000)

    #data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))] # list of data images
    # get predictor

    print 'get-predictor'

    label_names = []

    t1 = time.time()

    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                  max([v[1] for v in cfg.SCALES]))),
        ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]),
                        max([v[1] for v in cfg.SCALES]))),
        ('feat_cache',
         ((19, cfg.network.FGFA_FEAT_DIM,
           np.ceil(max([v[0]
                        for v in cfg.SCALES]) / feat_stride).astype(np.int),
           np.ceil(max([v[1]
                        for v in cfg.SCALES]) / feat_stride).astype(np.int))))
    ]]

    provide_data, provide_label = data_loader.get_provided_data_and_label()

    arg_params, aux_params = load_param(model, args.epoc, process=True)
    feat_predictors = Predictor(feat_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_idx_ = data_loader.get_data_by_index(idx)
    len_data = data_loader.get_len_data()
    data_batch = mx.io.DataBatch(data=[data_idx_],
                                 label=[],
                                 pad=0,
                                 index=idx,
                                 provide_data=[[
                                     (k, v.shape)
                                     for k, v in zip(data_names, data_idx_)
                                 ]],
                                 provide_label=[None])
    scales = [
        data_batch.data[i][1].asnumpy()[0, 2]
        for i in xrange(len(data_batch.data))
    ]
    all_boxes = [[[] for _ in range(len_data)] for _ in range(num_classes)]
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # pad the front of the window with cfg.TEST.KEY_FRAME_INTERVAL copies of the first frame
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = args.threshold
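    # the score threshold comes from the CLI here, unlike the hard-coded values
    # in the other examples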
    for idx, element in enumerate(data_loader):  # loop through list of images!
        data_batch = mx.io.DataBatch(data=[element],
                                     label=[],
                                     pad=0,
                                     index=idx,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, element)
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        if (idx != len_data - 1):
            image, feat = get_resnet_output(feat_predictors, data_batch,
                                            data_names)
            data_list.append(image)
            feat_list.append(feat)

            if len(data_list) >= all_frame_interval - 1:

                #################################################
                # main part of the loop
                #################################################
                # the current frame was already appended above, so the window
                # is ready; no second get_resnet_output call is needed here

                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch,
                                        data_names, scales, cfg)

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms,
                    all_boxes, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(
                    str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################

            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch,
                                            data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch,
                                        data_names, scales, cfg)

                out_im = process_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms,
                    all_boxes, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)

                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(
                    str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
                end_counter += 1

    if (cfg.TEST.SEQ_NMS):
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len_data):
            boxes_this_image = [[]] + [
                all_boxes[j][idx] for j in range(1, num_classes)
            ]
            # 'data' is never built in this main(); fetch the frame through the
            # DataLoader instead (it exposes get_data_by_index, used above)
            out_im = draw_all_detection(data_loader.get_data_by_index(idx)[0].asnumpy(),
                                        boxes_this_image, classes, scales[0],
                                        cfg)
            save_image(output_dir, idx, out_im)

    print 'done'
    def predict(self, images, feat_output, aggr_feat_output):

        model = self.model
        all_frame_interval = self.all_frame_interval
        feat_sym = self.feat_sym
        aggr_sym = self.aggr_sym

        # load video data
        # data = []
        # for im in images:
        #     target_size = cfg.SCALES[0][0]
        #     max_size = cfg.SCALES[0][1]
        #     im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        #     im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        #     im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)

        #     feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        #     data.append({'data': im_tensor, 'im_info': im_info, 'data_cache': im_tensor,    'feat_cache': im_tensor})
        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)

        # get predictor

        data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
        label_names = []

        t1 = time.time()
        interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
        # data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
        data = VideoLoader(images)
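        # VideoLoader presumably mirrors the indexing of the preprocessed list it
        # replaces: data[i] -> [data, im_info, data_cache, feat_cache] NDArrays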
        max_data_shape = [[
            ('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                      max([v[1] for v in cfg.SCALES]))),
            ('data_cache', (interval, 3, max([v[0] for v in cfg.SCALES]),
                            max([v[1] for v in cfg.SCALES]))),
            ('feat_cache',
             ((interval, cfg.network.FGFA_FEAT_DIM,
               np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(
                   np.int),
               np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(
                   np.int))))
        ]]
        provide_data = [[(k, v.shape) for k, v in zip(data_names, data[0])]
                        for _ in xrange(len(data))]
        provide_label = [None for _ in xrange(len(data))]

        arg_params, aux_params = load_param(cur_path + model, 0, process=True)

        feat_predictors = Predictor(feat_sym,
                                    data_names,
                                    label_names,
                                    context=[mx.gpu(0)],
                                    max_data_shapes=max_data_shape,
                                    provide_data=provide_data,
                                    provide_label=provide_label,
                                    arg_params=arg_params,
                                    aux_params=aux_params)
        aggr_predictors = Predictor(aggr_sym,
                                    data_names,
                                    label_names,
                                    context=[mx.gpu(0)],
                                    max_data_shapes=max_data_shape,
                                    provide_data=provide_data,
                                    provide_label=provide_label,
                                    arg_params=arg_params,
                                    aux_params=aux_params)

        # First frame of the video
        idx = 0
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        data_list = deque(maxlen=all_frame_interval)
        feat_list = deque(maxlen=all_frame_interval)
        image, feat = get_resnet_output(feat_predictors, data_batch,
                                        data_names)
        # if feat_output is not None:
        #     feat_output.append(feat.asnumpy()[0][:1024])
        # pad the front of the window with cfg.TEST.KEY_FRAME_INTERVAL copies of the first frame
        while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
            data_list.append(image)
            feat_list.append(feat)

        vis = False
        file_idx = 0
        thresh = 1e-3
        for idx, element in enumerate(data):

            data_batch = mx.io.DataBatch(
                data=[element],
                label=[],
                pad=0,
                index=idx,
                provide_data=[[(k, v.shape)
                               for k, v in zip(data_names, element)]],
                provide_label=[None])
            scales = [
                data_batch.data[i][1].asnumpy()[0, 2]
                for i in xrange(len(data_batch.data))
            ]

            if (idx != len(data) - 1):

                if len(data_list) < all_frame_interval - 1:
                    image, feat = get_resnet_output(feat_predictors,
                                                    data_batch, data_names)
                    if feat_output is not None:
                        feat_output.append(feat.asnumpy()[0][:1024])
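                        # the first 1024 channels are presumably the appearance
                        # half of the concatenated feature/embedding output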
                    data_list.append(image)
                    feat_list.append(feat)

                else:
                    #################################################
                    # main part of the loop
                    #################################################
                    image, feat = get_resnet_output(feat_predictors,
                                                    data_batch, data_names)
                    if feat_output is not None:
                        feat_output.append(feat.asnumpy()[0][:1024])
                    data_list.append(image)
                    feat_list.append(feat)

                    prepare_data(data_list, feat_list, data_batch)
                    pred_result, aggr_feat = im_detect(aggr_predictors,
                                                       data_batch,
                                                       data_names,
                                                       scales,
                                                       cfg,
                                                       aggr_feats=True)
                    assert len(aggr_feat) == 1
                    if aggr_feat_output is not None:
                        aggr_feat_output.append(aggr_feat[0].asnumpy()[0])

                    data_batch.data[0][-2] = None
                    data_batch.provide_data[0][-2] = ('data_cache', None)
                    data_batch.data[0][-1] = None
                    data_batch.provide_data[0][-1] = ('feat_cache', None)

                    print '\r(main) Testing FGFA R-FCN: {} / {}'.format(
                        file_idx, len(images)),
                    file_idx += 1

            else:
                #################################################
                # end part of a video                           #
                #################################################

                end_counter = 0
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                if feat_output is not None:
                    feat_output.append(feat.asnumpy()[0][:1024])
                while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                    data_list.append(image)
                    feat_list.append(feat)
                    prepare_data(data_list, feat_list, data_batch)
                    pred_result, aggr_feat = im_detect(aggr_predictors,
                                                       data_batch,
                                                       data_names,
                                                       scales,
                                                       cfg,
                                                       aggr_feats=True)
                    assert len(aggr_feat) == 1
                    if aggr_feat_output is not None:
                        aggr_feat_output.append(aggr_feat[0].asnumpy()[0])

                    print '\r(end) Testing FGFA R-FCN: {} / {}'.format(
                        file_idx, len(images)),
                    file_idx += 1
                    end_counter += 1

        print
Example #6
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    # an earlier VisDrone checkpoint, kept for reference:
    # model = '/data/output/fgfa_rfcn/vis_drone/resnet_v1_101_flownet_vis_drone_rfcn_end2end_ohem/VID_train_vid/fgfa_rfcn_vid'
    model = '/data2/output/fgfa_rfcn/jrdb/resnet_v1_101_flownet_jrdb/VID_train_15frames_cut/fgfa_rfcn_vid'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names

    # the full VisDrone class list below is superseded by the two-class
    # (person-only) setting that this demo actually uses
    # classes = ['__background__', 'ignored regions', 'pedestrian', 'people',
    #            'bicycle', 'car', 'van', 'truck', 'tricycle',
    #            'awning-tricycle', 'bus', 'motor', 'others']

    classes = ['__background__', 'p']
    num_classes = len(classes)

    # load demo data

    image_names = glob.glob(args.input + '/*')
    image_names.sort()
    print("num of images", len(image_names))
    output_dir = "/data2/demo_new2/"
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_cache': im_tensor,
            'feat_cache': im_tensor
        })

    # get predictor

    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                  max([v[1] for v in cfg.SCALES]))),
        ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]),
                        max([v[1] for v in cfg.SCALES]))),
        ('feat_cache',
         ((19, cfg.network.FGFA_FEAT_DIM,
           np.ceil(max([v[0]
                        for v in cfg.SCALES]) / feat_stride).astype(np.int),
           np.ceil(max([v[1]
                        for v in cfg.SCALES]) / feat_stride).astype(np.int))))
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(model, 190, process=True)
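    # dump the loaded parameter dictionaries as a quick sanity check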
    print arg_params
    print aux_params
    feat_predictors = Predictor(feat_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]],
                                 label=[],
                                 pad=0,
                                 index=idx,
                                 provide_data=[[
                                     (k, v.shape)
                                     for k, v in zip(data_names, data[idx])
                                 ]],
                                 provide_label=[None])
    scales = [
        data_batch.data[i][1].asnumpy()[0, 2]
        for i in xrange(len(data_batch.data))
    ]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # pad the front of the window with cfg.TEST.KEY_FRAME_INTERVAL copies of the first frame
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 0.4
    for idx, element in enumerate(data):

        data_batch = mx.io.DataBatch(data=[element],
                                     label=[],
                                     pad=0,
                                     index=idx,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, element)
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        if (idx != len(data) - 1):

            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)

            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch,
                                        data_names, scales, cfg)

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms,
                    all_boxes, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(
                    str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################

            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch,
                                            data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch,
                                        data_names, scales, cfg)

                out_im = process_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms,
                    all_boxes, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)

                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(
                    str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
                end_counter += 1

    if (cfg.TEST.SEQ_NMS):
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [
                all_boxes[j][idx] for j in range(1, num_classes)
            ]
            out_im = draw_all_detection(data[idx][0].asnumpy(),
                                        boxes_this_image, classes, scales[0],
                                        cfg)
            save_image(output_dir, idx, out_im)

    print 'done'