Esempio n. 1
0
def test_rcnn(cfg, dataset, image_set, root_path, dataset_path,
              ctx, prefix, epoch,
              vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    """Evaluate an RCNN model (RPN path only) on the given dataset.

    Builds the test symbol, loads the roidb and the trained parameters,
    wraps everything in a Predictor and runs pred_eval.

    :param cfg: experiment configuration (provides cfg.symbol, cfg.SCALES, ...)
    :param ctx: list of mxnet contexts; batch size equals len(ctx)
    :param prefix: checkpoint path prefix; ``epoch`` selects the checkpoint
    :param logger: optional logger; a default INFO logger is created when omitted
    :param output_path: result path forwarded to the imdb constructor
    """
    # BUGFIX: logger defaults to None but is dereferenced below; set up a
    # fallback so the call does not crash with AttributeError.
    if logger is None:
        import logging
        logging.basicConfig()
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)

    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))

    # load symbol and testing data (only the RPN path is implemented here)
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
        roidb = imdb.get_roidb()
    else:
        assert False, 'do not support'

    # get test data iter
    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn)

    # load trained parameters
    arg_params, aux_params = load_param(prefix, epoch, process=True)

    # infer shape from the data iterator's layout
    data_shape_dict = dict(test_data.provide_data)
    sym_instance.infer_shape(data_shape_dict)

    # decide maximum shape; the width is rounded down to a multiple of 16
    data_names = [k[0] for k in test_data.provide_data]
    label_names = None
    max_data_shape = [('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([int(v[1] // 16 * 16) for v in cfg.SCALES])))]

    # create predictor
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # start detection
    pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
Esempio n. 2
0
    def loadModel(self):
        """Build the test-time symbol and a Predictor from the global ``config``.

        Returns (ctx_id, data_names, predictor), where ctx_id is the list of
        GPU ids parsed from config.gpus.
        """
        # get symbol
        ctx_id = [int(i) for i in config.gpus.split(',')]
        # pprint.pprint(config)
        sym_instance = eval(config.symbol)()
        sym = sym_instance.get_symbol(config, is_train=False)

        # get predictor
        data_names = ['data', 'im_info']
        label_names = []
        # largest input the predictor must accommodate over all test scales
        max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                     max([v[1] for v in config.SCALES])))]]
        # Python 2 long literals; im_info presumably carries
        # (height, width, scale) -- confirm against the data loader.
        provide_data = [[max_data_shape[0][0], ('im_info', (1L, 3L))]]
        provide_label = [None]
        # epoch 0 checkpoint located at cur_path + config.PREMODEL
        arg_params, aux_params = load_param(cur_path + config.PREMODEL,
                                            0,
                                            process=True)
        # only the first configured GPU is used for inference here
        predictor = Predictor(sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(ctx_id[0])],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)

        return ctx_id, data_names, predictor
Esempio n. 3
0
def get_predictor(sym, sym_instance, cfg, arg_params, aux_params, test_data,
                  ctx):
    """Validate shapes and wrap the symbol and parameters into a Predictor.

    The maximum data shape reserves room for one current frame ('data') and
    a 19-frame cache ('data_cache') at the largest configured test scale.
    """
    # infer shapes from the single-batch layout and check that the loaded
    # parameters are consistent with them
    shape_lookup = dict(test_data.provide_data_single)
    sym_instance.infer_shape(shape_lookup)
    sym_instance.check_parameter_shapes(arg_params, aux_params, shape_lookup,
                                        is_train=False)

    # largest height/width over all configured test scales
    tallest = max([v[0] for v in cfg.SCALES])
    widest = max([v[1] for v in cfg.SCALES])
    biggest_shapes = [[
        ('data', (1, 3, tallest, widest)),
        ('data_cache', (19, 3, tallest, widest)),
    ]]

    # data blob names, in iterator order; no labels at test time
    blob_names = [name for name, _ in test_data.provide_data_single]
    return Predictor(sym, blob_names, None,
                     context=ctx,
                     max_data_shapes=biggest_shapes,
                     provide_data=test_data.provide_data,
                     provide_label=test_data.provide_label,
                     arg_params=arg_params,
                     aux_params=aux_params)
Esempio n. 4
0
def get_predictor_impression_offline(sym, sym_instance, cfg, arg_params,
                                     aux_params, test_data, ctx):
    """Create a Predictor for the impression-network offline test setting."""
    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params,
                                        aux_params,
                                        data_shape_dict,
                                        is_train=False)
    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = None
    # three image inputs at the largest configured test scale, plus two
    # hard-coded (1, 1024, 38, 63) feature maps -- presumably matched to the
    # network's feature stride at the default scale; confirm upstream.
    max_data_shape = [[('data_oldkey', (1, 3, max([v[0] for v in cfg.SCALES]),
                                        max([v[1] for v in cfg.SCALES]))),
                       ('data_newkey', (1, 3, max([v[0] for v in cfg.SCALES]),
                                        max([v[1] for v in cfg.SCALES]))),
                       ('data_cur', (1, 3, max([v[0] for v in cfg.SCALES]),
                                     max([v[1] for v in cfg.SCALES]))),
                       ('impression', (1, 1024, 38, 63)),
                       ('key_feat_task', (1, 1024, 38, 63))]]
    # create predictor (Python 2 print statement)
    print 'provide_data', test_data.provide_data
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    return predictor
Esempio n. 5
0
def get_predictor(sym, sym_instance, cfg, arg_params, aux_params, test_data,
                  ctx, max_data_shape):
    """Build a Predictor after validating parameter and data shapes.

    Unlike its siblings, the caller supplies ``max_data_shape`` directly.
    """
    # check the loaded parameters against the single-batch data layout
    single_layout = dict(test_data.provide_data_single)
    sym_instance.infer_shape(single_layout)
    sym_instance.check_parameter_shapes(arg_params, aux_params, single_layout,
                                        is_train=False)

    # names of the data blobs, in iterator order; labels are unused at test time
    blob_names = [entry[0] for entry in test_data.provide_data_single]

    return Predictor(sym, blob_names, None,
                     context=ctx,
                     max_data_shapes=max_data_shape,
                     provide_data=test_data.provide_data,
                     provide_label=test_data.provide_label,
                     arg_params=arg_params,
                     aux_params=aux_params)
Esempio n. 6
0
    def __init__(self):
        """Load the R-FCN VOC checkpoint and build a GPU-0 predictor."""
        # build the test-time symbol from the configured network name
        sym_instance = eval(config.symbol + '.' + config.symbol)()
        self.symbol = sym_instance.get_symbol(config, is_train=False)
        # class names this detector reports
        self.classes = ['box', 'robot']
        logging.debug("Classes: {}".format(self.classes))
        # single test scale (height, width)
        self.scales = config.SCALES[0]
        logging.debug("Scales: {}".format(self.scales))
        # fixed input layout: one image plus its (1, 3) im_info record;
        # reused both as max shape and as provide_data below
        self.data_shape_conf = [[('data', (1, 3,
                                           self.scales[0], self.scales[1])),
                                 ('im_info', (1, 3))]]
        # epoch-0 checkpoint named 'rfcn_voc' under ../models
        self.arg_params, self.aux_params = load_param(os.path.join(
            cur_path, '..', 'models', "rfcn_voc"),
                                                      0,
                                                      process=True)

        self.data_names = ['data', 'im_info']
        self.predictor = Predictor(self.symbol, ['data', 'im_info'], [],
                                   context=[mx.gpu(0)],
                                   max_data_shapes=self.data_shape_conf,
                                   provide_data=self.data_shape_conf,
                                   provide_label=[None],
                                   arg_params=self.arg_params,
                                   aux_params=self.aux_params)
        # GPU NMS on device 0, thresholded by config.TEST.NMS
        self.nms = gpu_nms_wrapper(config.TEST.NMS, 0)
        logging.info("Deformable detector initialized")
Esempio n. 7
0
def test_rcnn(cfg, dataset, image_set, root_path, dataset_path,
              ctx, prefix, epoch,
              vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    """Run detection and evaluation for an RCNN model, with or without RPN.

    :param has_rpn: when True the roidb is ground truth; otherwise
        precomputed proposals named by ``proposal`` are loaded.
    :param ctx: list of mxnet contexts; batch size equals len(ctx)
    :param logger: required; asserted below.
    """
    if not logger:
        assert False, 'require a logger'

    # print cfg
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))

    # load symbol and testing data
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
        roidb = imdb.gt_roidb()
    else:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
        gt_roidb = imdb.gt_roidb()
        # e.g. proposal='rpn' resolves to imdb.rpn_roidb(gt_roidb)
        roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb)

    # get test data iter
    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn)

    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)

    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)

    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = None
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        # BUGFIX: max_data_shape is a nested list of shape groups; the
        # 'rois' entry must join 'data' inside the first group, not be
        # appended as a bare tuple at the outer level.
        max_data_shape[0].append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    # create predictor
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # start detection
    pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
Esempio n. 8
0
def test_rpn(cfg, dataset, image_set, root_path, dataset_path,
             ctx, prefix, epoch,
             vis, shuffle, thresh, logger=None, output_path=None):
    """Generate RPN proposals over a dataset and report their recall.

    :param ctx: list of mxnet contexts; batch size equals len(ctx)
    :param prefix: checkpoint path prefix; ``epoch`` selects the checkpoint
    :param logger: optional; a default INFO logger is created when omitted
    """
    # set up logger
    if not logger:
        logging.basicConfig()
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)

    # rpn generate proposal cfg
    cfg.TEST.HAS_RPN = True

    # print cfg
    pprint.pprint(cfg)
    logger.info('testing rpn cfg:{}\n'.format(pprint.pformat(cfg)))

    # load symbol
    sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    sym = sym_instance.get_symbol_rpn(cfg, is_train=False)

    # load dataset and prepare imdb for training
    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
    roidb = imdb.gt_roidb()
    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=True)

    # load model (note: no process=True flag here, unlike the rcnn test path)
    arg_params, aux_params = load_param(prefix, epoch)

    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)

    # check parameters
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data[0]]
    label_names = None if test_data.provide_label[0] is None else [k[0] for k in test_data.provide_label[0]]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]]

    # create predictor
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # start testing
    imdb_boxes = generate_proposals(predictor, test_data, imdb, cfg, vis=vis, thresh=thresh)

    # evaluate recall of the generated proposals against ground truth
    all_log_info = imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
    logger.info(all_log_info)
Esempio n. 9
0
def test_deeplab():
    """Evaluate a DeepLab segmentation model.

    All settings come from the module-level ``config`` and ``args``.
    """
    epoch = config.TEST.test_epoch
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    image_set = config.dataset.test_image_set
    root_path = config.dataset.root_path
    dataset = config.dataset.dataset
    dataset_path = config.dataset.dataset_path

    # logger and output folders for this experiment
    logger, final_output_path, experiments_path, _  = create_env(config.output_path, args.cfg, image_set)
    # checkpoint prefix is derived from the '+'-joined training image sets
    prefix = os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix)

    # print config
    logger.info('testing config:{}\n'.format(pprint.pformat(config)))

    # load symbol and testing data
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=experiments_path)
    segdb = imdb.gt_segdb()

    #get test data iter
    batch_size = (config.TEST.BATCH_IMAGES) * len(ctx)
    mctx = ctx

    test_data = TestDataLoader(segdb, config=config, batch_size=batch_size,shuffle=False,ctx=mctx,has_label=imdb.has_label)

    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)

    # load trained parameters and validate them against the inferred shapes
    arg_params, aux_params = load_param(prefix, epoch, process=True)

    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = ['label']
    max_data_shape = [[('data', (config.TEST.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]

    # create predictor
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # start detection; the cached results are always ignored here
    args.ignore_cache = True
    pred_eval(predictor, test_data, imdb, vis=args.vis, ignore_cache=args.ignore_cache, logger=logger)
def main():
    """Deformable-convolution demo: run the net on a few images and
    visualize the res5a/b/c branch2b offset maps."""
    # get symbol
    pprint.pprint(config)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # load demo data
    image_names = ['000240.jpg', '000437.jpg', '004072.jpg', '007912.jpg']
    image_all = []
    data = []
    for im_name in image_names:
        # BUGFIX: the message used a '%s' placeholder with str.format, so the
        # missing file name was never interpolated; use a format field.
        assert os.path.exists(cur_path + '/../demo/deform_conv/' + im_name), \
            ('{} does not exist'.format('../demo/deform_conv/' + im_name))
        im = cv2.imread(cur_path + '/../demo/deform_conv/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        image_all.append(im)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        # im_info holds (height, width, scale) of the resized tensor
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    # epoch-0 checkpoint of the deformable-conv demo model
    arg_params, aux_params = load_param(cur_path + '/../model/deform_conv', 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # test: one image per batch, pulling out the learned offset maps
    for idx, _ in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])

        output = predictor.predict(data_batch)
        res5a_offset = output[0]['res5a_branch2b_offset_output'].asnumpy()
        res5b_offset = output[0]['res5b_branch2b_offset_output'].asnumpy()
        res5c_offset = output[0]['res5c_branch2b_offset_output'].asnumpy()

        # visualize offsets deepest-first on the RGB image
        im = image_all[idx]
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_dconv_offset(im, [res5c_offset, res5b_offset, res5a_offset])
Esempio n. 11
0
def get_net(cfg, ctx, prefix, epoch, has_rpn):
    """Load a trained checkpoint and wrap it in a Predictor.

    :return: the Predictor, or None when construction fails (the traceback
        is printed rather than propagated -- best-effort behavior preserved).
    """
    predictor = None
    try:
        if has_rpn:
            sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
            sym = sym_instance.get_symbol(cfg, is_train=False)
        else:
            sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
            sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)

        # load model
        arg_params, aux_params = load_param(prefix, epoch, process=True)

        # infer shape (scales come from the module-level `config`)
        SHORT_SIDE = config.SCALES[0][0]
        LONG_SIDE = config.SCALES[0][1]
        DATA_NAMES = ['data', 'im_info']
        LABEL_NAMES = None
        DATA_SHAPES = [('data', (1, 3, LONG_SIDE, SHORT_SIDE)),
                       ('im_info', (1, 3))]
        LABEL_SHAPES = None
        data_shape_dict = dict(DATA_SHAPES)
        sym_instance.infer_shape(data_shape_dict)
        sym_instance.check_parameter_shapes(arg_params,
                                            aux_params,
                                            data_shape_dict,
                                            is_train=False)

        # decide maximum shape
        max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                     max([v[1] for v in cfg.SCALES])))]]
        if not has_rpn:
            # BUGFIX: 'rois' belongs inside the first shape group next to
            # 'data', not appended as a bare tuple at the outer list level.
            max_data_shape[0].append(
                ('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

        # create predictor
        predictor = Predictor(sym,
                              DATA_NAMES,
                              LABEL_NAMES,
                              context=ctx,
                              max_data_shapes=max_data_shape,
                              provide_data=[DATA_SHAPES],
                              provide_label=[LABEL_SHAPES],
                              arg_params=arg_params,
                              aux_params=aux_params)

    except Exception:
        # keep best-effort behavior: report and fall through to return None
        print(traceback.format_exc())
        predictor = None

    # BUGFIX: the original built the predictor but never returned it
    return predictor
Esempio n. 12
0
def test_fcis(config, dataset, image_set, root_path, dataset_path,
              ctx, prefix, epoch,
              vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    """Evaluate an FCIS instance-segmentation model (RPN path only).

    :param ctx: list of mxnet contexts; batch size equals len(ctx)
    :param logger: required -- asserted below
    :raises NotImplementedError: when has_rpn is False
    """
    if not logger:
        assert False, 'require a logger'

    # print config
    pprint.pprint(config)
    logger.info('testing config:{}\n'.format(pprint.pformat(config)))

    # load symbol and testing data
    if has_rpn:
        sym_instance = eval(config.symbol)()
        sym = sym_instance.get_symbol(config, is_train=False)
        # the imdb also receives mask-evaluation settings
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path, 
                             binary_thresh=config.BINARY_THRESH, mask_size=config.MASK_SIZE)
        sdsdb = imdb.gt_sdsdb()
    else:
        raise NotImplementedError

    # get test data iter
    test_data = TestLoader(sdsdb, config, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn)

    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)

    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)

    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = []
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    if not has_rpn:
        raise NotImplementedError()

    # create predictor
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # start detection
    pred_eval(predictor, test_data, imdb, config, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
Esempio n. 13
0
def get_predictor(sym, image, arg_params, aux_params):
    """Preprocess a single image and build a Predictor for it on gpu(0).

    The image is resized to the first configured scale, mean-subtracted,
    and paired with its (height, width, scale) im_info record.
    """
    data = []
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(image, target_size, max_size, stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    data.append({'data': im_tensor, 'im_info': im_info})
    data_names = ['data', 'im_info']
    label_names = []
    # convert the single sample to mxnet NDArrays, ordered as data_names
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    return predictor
Esempio n. 14
0
def get_net(cfg, ctx, arg_params, aux_params, has_rpn):
    """Build the test symbol for already-loaded parameters and wrap it in a
    Predictor.

    :param has_rpn: selects the full RPN symbol vs. the rcnn-only symbol
    :return: a Predictor ready for detection
    """
    # pylint: disable=eval-used
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
    else:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)

    # infer shape (scales come from the module-level `config`)
    SHORT_SIDE = config.SCALES[0][0]
    LONG_SIDE = config.SCALES[0][1]
    DATA_NAMES = ['data', 'im_info']
    LABEL_NAMES = None
    DATA_SHAPES = [('data', (1, 3, LONG_SIDE, SHORT_SIDE)),
                   ('im_info', (1, 3))]
    LABEL_SHAPES = None
    data_shape_dict = dict(DATA_SHAPES)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params,
                                        aux_params,
                                        data_shape_dict,
                                        is_train=False)

    # decide maximum shape
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                 max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        # BUGFIX: append 'rois' inside the first shape group rather than to
        # the outer list -- max_data_shape is a list of per-batch groups.
        max_data_shape[0].append(
            ('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    # create predictor
    predictor = Predictor(sym,
                          DATA_NAMES,
                          LABEL_NAMES,
                          context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=[DATA_SHAPES],
                          provide_label=[LABEL_SHAPES],
                          arg_params=arg_params,
                          aux_params=aux_params)
    return predictor
    def load_data_and_get_predictor(self, image_names):
        """Preprocess the given images and build ``self.predictor`` for them.

        :param image_names: list of image file paths readable by cv2
        :return: list of [data, im_info] NDArray pairs, one per image
        """
        # load demo data

        #image_names = ['COCO_test2015_000000000891.jpg',
        #            'COCO_test2015_000000001669.jpg']
        data = []
        for im_name in image_names:
            #assert os.path.exists(
            #    cur_path + '/../demo/' + im_name), ('%s does not exist'.format('../demo/' + im_name))
            #im = cv2.imread(cur_path + '/../demo/' + im_name,
            #                cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            im = cv2.imread(im_name, cv2.IMREAD_COLOR |
                            cv2.IMREAD_IGNORE_ORIENTATION)
            # resize to the first configured scale and mean-subtract
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im, target_size, max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            # im_info holds (height, width, scale) of the resized tensor
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})

        # get predictor
        self.data_names = ['data', 'im_info']
        label_names = []
        data = [[mx.nd.array(data[i][name]) for name in self.data_names]
                for i in xrange(len(data))]
        max_data_shape = [[('data', (1, 3, max(
            [v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
        provide_data = [[(k, v.shape) for k, v in zip(self.data_names, data[i])]
                        for i in xrange(len(data))]
        provide_label = [None for i in xrange(len(data))]
        # NOTE(review): the predictor is placed on gpu(1) while NMS below
        # runs on device 0 -- looks intentional but worth confirming.
        self.predictor = Predictor(self.sym, self.data_names, label_names,
                                   context=[mx.gpu(1)], max_data_shapes=max_data_shape,
                                   provide_data=provide_data, provide_label=provide_label,
                                   arg_params=self.arg_params, aux_params=self.aux_params)
        self.nms = gpu_nms_wrapper(config.TEST.NMS, 0)

        return data
def load_rfcn_model(cfg, has_rpn, prefix, epoch, ctx):
    """Load an R-FCN checkpoint and build a multi-GPU Predictor.

    The per-batch shapes are sized so that batch size equals len(ctx).
    """
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
    else:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)
    # sym.save('1.json')
    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)

    num_gpu = len(ctx)
    # infer shape (scales come from the module-level `config`)
    SHORT_SIDE = config.SCALES[0][0]
    LONG_SIDE = config.SCALES[0][1]
    DATA_NAMES = ['data', 'im_info']
    LABEL_NAMES = None
    DATA_SHAPES = [('data', (num_gpu, 3, LONG_SIDE, SHORT_SIDE)), ('im_info', (num_gpu, 3))]
    LABEL_SHAPES = None
    data_shape_dict = dict(DATA_SHAPES)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape
    max_data_shape = [[('data', (num_gpu, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        # BUGFIX: 'rois' must join 'data' inside the first shape group; the
        # original appended it as a bare tuple to the outer list of groups.
        max_data_shape[0].append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    # create predictor
    predictor = Predictor(sym, DATA_NAMES, LABEL_NAMES,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=[DATA_SHAPES], provide_label=[LABEL_SHAPES],
                          arg_params=arg_params, aux_params=aux_params)

    return predictor
Esempio n. 17
0
File: demo.py Project: mrlooi/FCIS
    def init_net(self):
        """Build the FCIS network, its DataBatch wrapper and the Predictor,
        then warm the predictor up with two dummy detections."""
        config = self.cfg

        # get symbol
        sym_instance = resnet_v1_101_fcis()
        sym = sym_instance.get_symbol(config, is_train=False)

        # key parameters
        data_names = ['data', 'im_info']
        label_names = []
        max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]

        # first configured test scale (short side, long side)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]

        self.data_batch_wr = DataBatchWrapper(target_size, max_size, image_stride=config.network.IMAGE_STRIDE, 
                              pixel_means=config.network.PIXEL_MEANS, data_names=data_names, label_names=label_names)

        # dummy black image used to size the initial tensors and to warm up
        im = np.zeros((target_size,max_size,3))
        data_tensor_info = self.data_batch_wr.get_data_tensor_info(im)

        # get predictor
        arg_params, aux_params = load_param_file(self.model_path, process=True)
        print("\nLoaded model %s\n"%(self.model_path))

        self.net = Predictor(sym, data_names, label_names,
                              context=[mx.gpu(self.ctx_id[0])], max_data_shapes=max_data_shape,
                              provide_data=[[(k, v.shape) for k, v in zip(data_names, data_tensor_info)]], provide_label=[None],
                              arg_params=arg_params, aux_params=aux_params)
        self.data_names = data_names

        # # warm up predictor
        # NOTE(review): the inner xrange comprehension rebinds `i`, shadowing
        # the outer loop variable -- harmless here but confusing.
        for i in xrange(2):
            data_batch = self.data_batch_wr.get_data_batch(im)
            scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
            _, _, _, _ = im_detect(self.net, data_batch, data_names, scales, config)
def demo_rfcn(cfg, dataset, image_set, root_path, dataset_path,
              ctx, prefix, epoch, vis, has_rpn, thresh, use_box_voting):
    """Load a trained R-FCN checkpoint and run the demo detection loop.

    :param use_box_voting: forwarded to demo_net to enable box voting
    """
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
    else:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)

    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)

    # infer shape (scales come from the module-level `config`)
    SHORT_SIDE = config.SCALES[0][0]
    LONG_SIDE = config.SCALES[0][1]
    DATA_NAMES = ['data', 'im_info']
    LABEL_NAMES = None
    DATA_SHAPES = [('data', (1, 3, LONG_SIDE, SHORT_SIDE)), ('im_info', (1, 3))]
    LABEL_SHAPES = None
    data_shape_dict = dict(DATA_SHAPES)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        # BUGFIX: keep 'rois' inside the first shape group next to 'data';
        # the original appended it as a bare tuple to the outer list.
        max_data_shape[0].append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    # create predictor
    predictor = Predictor(sym, DATA_NAMES, LABEL_NAMES,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=[DATA_SHAPES], provide_label=[LABEL_SHAPES],
                          arg_params=arg_params, aux_params=aux_params)

    demo_net(predictor, dataset, image_set,
             root_path, dataset_path, thresh, vis, use_box_voting)
Esempio n. 19
0
def get_predictor(sym, sym_instance, cfg, arg_params, aux_params, test_data, ctx):
    """Create a Predictor sized for a clip of T = 2*KEY_FRAME_INTERVAL + 1 frames.

    Shapes are validated against the iterator's single-batch layout before
    the Predictor is built.
    """
    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = None
    # CLEANUP: the original also computed feature-map H/W via
    # cfg.network.RPN_FEAT_STRIDE but never used the results; that dead
    # code has been removed.
    T = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_h = max([v[0] for v in cfg.SCALES])
    max_w = max([v[1] for v in cfg.SCALES])
    max_data_shape = [[('data', (1, 3, max_h, max_w)),
                       ('data_cache', (T, 3, max_h, max_w)),
                       ]]

    # create predictor
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    return predictor
Esempio n. 20
0
def main():
    """Run the FGFA (flow-guided feature aggregation) video-detection demo
    with online-trained instance embeddings on one ILSVRC VID snippet.

    Per frame: extract ResNet/flownet features, aggregate them over a sliding
    window of ``2 * KEY_FRAME_INTERVAL + 1`` frames, detect and link boxes
    into global/local instances, and save visualization images.
    """
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn_online_train'
    model = '/../model/rfcn_fgfa_flownet_vid_original'

    # total number of frames held in the aggregation window
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    num_classes = 31
    classes = ['__background__','airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox',
               'giant_panda', 'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit',
               'red_panda', 'sheep', 'snake', 'squirrel',
               'tiger', 'train', 'turtle', 'watercraft',
               'whale', 'zebra']

    # load demo data

    snippet_name = 'ILSVRC2015_val_00177001' #'ILSVRC2015_val_00007016'#'ILSVRC2015_val_00000004' # 'ILSVRC2015_val_00007016'# 'ILSVRC2015_val_00044006' #'ILSVRC2015_val_00007010' #'ILSVRC2015_val_00016002'
    # ILSVRC2015_val_00010001: zebra,
    # 37002:cat and fox, motion blur
    # ILSVRC2015_val_00095000: fox, defocus
    image_names = glob.glob(cur_path + '/../demo/' + snippet_name + '/*.JPEG')
    image_names.sort()
    output_dir = cur_path + '/../demo/test_'# rfcn_fgfa_online_train_'
    output_dir_ginst = cur_path + '/../demo/test_ginst_'  # rfcn_fgfa_online_train_'
    output_dir_linst = cur_path + '/../demo/test_linst_'
    if (cfg.TEST.SEQ_NMS):
        output_dir += 'SEQ_NMS_'
        output_dir_ginst += 'SEQ_NMS_'
    output_dir += snippet_name + '/'
    output_dir_ginst += snippet_name + '/'
    output_dir_linst += snippet_name + '/'

    # loop-invariant: feature stride for computing max feat_cache shape below
    feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
    data = []
    for im_name in image_names:
        # fixed: '%s' placeholder was used with str.format, message never interpolated
        assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)

        # data_cache / feat_cache are placeholders here; real caches are filled per-window
        data.append({'data': im_tensor, 'im_info': im_info,  'data_cache': im_tensor,    'feat_cache': im_tensor})



    # get predictor

    print('get-predictor')
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', ((19, cfg.network.FGFA_FEAT_DIM,
                                                np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                                np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int))))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    # add parameters for instance cls & regression, initialized from the box branch
    arg_params['rfcn_ibbox_bias'] = arg_params['rfcn_bbox_bias'].copy()  #deep copy
    arg_params['rfcn_ibbox_weight'] = arg_params['rfcn_bbox_weight'].copy()  #deep copy
    max_inst = cfg.TEST.NUM_INSTANCES


    feat_predictors = Predictor(feat_sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)


    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))]
                 for _ in range(num_classes)]
    all_boxes_inst = [[[] for _ in range(len(data))]
                 for _ in range(num_classes)]

    ginst_mem = [] # list for instance class
    sim_array_global = [] # similarity array list
    ginst_ID = 0

    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = True
    file_idx = 0
    thresh = (1e-3)
    for idx, element in enumerate(data):

        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if(idx != len(data)-1):
            if len(data_list) < all_frame_interval - 1:
                # still filling the temporal window: extract features only
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

            else:
                #if file_idx ==15:
                #    print( '%d frame' % (file_idx))
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch) #put 19 data & feat list into data_batch
                #aggr_predictors._mod.forward(data_batch) #sidong
                #zxcv= aggr_predictors._mod.get_outputs(merge_multi_context=False)#sidong
                pred_result = im_detect_all(aggr_predictors, data_batch, data_names, scales, cfg) # get box result [[scores, pred_boxes, rois, data_dict, iscores, ipred_boxes, cropped_embed]]

                data_batch.data[0][-2] = None # 19 frames of data possesses much memory, so clear it
                data_batch.provide_data[0][-2] = ('data_cache', None) # also clear shape info of data
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                ginst_ID_prev = ginst_ID
                ginst_ID, out_im, out_im2, out_im_linst = process_link_pred_result(classes, pred_result, num_classes, thresh, cfg, nms, all_boxes,
                                                    all_boxes_inst, file_idx, max_per_image, vis,
                                                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales, ginst_mem, sim_array_global, ginst_ID)
                #out_im2 = process_pred_result_rois(pred_result, cfg.TEST.RPN_NMS_THRESH, cfg, nms, all_rois, file_idx, max_per_image,
                #                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                ginst_ID_now = ginst_ID
                # initialize parameters for any instances newly created this frame
                init_inst_params(ginst_mem, ginst_ID_prev, ginst_ID_now, max_inst, aggr_predictors, arg_params)

                total_time = time.time()-t1
                if (cfg.TEST.SEQ_NMS==False):
                    if cfg.TEST.DISPLAY[0]:
                        save_image(output_dir, file_idx, out_im)
                    if cfg.TEST.DISPLAY[1]:
                        save_image(output_dir_ginst, file_idx, out_im2)
                    if cfg.TEST.DISPLAY[2]:
                        save_image(output_dir_linst, file_idx, out_im_linst)
                #testing by metric


                print( 'testing {} {:.4f}s'.format(str(file_idx)+'.JPEG', total_time /(file_idx+1)))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################

            # pad the tail of the window with the last frame and flush remaining detections
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect_all(aggr_predictors, data_batch, data_names, scales, cfg)

                ginst_ID_prev = ginst_ID
                ginst_ID, out_im, out_im2, out_im_linst = process_link_pred_result(classes, pred_result, num_classes, thresh, cfg, nms, all_boxes,
                                                      all_boxes_inst, file_idx, max_per_image, vis,
                                                      data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales,
                                                      ginst_mem, sim_array_global, ginst_ID)
                # out_im2 = process_pred_result_rois(pred_result, cfg.TEST.RPN_NMS_THRESH, cfg, nms, all_rois, file_idx, max_per_image,
                #                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                ginst_ID_now = ginst_ID
                init_inst_params(ginst_mem, ginst_ID_prev, ginst_ID_now, max_inst, aggr_predictors ,arg_params)

                total_time = time.time() - t1
                if (cfg.TEST.SEQ_NMS == False):
                    if cfg.TEST.DISPLAY[0]:
                        save_image(output_dir, file_idx, out_im)
                    if cfg.TEST.DISPLAY[1]:
                        save_image(output_dir_ginst, file_idx, out_im2)
                    if cfg.TEST.DISPLAY[2]:
                        save_image(output_dir_linst, file_idx, out_im_linst)
                print( 'testing {} {:.4f}s'.format(str(file_idx)+'.JPEG', total_time / (file_idx+1)))
                file_idx += 1
                end_counter += 1

    # optional sequence-level NMS over the whole video
    if(cfg.TEST.SEQ_NMS):
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print('done')
def main(tempFileList, fileOp):
    """Run R-FCN (DCN) detection over a list of image files and record results.

    :param tempFileList: iterable of image file paths to process
    :param fileOp: open file handle passed through to show_boxes for logging
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    out_dir = os.path.join(
        cur_path,
        'demo/output/terror-det-rg-data-output/terror-det-v0.9-test/JPEGImages'
    )
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    # set up class names
    # NOTE(review): num_classes is 7 but `classes` lists only 6 names —
    # confirm whether '__background__' was meant to be included here.
    num_classes = 7
    classes = [
        'tibetan flag', 'guns', 'knives', 'not terror', 'islamic flag',
        'isis flag'
    ]

    # load demo data: read, resize and normalize every image up-front
    image_names = tempFileList
    data = []
    for im_name in image_names:
        im_file = im_name
        print(im_file)
        im = cv2.imread(im_file, cv2.IMREAD_COLOR)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/demo/models/' +
                                        ('rfcn_voc'),
                                        10,
                                        process=True)
    #modify by zxt
    #mx.model.save_checkpoint('f1/final', 10, sym, arg_params, aux_params)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up: two forward passes so later timings exclude bind/alloc cost
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)

    # test
    # fileOp = open(os.path.join(cur_path, 'terror-det-rg-test-result.txt'), 'w')
    for idx, im_name in enumerate(image_names):
        print("begining process %s" % (im_name))
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            # class-agnostic regression shares one box per RoI; otherwise
            # each class has its own 4 box coordinates
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        # visualize
        im = cv2.imread(im_name)
        #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im_result = show_boxes(fileOp, im_name, im, dets_nms, classes, 1)
        # fixed: out_dir has no trailing separator, so plain concatenation
        # wrote files outside the intended directory
        cv2.imwrite(os.path.join(out_dir, im_name.split('/')[-1]), im_result)
    print('done')
Esempio n. 22
0
def run_detection(im_root, result_root, conf_threshold):
    """Run R-FCN (optionally with DCN) over every image in a directory,
    writing per-frame person detections to text files and showing results.

    :param im_root: directory containing the input images
    :param result_root: directory receiving '<idx>.txt' person-detection files
    :param conf_threshold: minimum detection score kept after soft-NMS
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names (COCO: 80 object classes, background excluded)
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    print('detection in {}'.format(im_root))
    im_names = sorted(os.listdir(im_root))

    # ensure the output directory exists before the write loop below
    if not os.path.exists(result_root):
        os.makedirs(result_root)

    # get predictor; only the first two images are needed to derive bind shapes
    data_names = ['data', 'im_info']
    label_names = []
    data = []
    for idx, im_name in enumerate(im_names[:2]):
        im_file = os.path.join(im_root, im_name)
        im = cv2.imread(im_file,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        data.append({'data': im_tensor, 'im_info': im_info})

    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        config.TEST.test_epoch,
        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    # nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    # nms = soft_nms_wrapper(config.TEST.NMS, method=2)
    nms = gpu_soft_nms_wrapper(config.TEST.NMS, method=2, device_id=0)

    nms_t = Timer()
    for idx, im_name in enumerate(im_names):
        im_file = os.path.join(im_root, im_name)
        im = cv2.imread(im_file,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        origin_im = im.copy()

        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        # input
        data = [mx.nd.array(im_tensor), mx.nd.array(im_info)]
        data_batch = mx.io.DataBatch(data=[data],
                                     label=[],
                                     pad=0,
                                     index=idx,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data)
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            # class-agnostic regression shares one box per RoI; otherwise
            # each class has its own 4 box coordinates
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            nms_t.tic()
            keep = nms(cls_dets)
            nms_t.toc()
            cls_dets = cls_dets[keep, :]
            # fixed: threshold was hard-coded to 0.7, ignoring the
            # conf_threshold parameter entirely
            cls_dets = cls_dets[cls_dets[:, -1] > conf_threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.2f}ms'.format(im_name, toc() * 1000))
        print('nms: {:.2f}ms'.format(nms_t.total_time * 1000))
        nms_t.clear()

        # save results: one file per frame, person class only (dets_nms[0])
        person_dets = dets_nms[0]
        with open(os.path.join(result_root, '{:04d}.txt'.format(idx)),
                  'w') as f:
            f.write('{}\n'.format(len(person_dets)))
            for det in person_dets:
                x1, y1, x2, y2, s = det
                w = x2 - x1
                h = y2 - y1
                f.write('0 {} {} {} {} {}\n'.format(s, w, h, x1, y1))

        # visualize
        im = origin_im
        # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = show_boxes_cv2(im, dets_nms, classes, 1)

        cv2.imshow('det', im)
        cv2.waitKey(1)
Esempio n. 23
0
def main():
    """Run the DFF (deep feature flow) semantic-segmentation demo on the
    Cityscapes Frankfurt sequence.

    Key frames run the full deeplab network; intermediate frames warp the
    key-frame features via flownet. Per-frame predictions are saved as
    palettized PNGs and mIoU is accumulated where labels are available.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_deeplab'
    model1 = '/../model/rfcn_dff_flownet_vid'
    model2 = '/../model/deeplab_dcn_cityscapes'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    # settings
    num_classes = 19
    interv = args.interval
    num_ex = args.num_ex

    # load demo data
    image_names = sorted(
        glob.glob(cur_path +
                  '/../demo/cityscapes_data/cityscapes_frankfurt_all_i' +
                  str(interv) + '/*.png'))
    image_names = image_names[:interv * num_ex]
    label_files = sorted(
        glob.glob(
            cur_path +
            '/../demo/cityscapes_data/cityscapes_frankfurt_labels_all/*.png'))

    output_dir = cur_path + '/../demo/deeplab_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = interv

    #

    data = []
    key_im_tensor = None
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        # remember the latest key frame; intermediate frames pair with it
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        data.append({
            'data':
            im_tensor,
            'im_info':
            im_info,
            'data_key':
            key_im_tensor,
            'feat_key':
            np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })

    # get predictor
    data_names = ['data', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in config.SCALES]),
                  max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    # models: rfcn_dff_flownet_vid, deeplab_cityscapes
    arg_params, aux_params = load_param_multi(cur_path + model1,
                                              cur_path + model2,
                                              0,
                                              process=True)
    key_predictor = Predictor(key_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    cur_predictor = Predictor(cur_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    # removed: nms wrapper was created here but never used in this
    # segmentation demo

    # warm up: one key-frame pass and one intermediate pass so later
    # timings exclude bind/alloc cost
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        if j % key_frame_interval == 0:
            # scores, boxes, data_dict, feat = im_detect(key_predictor, data_batch, data_names, scales, config)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            # scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch, data_names, scales, config)
            output_all, _ = im_segment(cur_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]

    print("warmup done")
    # test
    # renamed from `time` to avoid shadowing the time module's name
    elapsed_total = 0
    count = 0
    hist = np.zeros((num_classes, num_classes))
    lb_idx = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        if idx % key_frame_interval == 0:
            print('\nframe {} (key)'.format(idx))
            # scores, boxes, data_dict, feat = im_detect(key_predictor, data_batch, data_names, scales, config)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]
        else:
            print('\nframe {} (intermediate)'.format(idx))
            # reuse the cached key-frame features; flownet warps them
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            # scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch, data_names, scales, config)
            output_all, _ = im_segment(cur_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]

        elapsed = toc()
        elapsed_total += elapsed
        count += 1
        print('testing {} {:.4f}s [{:.4f}s]'.format(im_name, elapsed,
                                                    elapsed_total / count))

        pred = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(pred)
        pallete = getpallete(256)
        segmentation_result.putpalette(pallete)
        _, im_filename = os.path.split(im_name)
        segmentation_result.save(output_dir + '/seg_' + im_filename)

        label = None

        _, lb_filename = os.path.split(label_files[lb_idx])
        im_comps = im_filename.split('_')
        lb_comps = lb_filename.split('_')
        # if annotation available for frame (city/sequence components match)
        if im_comps[1] == lb_comps[1] and im_comps[2] == lb_comps[2]:
            print('label {}'.format(lb_filename))
            label = np.asarray(Image.open(label_files[lb_idx]))
            if lb_idx < len(label_files) - 1:
                lb_idx += 1

        if label is not None:
            curr_hist = fast_hist(pred.flatten(), label.flatten(), num_classes)
            hist += curr_hist
            print('mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(curr_hist)) * 100, 2)))
            print('(cum) mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(hist)) * 100, 2)))

    ious = per_class_iu(hist) * 100
    print(' '.join('{:.03f}'.format(i) for i in ious))
    print('===> final mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(ious), 2)))

    print('done')
Esempio n. 24
0
def test_net(args):
    """Evaluate a trained FCIS/RPN model over a whole test image set.

    Loads the YAML config referenced by ``args.cfg``, restores parameters
    from ``args.model``, builds the test dataset + loader, and runs
    ``pred_eval`` with the resulting ``Predictor``.

    :param args: parsed CLI namespace; must provide ``cfg``, ``model``,
        ``shuffle``, ``vis``, ``ignore_cache`` and ``thresh``.
    :raises NotImplementedError: if the config disables RPN (only RPN-based
        testing is supported here).
    """
    # init config
    cfg_path = args.cfg
    update_config(cfg_path)

    # test parameters
    has_rpn = config.TEST.HAS_RPN
    if not has_rpn:
        # parenthesized form: the old `raise E, "msg"` comma syntax is
        # Python-2-only and a syntax error under Python 3
        raise NotImplementedError("Network without RPN is not implemented")

    # load model
    model_path = args.model
    if not model_path.endswith('.params'):
        # tolerate a checkpoint path given without the MXNet extension
        # (endswith is stricter than the previous substring check, which
        # would miss e.g. 'foo.params.bak')
        model_path += ".params"
    assert osp.exists(model_path), ("Could not find model path %s" %
                                    (model_path))
    arg_params, aux_params = load_param_file(model_path, process=True)
    print("\nLoaded model %s\n" % (model_path))

    # gpu stuff
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]

    # load test dataset
    cfg_ds = config.dataset
    ds_name = cfg_ds.dataset
    ds_path = cfg_ds.dataset_path
    test_image_set = cfg_ds.test_image_set

    # logger
    logger, output_path = create_logger(config.output_path, args.cfg,
                                        config.dataset.test_image_set)
    logger.info('testing config:{}\n'.format(pprint.pformat(config)))

    # 'labelme' takes a different constructor signature than the other
    # dataset classes, so it cannot go through the generic eval() path below
    if ds_name.lower() == "labelme":
        imdb = labelme(test_image_set,
                       ds_path,
                       cfg_ds.root_path,
                       mask_size=config.MASK_SIZE,
                       binary_thresh=config.BINARY_THRESH,
                       classes=cfg_ds.CLASSES)
    else:
        imdb = eval(ds_name)(test_image_set,
                             cfg_ds.root_path,
                             ds_path,
                             result_path=output_path,
                             binary_thresh=config.BINARY_THRESH,
                             mask_size=config.MASK_SIZE)
    sdsdb = imdb.gt_sdsdb()

    # load network
    network = resnet_v1_101_fcis()
    sym = network.get_symbol(config, is_train=False)

    # get test data iter
    test_data = TestLoader(sdsdb,
                           config,
                           batch_size=len(ctx),
                           shuffle=args.shuffle,
                           has_rpn=has_rpn)

    # infer shape and make sure the checkpoint matches the symbol
    data_shape_dict = dict(test_data.provide_data_single)
    network.infer_shape(data_shape_dict)

    network.check_parameter_shapes(arg_params,
                                   aux_params,
                                   data_shape_dict,
                                   is_train=False)

    # decide maximum shape (largest configured test scale)
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = []
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]

    # create predictor
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # start detection
    pred_eval(predictor,
              test_data,
              imdb,
              config,
              vis=args.vis,
              ignore_cache=args.ignore_cache,
              thresh=args.thresh,
              logger=logger)
Esempio n. 25
0
def main():
    """Chip a large image into square tiles, detect objects per tile with an
    FPN-DCN model, stitch the boxes back into full-image coordinates, and
    write them to ``args.output`` (one ``xmin ymin xmax ymax class score``
    line per non-background detection).
    """
    global classes

    # fixed: the old message used '%s ...'.format(...), which never
    # substitutes a %-placeholder, so the path was not shown
    assert os.path.exists(args.input), ('{} does not exist'.format(args.input))
    im = cv2.imread(args.input,
                    cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    arr = np.array(im)
    # NOTE(review): numpy shape order is (rows, cols, ch) = (height, width,
    # ch); these names are swapped, but the whole function uses the same
    # swapped convention consistently — confirm before renaming anything.
    origin_width, origin_height, _ = arr.shape

    portion = smart_chipping(origin_width, origin_height)

    # manually update the configuration
    # TODO: note this is hard coded and assume there are three values for the SCALE configuration
    config.SCALES[0] = (portion, portion, portion)
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_fpn_dcn_rcnn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # load demo data
    data = []

    cwn, chn = (portion, portion)
    wn, hn = (int(origin_width / cwn), int(origin_height / chn))
    # padding needed to round each dimension up to a multiple of the tile size
    padding_y = int(
        math.ceil(float(origin_height) / chn) * chn - origin_height)
    padding_x = int(math.ceil(float(origin_width) / cwn) * cwn - origin_width)
    print("padding_y,padding_x, origin_height, origin_width", padding_y,
          padding_x, origin_height, origin_width)
    # top, bottom, left, right - border width in number of pixels in corresponding directions
    # NOTE(review): bottom=padding_x / right=padding_y looks crossed, but it
    # matches the swapped width/height naming above, so each axis is in fact
    # padded by the amount computed for it.
    im = cv2.copyMakeBorder(im,
                            0,
                            padding_x,
                            0,
                            padding_y,
                            cv2.BORDER_CONSTANT,
                            value=[0, 0, 0])
    # the section below could be optimized. but basically the idea is to re-calculate all the values
    arr = np.array(im)
    width, height, _ = arr.shape
    cwn, chn = (portion, portion)
    wn, hn = (int(width / cwn), int(height / chn))

    image_list = chip_image(im, (portion, portion))
    for im in image_list:
        target_size = portion
        max_size = portion
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        # im_info: height, width, scale — consumed by the RPN
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/' +
                                        ('fpn_dcn_xview_480_640_800_alltrain'),
                                        11,
                                        process=True)
    print("loading parameter done")

    if args.cpu_only:
        predictor = Predictor(sym,
                              data_names,
                              label_names,
                              context=[mx.cpu()],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
        nms = py_nms_wrapper(config.TEST.NMS)
    else:
        predictor = Predictor(sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(args.gpu_index)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # cap on detections kept per tile, scaled with tile size
    num_preds = int(5000 * math.ceil(float(portion) / 400))
    # test
    boxes, scores, classes = generate_detections(data, data_names, predictor,
                                                 config, nms, image_list,
                                                 num_preds)

    # Process boxes to be full-sized: shift every tile's boxes by its tile
    # offset, then clip to the original (unpadded) image bounds.
    print("boxes shape is", boxes.shape, "wn, hn", wn, hn, "width, height",
          width, height)
    bfull = boxes.reshape((wn, hn, num_preds, 4))

    for i in range(wn):
        for j in range(hn):
            bfull[i, j, :, 0] += j * cwn
            bfull[i, j, :, 2] += j * cwn

            bfull[i, j, :, 1] += i * chn
            bfull[i, j, :, 3] += i * chn

            # clip values (origin_* follow the swapped naming noted above)
            bfull[i, j, :, 0] = np.clip(bfull[i, j, :, 0], 0, origin_height)
            bfull[i, j, :, 2] = np.clip(bfull[i, j, :, 2], 0, origin_height)

            bfull[i, j, :, 1] = np.clip(bfull[i, j, :, 1], 0, origin_width)
            bfull[i, j, :, 3] = np.clip(bfull[i, j, :, 3], 0, origin_width)

    bfull = bfull.reshape((hn * wn, num_preds, 4))
    scores = scores.reshape((hn * wn, num_preds))
    classes = classes.reshape((hn * wn, num_preds))

    # write every non-background prediction; box is xmin ymin xmax ymax
    with open(args.output, 'w') as f:
        for i in range(bfull.shape[0]):
            for j in range(bfull[i].shape[0]):
                box = bfull[i, j]
                class_prediction = classes[i, j]
                score_prediction = scores[i, j]
                if int(class_prediction) != 0:
                    f.write('%d %d %d %d %d %f \n' % \
                        (box[0], box[1], box[2], box[3], int(class_prediction), score_prediction))

    print('done')
Esempio n. 26
0
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_fgfa_flownet_vid'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    num_classes = 31
    classes = ['__background__','airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox',
               'giant_panda', 'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit',
               'red_panda', 'sheep', 'snake', 'squirrel',
               'tiger', 'train', 'turtle', 'watercraft',
               'whale', 'zebra']

    # load demo data

    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn_fgfa/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)

        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({'data': im_tensor, 'im_info': im_info,  'data_cache': im_tensor,    'feat_cache': im_tensor})



    # get predictor

    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', ((19, cfg.network.FGFA_FEAT_DIM,
                                                np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                                np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int))))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)


    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))]
                 for _ in range(num_classes)]
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):

        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if(idx != len(data)-1):

            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms, all_boxes, file_idx, max_per_image, vis,
                                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time()-t1
                if (cfg.TEST.SEQ_NMS==False):
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx)+'.JPEG', total_time /(file_idx+1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################

            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms, all_boxes, file_idx, max_per_image, vis,
                                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)

                total_time = time.time() - t1
                if (cfg.TEST.SEQ_NMS == False):
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx)+'.JPEG', total_time / (file_idx+1))
                file_idx += 1
                end_counter+=1

    if(cfg.TEST.SEQ_NMS):
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print 'done'
Esempio n. 27
0
def main():
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    # load demo data
    image_names = []
    names_dirs = os.listdir(cur_path + '/../' + test_dir)
    for im_name in names_dirs:
        if im_name[-4:] == '.jpg' or im_name[-4:] == '.png':
            image_names.append(im_name)

    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../' + test_dir + im_name), (
            '%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../' + test_dir + im_name,
                        cv2.IMREAD_COLOR | long(128))
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        #print "before scale: "
        #print im.shape
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        #print "after scale: "
        #print im.shape
        #im_scale = 1.0
        #print "scale ratio: "
        #print im_scale
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        #print im_tensor.shape
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../' + model_dir,
                                        0,
                                        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # warm up
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)
    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, [1.0], config)
        im_shapes = [
            data_batch.data[i][0].shape[2:4]
            for i in xrange(len(data_batch.data))
        ]
        #print im_shapes

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                try:
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[0][indexes, :]
                    else:
                        raise Exception()
                except:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes[0], scores[0], num_classes, 100, im_shapes[0][1],
                im_shapes[0][0], config.TEST.NMS,
                config.TEST.MASK_MERGE_THRESH, config.BINARY_THRESH, ctx_id[0])

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(cur_path + '/../' + test_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config, 1.0 / scales[0], False)

        # Save img
        cv2.imwrite(cur_path + '/../' + result_dir + im_name,
                    cv2.cvtColor(im, cv2.COLOR_BGR2RGB))

    print 'done'
Esempio n. 28
0
    def batch_extract(self, multiple=True, gt_dir=None, epoch=0):
        """

        :param multiple:
        :param gt_dir:
        :return:
        """
        if len(self.img_list) % self.batch_size != 0:
            batch = len(self.img_list) / self.batch_size + 1
        else:
            batch = len(self.img_list) / self.batch_size

        for i in xrange(batch):

            if i < batch - 1:
                self.batch_list = self.img_list[i * self.batch_size:(i + 1) *
                                                self.batch_size]
            else:
                self.batch_list = self.img_list[i * self.batch_size:]

            print '\nMini-batch %d\t' % (i + 1)

            tmp_data = []
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]

            tic()
            for img in self.batch_list:
                assert os.path.exists(img), ('%s does not exist.'.format(img))
                im = cv2.imread(
                    img, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
                im, im_scale = resize(im,
                                      target_size,
                                      max_size,
                                      stride=config.network.IMAGE_STRIDE)
                im_tensor = transform(im, config.network.PIXEL_MEANS)
                # im_info: height, width, scale
                im_info = np.array(
                    [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                    dtype=np.float32)
                tmp_data.append({
                    self.data_names[0]: im_tensor,
                    self.data_names[1]: im_info
                })

            self.ctx = [int(i) for i in config.gpus.split(',')]
            self.data = [[
                mx.nd.array(tmp_data[i][name], mx.gpu(self.ctx[0]))
                for name in self.data_names
            ] for i in xrange(len(tmp_data))]

            max_data_shape = [[(self.data_names[0],
                                (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
            provide_data = [[(k, v.shape)
                             for k, v in zip(self.data_names, self.data[i])]
                            for i in xrange(len(self.data))]
            provide_label = [None for i in xrange(len(self.data))]

            arg_params, aux_params = load_param(self.model_dir,
                                                epoch,
                                                process=True)

            self.predictor = Predictor(self.sym,
                                       self.data_names,
                                       self.label_name,
                                       context=[mx.gpu(self.ctx[0])],
                                       max_data_shapes=max_data_shape,
                                       provide_data=provide_data,
                                       provide_label=provide_label,
                                       arg_params=arg_params,
                                       aux_params=aux_params)
            print 'preparation: %.4fs' % toc()

            if i == 0:
                self.warmup()

            self.forward(multiple=multiple, gt_dir=gt_dir)

        self.cleaner()
Esempio n. 29
0
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names; Don't count the background in, even we are treat the background as label '0'
    num_classes = 4
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    # load demo data
    image_path = './data/RoadImages/test/'
    image_names = glob.glob(image_path + '*.jpg')

    print("Image amount {}".format(len(image_names)))
    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][1]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        './output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road',
        19,
        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # test
    notation_dict = {}
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:,
                              4:8] if config.CLASS_AGNOSTIC else boxes[:, j *
                                                                       4:(j +
                                                                          1) *
                                                                       4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # notation_list.append(get_notation(im_name, dets_nms, classes, scale=1.0, gen_bbox_pic=True))
        notation_dict.update(
            get_notation(im_name,
                         dets_nms,
                         classes,
                         scale=1.0,
                         gen_bbox_pic=True))
    save_notation_file(notation_dict)
    print 'done'
    def predict(self, images, feat_output, aggr_feat_output):
        """Run FGFA (flow-guided feature aggregation) detection on one video.

        Extracts ResNet features per frame, keeps a sliding window of
        ``all_frame_interval`` frames/features in two deques, aggregates the
        window to detect objects on each key frame, and saves the visualised
        frames into a fresh ``rfcn_fgfa_<index>`` output directory.

        Args:
            images: sequence of decoded frames (one video) to detect on.
            feat_output: not referenced in this body -- presumably an export
                hook for raw per-frame features; TODO confirm with callers.
            aggr_feat_output: not referenced in this body -- presumably an
                export hook for aggregated features; TODO confirm with callers.
        """

        model = self.model
        all_frame_interval = self.all_frame_interval
        feat_sym = self.feat_sym
        aggr_sym = self.aggr_sym
        num_classes = self.num_classes
        classes = self.classes
        max_per_image = self.max_per_image

        # Each predict() call writes into its own numbered output directory.
        output_dir = cur_path + '/../demo/rfcn_fgfa_{}/'.format(self.index)
        self.index += 1
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Preprocess every frame: resize to the configured scale, subtract
        # pixel means, and record (height, width, scale) as im_info.
        data = []
        for im in images:
            target_size = cfg.SCALES[0][0]
            max_size = cfg.SCALES[0][1]
            im, im_scale = resize(im,
                                  target_size,
                                  max_size,
                                  stride=cfg.network.IMAGE_STRIDE)
            im_tensor = transform(im, cfg.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)

            # NOTE(review): feat_stride is (re)assigned on every iteration but
            # only read after the loop -- a NameError waits here if images is
            # empty.
            feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
            # data_cache / feat_cache start as the frame tensor itself; they
            # are replaced with the real window contents by prepare_data().
            data.append({
                'data': im_tensor,
                'im_info': im_info,
                'data_cache': im_tensor,
                'feat_cache': im_tensor
            })

        # get predictor

        print 'get-predictor'
        data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
        label_names = []

        t1 = time.time()
        data = [[mx.nd.array(data[i][name]) for name in data_names]
                for i in xrange(len(data))]
        # Max shapes used for executor bucketing. The 11 cache slots are
        # presumably 2 * cfg.TEST.KEY_FRAME_INTERVAL + 1 -- confirm against
        # the config before reusing with other interval settings.
        max_data_shape = [[
            ('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                      max([v[1] for v in cfg.SCALES]))),
            ('data_cache', (11, 3, max([v[0] for v in cfg.SCALES]),
                            max([v[1] for v in cfg.SCALES]))),
            ('feat_cache',
             ((11, cfg.network.FGFA_FEAT_DIM,
               np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(
                   np.int),
               np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(
                   np.int))))
        ]]
        provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                        for i in xrange(len(data))]
        provide_label = [None for _ in xrange(len(data))]

        arg_params, aux_params = load_param(cur_path + model, 0, process=True)

        # Two predictors share one checkpoint: feat_predictors runs the
        # per-frame feature network, aggr_predictors the aggregation + R-FCN
        # detection head.
        feat_predictors = Predictor(feat_sym,
                                    data_names,
                                    label_names,
                                    context=[mx.gpu(0)],
                                    max_data_shapes=max_data_shape,
                                    provide_data=provide_data,
                                    provide_label=provide_label,
                                    arg_params=arg_params,
                                    aux_params=aux_params)
        aggr_predictors = Predictor(aggr_sym,
                                    data_names,
                                    label_names,
                                    context=[mx.gpu(0)],
                                    max_data_shapes=max_data_shape,
                                    provide_data=provide_data,
                                    provide_label=provide_label,
                                    arg_params=arg_params,
                                    aux_params=aux_params)
        nms = py_nms_wrapper(cfg.TEST.NMS)

        # First frame of the video
        idx = 0
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        # im_info[0, 2] carries the resize scale for each batch element.
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        # all_boxes[class][frame] accumulates detections for optional SEQ_NMS.
        all_boxes = [[[] for _ in range(len(data))]
                     for _ in range(num_classes)]
        # Sliding windows over frames and their features; maxlen makes the
        # deques drop the oldest entry automatically.
        data_list = deque(maxlen=all_frame_interval)
        feat_list = deque(maxlen=all_frame_interval)
        image, feat = get_resnet_output(feat_predictors, data_batch,
                                        data_names)
        # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
        while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
            data_list.append(image)
            feat_list.append(feat)

        vis = False
        file_idx = 0
        thresh = 1e-3
        for idx, element in enumerate(data):

            data_batch = mx.io.DataBatch(
                data=[element],
                label=[],
                pad=0,
                index=idx,
                provide_data=[[(k, v.shape)
                               for k, v in zip(data_names, element)]],
                provide_label=[None])
            scales = [
                data_batch.data[i][1].asnumpy()[0, 2]
                for i in xrange(len(data_batch.data))
            ]

            if (idx != len(data) - 1):

                if len(data_list) < all_frame_interval - 1:
                    # Window not full yet: only accumulate features.
                    image, feat = get_resnet_output(feat_predictors,
                                                    data_batch, data_names)
                    data_list.append(image)
                    feat_list.append(feat)

                else:
                    #################################################
                    # main part of the loop
                    #################################################
                    image, feat = get_resnet_output(feat_predictors,
                                                    data_batch, data_names)
                    data_list.append(image)
                    feat_list.append(feat)

                    # Fill data_cache/feat_cache in the batch with the current
                    # window, then detect on the window's key frame.
                    prepare_data(data_list, feat_list, data_batch)
                    pred_result, aggr_feat = im_detect(aggr_predictors,
                                                       data_batch, data_names,
                                                       scales, cfg)
                    assert len(aggr_feat) == 1

                    # Drop the large cache arrays from the batch -- apparently
                    # to release memory before the next iteration.
                    data_batch.data[0][-2] = None
                    data_batch.provide_data[0][-2] = ('data_cache', None)
                    data_batch.data[0][-1] = None
                    data_batch.provide_data[0][-1] = ('feat_cache', None)

                    out_im = process_pred_result(
                        classes, pred_result, num_classes, thresh, cfg, nms,
                        all_boxes, file_idx, max_per_image, vis,
                        data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(),
                        scales)
                    total_time = time.time() - t1
                    if (cfg.TEST.SEQ_NMS == False):
                        save_image(output_dir, file_idx, out_im)
                    print 'testing {} {:.4f}s'.format(
                        str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                    file_idx += 1

            else:
                #################################################
                # end part of a video                           #
                #################################################

                # Pad the tail of the video by repeating the last frame's
                # features so the remaining key frames can still be detected.
                end_counter = 0
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                    data_list.append(image)
                    feat_list.append(feat)
                    prepare_data(data_list, feat_list, data_batch)
                    pred_result, aggr_feat = im_detect(aggr_predictors,
                                                       data_batch, data_names,
                                                       scales, cfg)
                    assert len(aggr_feat) == 1

                    out_im = process_pred_result(
                        classes, pred_result, num_classes, thresh, cfg, nms,
                        all_boxes, file_idx, max_per_image, vis,
                        data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(),
                        scales)

                    total_time = time.time() - t1
                    if (cfg.TEST.SEQ_NMS == False):
                        save_image(output_dir, file_idx, out_im)
                    print 'testing {} {:.4f}s'.format(
                        str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                    file_idx += 1
                    end_counter += 1

        # Optional sequence-level NMS over the whole video, then re-render
        # and save every frame from the post-processed boxes.
        if (cfg.TEST.SEQ_NMS):
            video = [all_boxes[j][:] for j in range(1, num_classes)]
            dets_all = seq_nms(video)
            for cls_ind, dets_cls in enumerate(dets_all):
                for frame_ind, dets in enumerate(dets_cls):
                    keep = nms(dets)
                    all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
            for idx in range(len(data)):
                boxes_this_image = [[]] + [
                    all_boxes[j][idx] for j in range(1, num_classes)
                ]
                out_im = draw_all_detection(data[idx][0].asnumpy(),
                                            boxes_this_image, classes,
                                            scales[0], cfg)
                save_image(output_dir, idx, out_im)

        print 'done'