Esempio n. 1
0
            def multi_detect_input(im):

                #print(im.shape)

                target_size = config.SCALES[0][0]
                max_size = config.SCALES[0][1]
                im, im_scale = resize(im,
                                      target_size,
                                      max_size,
                                      stride=config.network.IMAGE_STRIDE)
                im_tensor = transform(im, config.network.PIXEL_MEANS)
                im_info = np.array(
                    [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                    dtype=np.float32)

                data_idx = [{"data": im_tensor, "im_info": im_info}]
                data_idx = [[
                    mx.nd.array(data_idx[i][name]) for name in data_names
                ] for i in xrange(len(data_idx))]
                data_batch = mx.io.DataBatch(
                    data=[data_idx[0]],
                    label=[],
                    pad=0,
                    index=idx,
                    provide_data=[[(k, v.shape)
                                   for k, v in zip(data_names, data_idx[0])]],
                    provide_label=[None])

                scales = [
                    data_batch.data[i][1].asnumpy()[0, 2]
                    for i in xrange(len(data_batch.data))
                ]
                return data_batch, scales
Esempio n. 2
0
    def predict(self, imarray, pixel_means, pixel_stds, crop_size=512, color_scale=-1,
                    feature_ratio= 2.0 / 3, num_steps=1, output_name="softmax",feature_stride = 8):

            im_tensor = transform(imarray, pixel_means, color_scale=color_scale, pixel_stds=pixel_stds)
            long_size = max(im_tensor[2:])
            if(long_size<crop_size):
                return self.predict_patch(im_tensor,feature_stride,num_steps,output_name)
Esempio n. 3
0
File: demo.py Progetto: mrlooi/FCIS
 def get_data_tensor_info(self, im):
     im_, im_scale = resize(im, self.target_size, self.max_size, stride=self.image_stride)
     im_tensor = transform(im_, self.pixel_means)
     im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
     data_tensor_info = [mx.nd.array(im_tensor), mx.nd.array(im_info)]
     
     return data_tensor_info
def generate_batch(im):
    """
    preprocess image, return batch
    :param im: cv2.imread returns [height, width, channel] in BGR
    :return:
    data_batch: MXNet input batch
    data_names: names in data_batch
    im_scale: float number
    """
    SHORT_SIDE = config.SCALES[0][0]
    LONG_SIDE = config.SCALES[0][1]
    PIXEL_MEANS = config.network.PIXEL_MEANS
    DATA_NAMES = ['data', 'im_info']

    im_array, im_scale = resize(im, SHORT_SIDE, LONG_SIDE)
    im_array = transform(im_array, PIXEL_MEANS)
    im_info = np.array([[im_array.shape[2], im_array.shape[3], im_scale]],
                       dtype=np.float32)
    data = [[mx.nd.array(im_array), mx.nd.array(im_info)]]
    data_shapes = [[('data', im_array.shape), ('im_info', im_info.shape)]]
    data_batch = mx.io.DataBatch(data=data,
                                 label=[None],
                                 provide_data=data_shapes,
                                 provide_label=[None])
    return data_batch, DATA_NAMES, [im_scale]
Esempio n. 5
0
def det(mod, fn):
    
    raw_img = cv2.imread(fn)

    if raw_img.shape[0] < raw_img.shape[1]:
        raw_img = cv2.copyMakeBorder(raw_img,0
        ,raw_img.shape[1]-raw_img.shape[0], 0, 0, cv2.BORDER_CONSTANT)

    im_shape = [IMG_H,IMG_W] # reverse order
    img = cv2.resize(raw_img, (IMG_H,IMG_W))
    raw_h = img.shape[0]
    raw_w = img.shape[1]

    im_tensor = image.transform(img, [124,117,104], 0.0167)

    im_info = np.array([[  IMG_H,   IMG_W,   4.18300658e-01]])

    batch = mx.io.DataBatch([mx.nd.array(im_tensor), mx.nd.array(im_info)])

    start = time.time()
    mod.forward(batch)

    output_names = mod.output_names
    output_tensor = mod.get_outputs()
    mod.get_outputs()[0].wait_to_read()
    print ("time", time.time()-start, "secs.")

    output = dict(zip(output_names ,output_tensor))

    rois = output['rois_output'].asnumpy()[:, 1:]
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    num_classes = 2

    all_cls_dets = [[] for _ in range(num_classes)]

    for j in range(1, num_classes):
        indexes = np.where(scores[:, j] > 0.1)[0]
        cls_scores = scores[indexes, j, np.newaxis]
        cls_boxes = pred_boxes[indexes, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores)).copy()
        all_cls_dets[j] = cls_dets

    for idx_class in range(1, num_classes):
        nms = py_nms_wrapper(0.3)
        keep = nms(all_cls_dets[idx_class])
        all_cls_dets[idx_class] = all_cls_dets[idx_class][keep, :]

    for i in range(all_cls_dets[1].shape[0]):
        cv2.rectangle(img, (int(all_cls_dets[1][i][0]), int(all_cls_dets[1][i][1]))
        ,(int(all_cls_dets[1][i][2]), int(all_cls_dets[1][i][3])),(0,0,255),1)

    
    cv2.imshow("w", img)
    cv2.waitKey()
Esempio n. 6
0
def main(im_name, frame, score):
    data = []

    # only resize input image to target size and return scale
    im, im_scale = resize(frame, target_size, max_size, stride=config.network.IMAGE_STRIDE)

    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    data.append({'data': im_tensor, 'im_info': im_info})

    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]

    data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                    provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                    provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

    tic()
    scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
    im_shapes = [data_batch.data[i][0].shape[2:4] for i in xrange(len(data_batch.data))]

    masks = masks[0][:, 1:, :, :]
    im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
    im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
    # print (im_height, im_width)
    boxes = clip_boxes(boxes[0], (im_height, im_width))
    result_masks, result_dets = cpu_mask_voting(masks, boxes, scores[0], num_classes,
                                                100, im_width, im_height,
                                                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                                                config.BINARY_THRESH)

    dets = [result_dets[j] for j in range(1, num_classes)]
    masks = [result_masks[j][:, 0, :, :] for j in range(1, num_classes)]
    
    print 'testing {} {:.4f}s'.format(im_name, toc())

    min_confidence = score

    # visualize
    for i in xrange(len(dets)):
        keep = np.where(dets[i][:,-1] > min_confidence)
        dets[i] = dets[i][keep]
        masks[i] = masks[i][keep]

    '''
    dets: [ numpy.ndarray([[x1 y1 x2 y2 score]]) for j in classes ]
        [x1 y1] - upper left corner of object
        [x2 y2] - lower right corner of object

    masks: [ numpy.ndarray([21, 21]) for j in classes ]
        confidence that pixel belongs to object
    '''

    for i in range(len(dets)):
        if len(dets[i]) > 0:
            for j in range(len(dets[i])):
                print('{name}: {score} ({loc})'.format(name = classes[i], score = dets[i][j][-1], loc = dets[i][j][:-1].tolist()))
Esempio n. 7
0
def fcis_seg(image, classes, predictor, args):
    num_classes = len(classes) + 1
    data = []
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(image, target_size, max_size, stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    start = time.time()
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    data.append({'data': im_tensor, 'im_info': im_info})
    data_names = ['data', 'im_info']
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
    im_shapes = [data_batch.data[i][0].shape[2:4] for i in xrange(len(data_batch.data))]

    if not config.TEST.USE_MASK_MERGE:
        all_boxes = [[] for _ in xrange(num_classes)]
        all_masks = [[] for _ in xrange(num_classes)]
        nms = py_nms_wrapper(config.TEST.NMS)
        for j in range(1, num_classes):
            indexes = np.where(scores[0][:, j] > 0.7)[0]
            cls_scores = scores[0][indexes, j, np.newaxis]
            cls_masks = masks[0][indexes, 1, :, :]
            try:
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    raise Exception()
            except:
                cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            all_boxes[j] = cls_dets[keep, :]
            all_masks[j] = cls_masks[keep, :]
        dets = [all_boxes[j] for j in range(1, num_classes)]
        masks = [all_masks[j] for j in range(1, num_classes)]
    else:
        masks = masks[0][:, 1:, :, :]
        im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
        im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
        # print (im_height, im_width)
        boxes = clip_boxes(boxes[0], (im_height, im_width))
        result_masks, result_dets = gpu_mask_voting(masks, boxes, scores[0], num_classes,
                                                    100, im_width, im_height,
                                                    config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                                                    config.BINARY_THRESH, 0)

        dets = [result_dets[j] for j in range(1, num_classes)]
        masks = [result_masks[j][:, 0, :, :] for j in range(1, num_classes)]
    cods, bimsks, names = decode_mask(im, dets, masks, classes, config, args)
    return cods, bimsks, names
def main():
    # get symbol
    pprint.pprint(config)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol_rfcn(config, is_train=False)

    # load demo data
    image_names = ['000057.jpg', '000149.jpg', '000351.jpg', '002535.jpg']
    image_all = []
    # ground truth boxes
    gt_boxes_all = [np.array([[132, 52, 384, 357]]), np.array([[113, 1, 350, 360]]),
                    np.array([[0, 27, 329, 155]]), np.array([[8, 40, 499, 289]])]
    gt_classes_all = [np.array([3]), np.array([16]), np.array([7]), np.array([12])]
    data = []
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(cur_path + '/../demo/deform_psroi/' + im_name), \
            ('%s does not exist'.format('../demo/deform_psroi/' + im_name))
        im = cv2.imread(cur_path + '/../demo/deform_psroi/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        image_all.append(im)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        gt_boxes = gt_boxes_all[idx]
        gt_boxes = np.round(gt_boxes * im_scale)
        data.append({'data': im_tensor, 'rois': np.hstack((np.zeros((gt_boxes.shape[0], 1)), gt_boxes))})

    # get predictor
    data_names = ['data', 'rois']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/deform_psroi', 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # test
    for idx, _ in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])

        output = predictor.predict(data_batch)
        cls_offset = output[0]['rfcn_cls_offset_output'].asnumpy()

        im = image_all[idx]
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        boxes = gt_boxes_all[idx]
        show_dpsroi_offset(im, boxes, cls_offset, gt_classes_all[idx])
def main():
    # get symbol
    pprint.pprint(config)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # load demo data
    image_names = ['000240.jpg', '000437.jpg', '004072.jpg', '007912.jpg']
    image_all = []
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/deform_conv/' + im_name), \
            ('%s does not exist'.format('../demo/deform_conv/' + im_name))
        im = cv2.imread(cur_path + '/../demo/deform_conv/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        image_all.append(im)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/deform_conv', 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # test
    for idx, _ in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])

        output = predictor.predict(data_batch)
        res5a_offset = output[0]['res5a_branch2b_offset_output'].asnumpy()
        res5b_offset = output[0]['res5b_branch2b_offset_output'].asnumpy()
        res5c_offset = output[0]['res5c_branch2b_offset_output'].asnumpy()

        im = image_all[idx]
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_dconv_offset(im, [res5c_offset, res5b_offset, res5a_offset])
def main():
    # get symbol
    pprint.pprint(config)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # load demo data
    image_names = ['000240.jpg', '000437.jpg', '004072.jpg', '007912.jpg']
    image_all = []
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/deform_conv/' + im_name), \
            ('%s does not exist'.format('../demo/deform_conv/' + im_name))
        im = cv2.imread(cur_path + '/../demo/deform_conv/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        image_all.append(im)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/deform_conv', 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # test
    for idx, _ in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])

        output = predictor.predict(data_batch)
        res5a_offset = output[0]['res5a_branch2b_offset_output'].asnumpy()
        res5b_offset = output[0]['res5b_branch2b_offset_output'].asnumpy()
        res5c_offset = output[0]['res5c_branch2b_offset_output'].asnumpy()

        im = image_all[idx]
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_dconv_offset(im, [res5c_offset, res5b_offset, res5a_offset])
    def _load_frame(self, idx):
        im = self.images[idx]
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        return mx.nd.array(im_tensor), mx.nd.array(im_info)
Esempio n. 12
0
    def predict(self, im):
        im, im_scale = resize(im,
                              self.scales[0],
                              self.scales[1],
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data = {'data': im_tensor, 'im_info': im_info}
        data = [[
            mx.nd.array(data[self.data_names[0]]),
            mx.nd.array(data[self.data_names[1]])
        ]]
        data_batch = mx.io.DataBatch(
            data=data,
            label=[],
            pad=0,
            index=0,
            provide_data=[[(k, v.shape)
                           for k, v in zip(self.data_names, data[0])]],
            provide_label=[None])

        scores, boxes, data_dict = im_detect(self.predictor, data_batch,
                                             self.data_names, [im_scale],
                                             config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:,
                              4:8] if config.CLASS_AGNOSTIC else boxes[:, j *
                                                                       4:(j +
                                                                          1) *
                                                                       4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = self.nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        res = {}
        for idx, cls in enumerate(self.classes):
            res['{}'.format(cls)] = dets_nms[idx].tolist()
        logging.debug("Predictions: {}".format(res))
        return res
Esempio n. 13
0
    def parfetch(self, iroidb):
        # get testing data for multigpu
        data, label = get_rpn_pair_mv_batch(iroidb, self.cfg)

        #for k, v in data.items():
        #    print(k, v)
        data_shape = {k: v.shape for k, v in data.items()}
        #print(data_shape)
        del data_shape['im_info']
        del data_shape['data']

        data_shape1 = copy.deepcopy(data_shape)
        del data_shape1['eq_flag']
        del data_shape1['motion_vector']
        _, feat_shape, _ = self.feat_conv_3x3_relu.infer_shape(**data_shape1)
        #print('feat_shape: ', feat_shape)

        #print('shape: ', data['motion_vector'].shape)
        #print("size: ", int(feat_shape[0][2]), int(feat_shape[0][3]))
        data['motion_vector'] = data['motion_vector'].astype('float64')
        data['motion_vector'] = cv2.resize(
            data['motion_vector'],
            (int(feat_shape[0][3]), int(feat_shape[0][2])),
            interpolation=cv2.INTER_AREA)
        #print('data[\'motion_vector\'].shape: ', data['motion_vector'].shape)
        data['motion_vector'] = transform(data['motion_vector'], [0, 0])
        #print('data[\'motion_vector\'].shape: ', data['motion_vector'].shape)
        #print("data['motion_vector']: ", data['motion_vector'])
        #data['motion_vector'] = cv2.resize(data['motion_vector'], (36, 63))
        data_shape = {k: v.shape for k, v in data.items()}
        #print(data_shape)
        del data_shape['im_info']
        del data_shape['data']

        _, feat_shape, _ = self.feat_sym.infer_shape(**data_shape)
        feat_shape = [int(i) for i in feat_shape[0]]

        # add gt_boxes to data for e2e
        data['gt_boxes'] = label['gt_boxes'][np.newaxis, :, :]

        # assign anchor for label
        label = assign_anchor(feat_shape, label['gt_boxes'], data['im_info'],
                              self.cfg, self.feat_stride, self.anchor_scales,
                              self.anchor_ratios, self.allowed_border,
                              self.normalize_target, self.bbox_mean,
                              self.bbox_std)
        return {'data': data, 'label': label}
Esempio n. 14
0
    def loadImage(self, im, bg):
        self.im = im
        self.fg = im
        self.bg = bg

        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        self.im, im_scale = resize(self.im,
                                   target_size,
                                   max_size,
                                   stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(self.im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        self.data = {'data': im_tensor, 'im_info': im_info}
        self.data = [mx.nd.array(self.data[name]) for name in self.data_names]
Esempio n. 15
0
def load_test(filename, short, max_size, mean, std):
    # read and transform image
    im_orig = imdecode(filename)
    im, im_scale = resize(im_orig, short, max_size)
    height, width = im.shape[:2]
    im_info = mx.nd.array([height, width, im_scale])

    # transform into tensor and normalize
    im_tensor = transform(im, mean, std)

    # for 1-batch inference purpose, cannot use batchify (or nd.stack) to expand dims
    im_tensor = mx.nd.array(im_tensor).expand_dims(0)
    im_info = mx.nd.array(im_info).expand_dims(0)

    # transform cv2 BRG image to RGB for matplotlib
    im_orig = im_orig[:, :, (2, 1, 0)]
    return im_tensor, im_info, im_orig
Esempio n. 16
0
def get_predictor(sym, image, arg_params, aux_params):
    data = []
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(image, target_size, max_size, stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    data.append({'data': im_tensor, 'im_info': im_info})
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    return predictor
Esempio n. 17
0
    def _load_frame(self, idx):
        im_name = self.images_names[idx]
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if idx == 100:
            im[...] = 0
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        return mx.nd.array(im_tensor), mx.nd.array(im_info)
    def load_data_and_get_predictor(self, image_names):
        # load demo data

        #image_names = ['COCO_test2015_000000000891.jpg',
        #            'COCO_test2015_000000001669.jpg']
        data = []
        for im_name in image_names:
            #assert os.path.exists(
            #    cur_path + '/../demo/' + im_name), ('%s does not exist'.format('../demo/' + im_name))
            #im = cv2.imread(cur_path + '/../demo/' + im_name,
            #                cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            im = cv2.imread(im_name, cv2.IMREAD_COLOR |
                            cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im, target_size, max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})

        # get predictor
        self.data_names = ['data', 'im_info']
        label_names = []
        data = [[mx.nd.array(data[i][name]) for name in self.data_names]
                for i in xrange(len(data))]
        max_data_shape = [[('data', (1, 3, max(
            [v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
        provide_data = [[(k, v.shape) for k, v in zip(self.data_names, data[i])]
                        for i in xrange(len(data))]
        provide_label = [None for i in xrange(len(data))]
        self.predictor = Predictor(self.sym, self.data_names, label_names,
                                   context=[mx.gpu(1)], max_data_shapes=max_data_shape,
                                   provide_data=provide_data, provide_label=provide_label,
                                   arg_params=self.arg_params, aux_params=self.aux_params)
        self.nms = gpu_nms_wrapper(config.TEST.NMS, 0)

        return data
def generate_batch_V2(im,num_gpu):
    """
    preprocess image, return batch
    :param im: cv2.imread returns [height, width, channel] in BGR
    :return:
    data_batch: MXNet input batch
    data_names: names in data_batch
    im_scale: float number
    """
    array_list , info_list =[],[]
    for idx in range(0,num_gpu):
        SHORT_SIDE = config.SCALES[0][0]
        LONG_SIDE = config.SCALES[0][1]
        PIXEL_MEANS = config.network.PIXEL_MEANS
        DATA_NAMES = ['data', 'im_info']
        im_array, im_scale = resize(im[idx], SHORT_SIDE, LONG_SIDE)
        im_array = transform(im_array, PIXEL_MEANS)
        im_info = np.array([[im_array.shape[2], im_array.shape[3], im_scale]], dtype=np.float32)
        array_list.append(im_array)
        info_list.append(im_info)

    data = [[mx.nd.array(_), mx.nd.array(__)] for _ in array_list,for __ in info_list]
    def read_data(self, im_name):  #data (1,3, 562,1000)
        data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        print("About to read ", im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = self.cfg.SCALES[0][0]
        max_size = self.cfg.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=self.cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, self.cfg.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        d = {
            'data': mx.nd.array(im_tensor),
            'im_info': mx.nd.array(im_info),
            'data_cache': mx.nd.array(im_tensor),
            'feat_cache': mx.nd.array(im_tensor)
        }

        return [d[data_name] for data_name in data_names]
Esempio n. 21
0
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn_online_train'
    model = '/../model/rfcn_fgfa_flownet_vid_original'

    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    num_classes = 31
    classes = ['__background__','airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox',
               'giant_panda', 'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit',
               'red_panda', 'sheep', 'snake', 'squirrel',
               'tiger', 'train', 'turtle', 'watercraft',
               'whale', 'zebra']

    # load demo data

    snippet_name = 'ILSVRC2015_val_00177001' #'ILSVRC2015_val_00007016'#'ILSVRC2015_val_00000004' # 'ILSVRC2015_val_00007016'# 'ILSVRC2015_val_00044006' #'ILSVRC2015_val_00007010' #'ILSVRC2015_val_00016002'
    # ILSVRC2015_val_00010001: zebra,
    # 37002:cat and fox, motion blur
    # ILSVRC2015_val_00095000: fox, defocus
    image_names = glob.glob(cur_path + '/../demo/' + snippet_name + '/*.JPEG')
    image_names.sort()
    output_dir = cur_path + '/../demo/test_'# rfcn_fgfa_online_train_'
    output_dir_ginst = cur_path + '/../demo/test_ginst_'  # rfcn_fgfa_online_train_'
    output_dir_linst = cur_path + '/../demo/test_linst_'
    if (cfg.TEST.SEQ_NMS):
        output_dir += 'SEQ_NMS_'
        output_dir_ginst += 'SEQ_NMS_'
    output_dir += snippet_name + '/'
    output_dir_ginst += snippet_name + '/'
    output_dir_linst += snippet_name + '/'

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)

        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({'data': im_tensor, 'im_info': im_info,  'data_cache': im_tensor,    'feat_cache': im_tensor})



    # get predictor

    print('get-predictor')
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', ((19, cfg.network.FGFA_FEAT_DIM,
                                                np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                                np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int))))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    #add parameters for instance cls & regression
    arg_params['rfcn_ibbox_bias'] = arg_params['rfcn_bbox_bias'].copy()  #deep copy
    arg_params['rfcn_ibbox_weight'] = arg_params['rfcn_bbox_weight'].copy()  #deep copy
    max_inst = cfg.TEST.NUM_INSTANCES


    feat_predictors = Predictor(feat_sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)


    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))]
                 for _ in range(num_classes)]
    all_boxes_inst = [[[] for _ in range(len(data))]
                 for _ in range(num_classes)]

    ginst_mem = [] # list for instance class
    sim_array_global = [] # similarity array list
    ginst_ID = 0

    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = True
    file_idx = 0
    thresh = (1e-3)
    for idx, element in enumerate(data):

        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if(idx != len(data)-1):
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

            else:
                #if file_idx ==15:
                #    print( '%d frame' % (file_idx))
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch) #put 19 data & feat list into data_batch
                #aggr_predictors._mod.forward(data_batch) #sidong
                #zxcv= aggr_predictors._mod.get_outputs(merge_multi_context=False)#sidong
                pred_result = im_detect_all(aggr_predictors, data_batch, data_names, scales, cfg) # get box result [[scores, pred_boxes, rois, data_dict, iscores, ipred_boxes, cropped_embed]]

                data_batch.data[0][-2] = None # 19 frames of data possesses much memory, so clear it
                data_batch.provide_data[0][-2] = ('data_cache', None) # also clear shape info of data
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                ginst_ID_prev = ginst_ID
                ginst_ID, out_im, out_im2, out_im_linst = process_link_pred_result(classes, pred_result, num_classes, thresh, cfg, nms, all_boxes,
                                                    all_boxes_inst, file_idx, max_per_image, vis,
                                                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales, ginst_mem, sim_array_global, ginst_ID)
                #out_im2 = process_pred_result_rois(pred_result, cfg.TEST.RPN_NMS_THRESH, cfg, nms, all_rois, file_idx, max_per_image,
                #                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                ginst_ID_now = ginst_ID
                init_inst_params(ginst_mem, ginst_ID_prev, ginst_ID_now, max_inst, aggr_predictors, arg_params)

                total_time = time.time()-t1
                if (cfg.TEST.SEQ_NMS==False):
                    if cfg.TEST.DISPLAY[0]:
                        save_image(output_dir, file_idx, out_im)
                    if cfg.TEST.DISPLAY[1]:
                        save_image(output_dir_ginst, file_idx, out_im2)
                    if cfg.TEST.DISPLAY[2]:
                        save_image(output_dir_linst, file_idx, out_im_linst)
                #testing by metric


                print( 'testing {} {:.4f}s'.format(str(file_idx)+'.JPEG', total_time /(file_idx+1)))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################

            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect_all(aggr_predictors, data_batch, data_names, scales, cfg)

                ginst_ID_prev = ginst_ID
                ginst_ID, out_im, out_im2, out_im_linst = process_link_pred_result(classes, pred_result, num_classes, thresh, cfg, nms, all_boxes,
                                                      all_boxes_inst, file_idx, max_per_image, vis,
                                                      data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales,
                                                      ginst_mem, sim_array_global, ginst_ID)
                # out_im2 = process_pred_result_rois(pred_result, cfg.TEST.RPN_NMS_THRESH, cfg, nms, all_rois, file_idx, max_per_image,
                #                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                ginst_ID_now = ginst_ID
                init_inst_params(ginst_mem, ginst_ID_prev, ginst_ID_now, max_inst, aggr_predictors ,arg_params)

                total_time = time.time() - t1
                if (cfg.TEST.SEQ_NMS == False):
                    if cfg.TEST.DISPLAY[0]:
                        save_image(output_dir, file_idx, out_im)
                    if cfg.TEST.DISPLAY[1]:
                        save_image(output_dir_ginst, file_idx, out_im2)
                    if cfg.TEST.DISPLAY[2]:
                        save_image(output_dir_linst, file_idx, out_im_linst)
                print( 'testing {} {:.4f}s'.format(str(file_idx)+'.JPEG', total_time / (file_idx+1)))
                file_idx += 1
                end_counter += 1

    if(cfg.TEST.SEQ_NMS):
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print('done')
Esempio n. 22
0
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_deeplab'
    model1 = '/../model/rfcn_dff_flownet_vid'
    model2 = '/../model/deeplab_dcn_cityscapes'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    # settings
    num_classes = 19
    interv = args.interval
    num_ex = args.num_ex

    # load demo data
    image_names = sorted(
        glob.glob(cur_path +
                  '/../demo/cityscapes_data/cityscapes_frankfurt_all_i' +
                  str(interv) + '/*.png'))
    image_names = image_names[:interv * num_ex]
    label_files = sorted(
        glob.glob(
            cur_path +
            '/../demo/cityscapes_data/cityscapes_frankfurt_labels_all/*.png'))

    output_dir = cur_path + '/../demo/deeplab_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = interv

    #

    data = []
    key_im_tensor = None
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        data.append({
            'data':
            im_tensor,
            'im_info':
            im_info,
            'data_key':
            key_im_tensor,
            'feat_key':
            np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })

    # get predictor
    data_names = ['data', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in config.SCALES]),
                  max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    # models: rfcn_dff_flownet_vid, deeplab_cityscapes
    arg_params, aux_params = load_param_multi(cur_path + model1,
                                              cur_path + model2,
                                              0,
                                              process=True)
    key_predictor = Predictor(key_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    cur_predictor = Predictor(cur_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        if j % key_frame_interval == 0:
            # scores, boxes, data_dict, feat = im_detect(key_predictor, data_batch, data_names, scales, config)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            # scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch, data_names, scales, config)
            output_all, _ = im_segment(cur_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]

    print "warmup done"
    # test
    time = 0
    count = 0
    hist = np.zeros((num_classes, num_classes))
    lb_idx = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        if idx % key_frame_interval == 0:
            print '\nframe {} (key)'.format(idx)
            # scores, boxes, data_dict, feat = im_detect(key_predictor, data_batch, data_names, scales, config)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]
        else:
            print '\nframe {} (intermediate)'.format(idx)
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            # scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch, data_names, scales, config)
            output_all, _ = im_segment(cur_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]

        elapsed = toc()
        time += elapsed
        count += 1
        print 'testing {} {:.4f}s [{:.4f}s]'.format(im_name, elapsed,
                                                    time / count)

        pred = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(pred)
        pallete = getpallete(256)
        segmentation_result.putpalette(pallete)
        _, im_filename = os.path.split(im_name)
        segmentation_result.save(output_dir + '/seg_' + im_filename)

        label = None

        _, lb_filename = os.path.split(label_files[lb_idx])
        im_comps = im_filename.split('_')
        lb_comps = lb_filename.split('_')
        # if annotation available for frame
        if im_comps[1] == lb_comps[1] and im_comps[2] == lb_comps[2]:
            print 'label {}'.format(lb_filename)
            label = np.asarray(Image.open(label_files[lb_idx]))
            if lb_idx < len(label_files) - 1:
                lb_idx += 1

        if label is not None:
            curr_hist = fast_hist(pred.flatten(), label.flatten(), num_classes)
            hist += curr_hist
            print 'mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(curr_hist)) * 100, 2))
            print '(cum) mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(hist)) * 100, 2))

    ious = per_class_iu(hist) * 100
    print ' '.join('{:.03f}'.format(i) for i in ious)
    print '===> final mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(ious), 2))

    print 'done'
Esempio n. 23
0
mod_key.set_params(arg_params, aux_params)

#%%
data = []
key_im_tensor = None
for idx, im_name in enumerate(image_names):
    assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
    im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(im,
                          target_size,
                          max_size,
                          stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                       dtype=np.float32)
    # print(im_info.shape)
    # print im_info
    if idx % key_frame_interval == 0:
        key_im_tensor = im_tensor
    data.append({
        'data': im_tensor,
        'im_info': im_info,
        'data_key': key_im_tensor,
        'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 36, 63))
    })

# get predictor
# data_names = ['data', 'im_info', 'data_key', 'feat_key']
Esempio n. 24
0
def run_detection(im_root, result_root, conf_threshold):
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    print('detection in {}'.format(im_root))
    im_names = sorted(os.listdir(im_root))

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = []
    for idx, im_name in enumerate(im_names[:2]):
        im_file = os.path.join(im_root, im_name)
        im = cv2.imread(im_file,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        data.append({'data': im_tensor, 'im_info': im_info})

    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        config.TEST.test_epoch,
        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    # nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    # nms = soft_nms_wrapper(config.TEST.NMS, method=2)
    nms = gpu_soft_nms_wrapper(config.TEST.NMS, method=2, device_id=0)

    nms_t = Timer()
    for idx, im_name in enumerate(im_names):
        im_file = os.path.join(im_root, im_name)
        im = cv2.imread(im_file,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        origin_im = im.copy()

        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        # input
        data = [mx.nd.array(im_tensor), mx.nd.array(im_info)]
        data_batch = mx.io.DataBatch(data=[data],
                                     label=[],
                                     pad=0,
                                     index=idx,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data)
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:,
                              4:8] if config.CLASS_AGNOSTIC else boxes[:, j *
                                                                       4:(j +
                                                                          1) *
                                                                       4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            nms_t.tic()
            keep = nms(cls_dets)
            nms_t.toc()
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.2f}ms'.format(im_name, toc() * 1000)
        print 'nms: {:.2f}ms'.format(nms_t.total_time * 1000)
        nms_t.clear()

        # save results
        person_dets = dets_nms[0]
        with open(os.path.join(result_root, '{:04d}.txt'.format(idx)),
                  'w') as f:
            f.write('{}\n'.format(len(person_dets)))
            for det in person_dets:
                x1, y1, x2, y2, s = det
                w = x2 - x1
                h = y2 - y1
                f.write('0 {} {} {} {} {}\n'.format(s, w, h, x1, y1))

        # visualize
        im = origin_im
        # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = show_boxes_cv2(im, dets_nms, classes, 1)

        cv2.imshow('det', im)
        cv2.waitKey(1)
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
               'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
               'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
               'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    # load demo data
    image_names = ['COCO_test2015_000000000891.jpg', 'COCO_test2015_000000001669.jpg']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), ('%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})


    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'), 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

    print 'done'
Esempio n. 26
0
def main():
    global classes

    assert os.path.exists(args.input), ('%s does not exist'.format(args.input))
    im = cv2.imread(args.input,
                    cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    arr = np.array(im)
    origin_width, origin_height, _ = arr.shape

    portion = smart_chipping(origin_width, origin_height)

    # manually update the configuration
    # print(config.SCALES[0][0])
    # TODO: note this is hard coded and assume there are three values for the SCALE configuration
    config.SCALES[0] = (portion, portion, portion)
    # config.max_per_image =
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_fpn_dcn_rcnn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # load demo data
    data = []

    # portion = args.chip_size

    cwn, chn = (portion, portion)
    wn, hn = (int(origin_width / cwn), int(origin_height / chn))
    padding_y = int(
        math.ceil(float(origin_height) / chn) * chn - origin_height)
    padding_x = int(math.ceil(float(origin_width) / cwn) * cwn - origin_width)
    print("padding_y,padding_x, origin_height, origin_width", padding_y,
          padding_x, origin_height, origin_width)
    # top, bottom, left, right - border width in number of pixels in corresponding directions
    im = cv2.copyMakeBorder(im,
                            0,
                            padding_x,
                            0,
                            padding_y,
                            cv2.BORDER_CONSTANT,
                            value=[0, 0, 0])
    # the section below could be optimized. but basically the idea is to re-calculate all the values
    arr = np.array(im)
    width, height, _ = arr.shape
    cwn, chn = (portion, portion)
    wn, hn = (int(width / cwn), int(height / chn))

    image_list = chip_image(im, (portion, portion))
    for im in image_list:
        target_size = portion
        max_size = portion
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        # print("im.shape,im_scale",im.shape,im_scale)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/' +
                                        ('fpn_dcn_xview_480_640_800_alltrain'),
                                        11,
                                        process=True)

    # arg_params, aux_params = load_param(cur_path + '/../model/' + ('fpn_dcn_coco' if not args.fpn_only else 'fpn_coco'), 0, process=True)
    print("loading parameter done")

    if args.cpu_only:
        predictor = Predictor(sym,
                              data_names,
                              label_names,
                              context=[mx.cpu()],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
        nms = py_nms_wrapper(config.TEST.NMS)
    else:
        predictor = Predictor(sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(args.gpu_index)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    num_preds = int(5000 * math.ceil(float(portion) / 400))
    # test
    boxes, scores, classes = generate_detections(data, data_names, predictor,
                                                 config, nms, image_list,
                                                 num_preds)
    #Process boxes to be full-sized

    print("boxes shape is", boxes.shape, "wn, hn", wn, hn, "width, height",
          width, height)
    bfull = boxes.reshape((wn, hn, num_preds, 4))

    for i in range(wn):
        for j in range(hn):
            bfull[i, j, :, 0] += j * cwn
            bfull[i, j, :, 2] += j * cwn

            bfull[i, j, :, 1] += i * chn
            bfull[i, j, :, 3] += i * chn

            # clip values
            bfull[i, j, :, 0] = np.clip(bfull[i, j, :, 0], 0, origin_height)
            bfull[i, j, :, 2] = np.clip(bfull[i, j, :, 2], 0, origin_height)

            bfull[i, j, :, 1] = np.clip(bfull[i, j, :, 1], 0, origin_width)
            bfull[i, j, :, 3] = np.clip(bfull[i, j, :, 3], 0, origin_width)

    bfull = bfull.reshape((hn * wn, num_preds, 4))
    scores = scores.reshape((hn * wn, num_preds))
    classes = classes.reshape((hn * wn, num_preds))

    #only display boxes with confidence > .5

    # print(bfull, scores, classes)
    #bs = bfull[scores > 0.08]
    #cs = classes[scores>0.08]
    #print("bfull.shape,scores.shape, bs.shape",bfull.shape,scores.shape, bs.shape)
    # s = im_name
    # draw_bboxes(arr,bs,cs).save("/tmp/"+s[0].split(".")[0] + ".png")

    #scoring_line_threshold = 11000

    #if bs.shape[0] > scoring_line_threshold:
    # too many predictions, we should trim the low confidence ones
    with open(args.output, 'w') as f:
        for i in range(bfull.shape[0]):
            for j in range(bfull[i].shape[0]):
                #box should be xmin ymin xmax ymax
                box = bfull[i, j]
                class_prediction = classes[i, j]
                score_prediction = scores[i, j]
                if int(class_prediction) != 0:
                    f.write('%d %d %d %d %d %f \n' % \
                        (box[0], box[1], box[2], box[3], int(class_prediction), score_prediction))

    print('done')
Esempio n. 27
0
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_fgfa_flownet_vid'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    num_classes = 31
    classes = ['__background__','airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox',
               'giant_panda', 'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit',
               'red_panda', 'sheep', 'snake', 'squirrel',
               'tiger', 'train', 'turtle', 'watercraft',
               'whale', 'zebra']

    # load demo data

    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn_fgfa/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)

        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({'data': im_tensor, 'im_info': im_info,  'data_cache': im_tensor,    'feat_cache': im_tensor})



    # get predictor

    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', ((19, cfg.network.FGFA_FEAT_DIM,
                                                np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                                np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int))))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)


    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))]
                 for _ in range(num_classes)]
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):

        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if(idx != len(data)-1):

            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms, all_boxes, file_idx, max_per_image, vis,
                                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time()-t1
                if (cfg.TEST.SEQ_NMS==False):
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx)+'.JPEG', total_time /(file_idx+1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################

            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms, all_boxes, file_idx, max_per_image, vis,
                                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)

                total_time = time.time() - t1
                if (cfg.TEST.SEQ_NMS == False):
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx)+'.JPEG', total_time / (file_idx+1))
                file_idx += 1
                end_counter+=1

    if(cfg.TEST.SEQ_NMS):
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print 'done'
def main(tempFileList, fileOp):
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    out_dir = os.path.join(
        cur_path,
        'demo/output/terror-det-rg-data-output/terror-det-v0.9-test/JPEGImages'
    )
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    # set up class names
    num_classes = 7
    classes = [
        'tibetan flag', 'guns', 'knives', 'not terror', 'islamic flag',
        'isis flag'
    ]

    # load demo data
    image_names = tempFileList
    data = []
    for im_name in image_names:
        im_file = im_name
        print(im_file)
        im = cv2.imread(im_file, cv2.IMREAD_COLOR)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/demo/models/' +
                                        ('rfcn_voc'),
                                        10,
                                        process=True)
    #modify by zxt
    #mx.model.save_checkpoint('f1/final', 10, sym, arg_params, aux_params)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)

    # test
    # fileOp = open(os.path.join(cur_path, 'terror-det-rg-test-result.txt'), 'w')
    fileOp = fileOp
    for idx, im_name in enumerate(image_names):
        print("begining process %s" % (im_name))
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:,
                              4:8] if config.CLASS_AGNOSTIC else boxes[:, j *
                                                                       4:(j +
                                                                          1) *
                                                                       4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        im = cv2.imread(im_name)
        #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im_result = show_boxes(fileOp, im_name, im, dets_nms, classes, 1)
        cv2.imwrite(out_dir + im_name.split('/')[-1], im_result)
    print 'done'
Esempio n. 29
0
    def batch_extract(self, multiple=True, gt_dir=None, epoch=0):
        """

        :param multiple:
        :param gt_dir:
        :return:
        """
        if len(self.img_list) % self.batch_size != 0:
            batch = len(self.img_list) / self.batch_size + 1
        else:
            batch = len(self.img_list) / self.batch_size

        for i in xrange(batch):

            if i < batch - 1:
                self.batch_list = self.img_list[i * self.batch_size:(i + 1) *
                                                self.batch_size]
            else:
                self.batch_list = self.img_list[i * self.batch_size:]

            print '\nMini-batch %d\t' % (i + 1)

            tmp_data = []
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]

            tic()
            for img in self.batch_list:
                assert os.path.exists(img), ('%s does not exist.'.format(img))
                im = cv2.imread(
                    img, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
                im, im_scale = resize(im,
                                      target_size,
                                      max_size,
                                      stride=config.network.IMAGE_STRIDE)
                im_tensor = transform(im, config.network.PIXEL_MEANS)
                # im_info: height, width, scale
                im_info = np.array(
                    [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                    dtype=np.float32)
                tmp_data.append({
                    self.data_names[0]: im_tensor,
                    self.data_names[1]: im_info
                })

            self.ctx = [int(i) for i in config.gpus.split(',')]
            self.data = [[
                mx.nd.array(tmp_data[i][name], mx.gpu(self.ctx[0]))
                for name in self.data_names
            ] for i in xrange(len(tmp_data))]

            max_data_shape = [[(self.data_names[0],
                                (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
            provide_data = [[(k, v.shape)
                             for k, v in zip(self.data_names, self.data[i])]
                            for i in xrange(len(self.data))]
            provide_label = [None for i in xrange(len(self.data))]

            arg_params, aux_params = load_param(self.model_dir,
                                                epoch,
                                                process=True)

            self.predictor = Predictor(self.sym,
                                       self.data_names,
                                       self.label_name,
                                       context=[mx.gpu(self.ctx[0])],
                                       max_data_shapes=max_data_shape,
                                       provide_data=provide_data,
                                       provide_label=provide_label,
                                       arg_params=arg_params,
                                       aux_params=aux_params)
            print 'preparation: %.4fs' % toc()

            if i == 0:
                self.warmup()

            self.forward(multiple=multiple, gt_dir=gt_dir)

        self.cleaner()
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_deeplab_dcn' if not args.deeplab_only else 'resnet_v1_101_deeplab'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 19

    # load demo data
    image_names = ['frankfurt_000001_073088_leftImg8bit.png', 'lindau_000024_000019_leftImg8bit.png']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), ('%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})


    # get predictor
    data_names = ['data']
    label_names = ['softmax_label']
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/' + ('deeplab_dcn_cityscapes' if not args.deeplab_only else 'deeplab_cityscapes'), 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        output_all = predictor.predict(data_batch)
        output_all = [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy() for output in output_all]

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])

        tic()
        output_all = predictor.predict(data_batch)
        output_all = [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy() for output in output_all]
        pallete = getpallete(256)

        segmentation_result = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(segmentation_result)
        segmentation_result.putpalette(pallete)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        pure_im_name, ext_im_name = os.path.splitext(im_name)
        segmentation_result.save(cur_path + '/../demo/seg_' + pure_im_name + '.png')
        # visualize
        im_raw = cv2.imread(cur_path + '/../demo/' + im_name)
        seg_res = cv2.imread(cur_path + '/../demo/seg_' + pure_im_name + '.png')
        cv2.imshow('Raw Image', im_raw)
        cv2.imshow('segmentation_result', seg_res)
        cv2.waitKey(0)
    print 'done'
Esempio n. 31
0
def im_detect_bbox_aug(net,
                       nms_wrapper,
                       img_path,
                       scales,
                       pixel_means,
                       bbox_stds,
                       ctx,
                       threshold=1e-3,
                       viz=False):
    all_bboxes = []
    all_scores = []
    img_ori = cv2.imread(img_path.encode("utf-8"))
    for scale_min, scale_max in scales:
        fscale = 1.0 * scale_min / min(img_ori.shape[:2])
        img_resized = cv2.resize(img_ori, (0, 0), fx=fscale, fy=fscale)
        h, w, c = img_resized.shape
        h_padded = h if h % 32 == 0 else h + 32 - h % 32
        w_padded = w if w % 32 == 0 else w + 32 - w % 32
        img_padded = np.zeros(shape=(h_padded, w_padded, c),
                              dtype=img_resized.dtype)
        img_padded[:h, :w, :] = img_resized
        img = transform(img_padded, pixel_means=pixel_means)
        im_info = nd.array([[h_padded, w_padded, 1.0]], ctx=ctx[0])
        data = nd.array(img, ctx=ctx[0])

        rois, scores, bbox_deltas = net(data, im_info)
        rois = rois[:, 1:].asnumpy()
        bbox_deltas = bbox_deltas[0].asnumpy()
        bbox_deltas = pre_compute_deltas(bbox_deltas, bbox_stds=bbox_stds)
        bbox = bbox_pred(rois, bbox_deltas)
        bbox = clip_boxes(bbox, data.shape[2:4])
        bbox /= fscale
        all_bboxes.append(bbox)
        all_scores.append(scores[0].asnumpy())

        # hflip
        rois, scores, bbox_deltas = net(data[:, :, :, ::-1], im_info)
        rois = rois[:, 1:].asnumpy()
        bbox_deltas = bbox_deltas[0].asnumpy()
        bbox_deltas = pre_compute_deltas(bbox_deltas, bbox_stds=bbox_stds)
        bbox = bbox_pred(rois, bbox_deltas)
        bbox = clip_boxes(bbox, data.shape[2:4])

        tmp = bbox[:, 0::4].copy()
        bbox[:, 0::4] = data.shape[3] - bbox[:, 2::4] - 1  # x0 = w - x0
        bbox[:, 2::4] = data.shape[3] - tmp - 1  # x1 = w -x1
        bbox /= fscale

        all_bboxes.append(bbox)
        all_scores.append(scores[0].asnumpy())

        #
        # vflip
        rois, scores, bbox_deltas = net(data[:, :, ::-1], im_info)
        rois = rois[:, 1:].asnumpy()
        bbox_deltas = bbox_deltas[0].asnumpy()
        bbox_deltas = pre_compute_deltas(bbox_deltas, bbox_stds=bbox_stds)
        bbox = bbox_pred(rois, bbox_deltas)

        bbox = clip_boxes(bbox, data.shape[2:4])

        tmp = bbox[:, 1::4].copy()
        bbox[:, 1::4] = data.shape[2] - bbox[:, 3::4] - 1  # x0 = w - x0
        bbox[:, 3::4] = data.shape[2] - tmp - 1  # x1 = w -x1
        bbox /= fscale

        all_bboxes.append(bbox)
        all_scores.append(scores[0].asnumpy())

        # vhflip
        rois, scores, bbox_deltas = net(data[:, :, ::-1, ::-1], im_info)
        rois = rois[:, 1:].asnumpy()
        bbox_deltas = bbox_deltas[0].asnumpy()
        bbox_deltas = pre_compute_deltas(bbox_deltas, bbox_stds=bbox_stds)
        bbox = bbox_pred(rois, bbox_deltas)

        bbox = clip_boxes(bbox, data.shape[2:4])

        tmp = bbox[:, 1::4].copy()
        bbox[:, 1::4] = data.shape[2] - bbox[:, 3::4] - 1  # x0 = w - x0
        bbox[:, 3::4] = data.shape[2] - tmp - 1  # x1 = w -x1

        tmp = bbox[:, 0::4].copy()
        bbox[:, 0::4] = data.shape[3] - bbox[:, 2::4] - 1  # x0 = w - x0
        bbox[:, 2::4] = data.shape[3] - tmp - 1  # x1 = w -x1

        bbox /= fscale

        all_bboxes.append(bbox)
        all_scores.append(scores[0].asnumpy())

    all_bboxes = np.concatenate(all_bboxes, axis=0)
    all_scores = np.concatenate(all_scores, axis=0)
    pred_bboxes = []
    pred_scores = []
    pred_clsid = []
    for j in range(1, all_scores.shape[1]):
        cls_scores = all_scores[:, j, np.newaxis]
        cls_boxes = all_bboxes[:, 4:
                               8] if config.CLASS_AGNOSTIC else all_bboxes[:,
                                                                           j *
                                                                           4:
                                                                           (j +
                                                                            1
                                                                            ) *
                                                                           4]
        cls_dets = np.hstack((cls_boxes, cls_scores))
        keep = nms_wrapper(cls_dets.astype('f'))
        cls_dets = cls_dets[keep, :]
        cls_dets = cls_dets[cls_dets[:, -1] > threshold, :]
        pred_bboxes.append(cls_dets[:, :4])
        pred_scores.append(cls_dets[:, 4])
        pred_clsid.append(j *
                          np.ones(shape=(cls_dets.shape[0], ), dtype=np.int))
    pred_bboxes = np.concatenate(pred_bboxes, axis=0)
    pred_scores = np.concatenate(pred_scores, axis=0)
    pred_clsid = np.concatenate(pred_clsid, axis=0)
    if viz:
        import gluoncv
        import matplotlib.pyplot as plt
        gluoncv.utils.viz.plot_bbox(img_ori[:, :, ::-1],
                                    bboxes=pred_bboxes,
                                    scores=pred_scores,
                                    labels=pred_clsid,
                                    thresh=.5)
        plt.show()
    return pred_bboxes, pred_scores, pred_clsid
    def predict(self, images, feat_output, aggr_feat_output):

        model = self.model
        all_frame_interval = self.all_frame_interval
        feat_sym = self.feat_sym
        aggr_sym = self.aggr_sym
        num_classes = self.num_classes
        classes = self.classes
        max_per_image = self.max_per_image

        output_dir = cur_path + '/../demo/rfcn_fgfa_{}/'.format(self.index)
        self.index += 1
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        data = []
        for im in images:
            target_size = cfg.SCALES[0][0]
            max_size = cfg.SCALES[0][1]
            im, im_scale = resize(im,
                                  target_size,
                                  max_size,
                                  stride=cfg.network.IMAGE_STRIDE)
            im_tensor = transform(im, cfg.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)

            feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
            data.append({
                'data': im_tensor,
                'im_info': im_info,
                'data_cache': im_tensor,
                'feat_cache': im_tensor
            })

        # get predictor

        print 'get-predictor'
        data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
        label_names = []

        t1 = time.time()
        data = [[mx.nd.array(data[i][name]) for name in data_names]
                for i in xrange(len(data))]
        max_data_shape = [[
            ('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                      max([v[1] for v in cfg.SCALES]))),
            ('data_cache', (11, 3, max([v[0] for v in cfg.SCALES]),
                            max([v[1] for v in cfg.SCALES]))),
            ('feat_cache',
             ((11, cfg.network.FGFA_FEAT_DIM,
               np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(
                   np.int),
               np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(
                   np.int))))
        ]]
        provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                        for i in xrange(len(data))]
        provide_label = [None for _ in xrange(len(data))]

        arg_params, aux_params = load_param(cur_path + model, 0, process=True)

        feat_predictors = Predictor(feat_sym,
                                    data_names,
                                    label_names,
                                    context=[mx.gpu(0)],
                                    max_data_shapes=max_data_shape,
                                    provide_data=provide_data,
                                    provide_label=provide_label,
                                    arg_params=arg_params,
                                    aux_params=aux_params)
        aggr_predictors = Predictor(aggr_sym,
                                    data_names,
                                    label_names,
                                    context=[mx.gpu(0)],
                                    max_data_shapes=max_data_shape,
                                    provide_data=provide_data,
                                    provide_label=provide_label,
                                    arg_params=arg_params,
                                    aux_params=aux_params)
        nms = py_nms_wrapper(cfg.TEST.NMS)

        # First frame of the video
        idx = 0
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        all_boxes = [[[] for _ in range(len(data))]
                     for _ in range(num_classes)]
        data_list = deque(maxlen=all_frame_interval)
        feat_list = deque(maxlen=all_frame_interval)
        image, feat = get_resnet_output(feat_predictors, data_batch,
                                        data_names)
        # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
        while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
            data_list.append(image)
            feat_list.append(feat)

        vis = False
        file_idx = 0
        thresh = 1e-3
        for idx, element in enumerate(data):

            data_batch = mx.io.DataBatch(
                data=[element],
                label=[],
                pad=0,
                index=idx,
                provide_data=[[(k, v.shape)
                               for k, v in zip(data_names, element)]],
                provide_label=[None])
            scales = [
                data_batch.data[i][1].asnumpy()[0, 2]
                for i in xrange(len(data_batch.data))
            ]

            if (idx != len(data) - 1):

                if len(data_list) < all_frame_interval - 1:
                    image, feat = get_resnet_output(feat_predictors,
                                                    data_batch, data_names)
                    data_list.append(image)
                    feat_list.append(feat)

                else:
                    #################################################
                    # main part of the loop
                    #################################################
                    image, feat = get_resnet_output(feat_predictors,
                                                    data_batch, data_names)
                    data_list.append(image)
                    feat_list.append(feat)

                    prepare_data(data_list, feat_list, data_batch)
                    pred_result, aggr_feat = im_detect(aggr_predictors,
                                                       data_batch, data_names,
                                                       scales, cfg)
                    assert len(aggr_feat) == 1

                    data_batch.data[0][-2] = None
                    data_batch.provide_data[0][-2] = ('data_cache', None)
                    data_batch.data[0][-1] = None
                    data_batch.provide_data[0][-1] = ('feat_cache', None)

                    out_im = process_pred_result(
                        classes, pred_result, num_classes, thresh, cfg, nms,
                        all_boxes, file_idx, max_per_image, vis,
                        data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(),
                        scales)
                    total_time = time.time() - t1
                    if (cfg.TEST.SEQ_NMS == False):
                        save_image(output_dir, file_idx, out_im)
                    print 'testing {} {:.4f}s'.format(
                        str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                    file_idx += 1

            else:
                #################################################
                # end part of a video                           #
                #################################################

                end_counter = 0
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                    data_list.append(image)
                    feat_list.append(feat)
                    prepare_data(data_list, feat_list, data_batch)
                    pred_result, aggr_feat = im_detect(aggr_predictors,
                                                       data_batch, data_names,
                                                       scales, cfg)
                    assert len(aggr_feat) == 1

                    out_im = process_pred_result(
                        classes, pred_result, num_classes, thresh, cfg, nms,
                        all_boxes, file_idx, max_per_image, vis,
                        data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(),
                        scales)

                    total_time = time.time() - t1
                    if (cfg.TEST.SEQ_NMS == False):
                        save_image(output_dir, file_idx, out_im)
                    print 'testing {} {:.4f}s'.format(
                        str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                    file_idx += 1
                    end_counter += 1

        if (cfg.TEST.SEQ_NMS):
            video = [all_boxes[j][:] for j in range(1, num_classes)]
            dets_all = seq_nms(video)
            for cls_ind, dets_cls in enumerate(dets_all):
                for frame_ind, dets in enumerate(dets_cls):
                    keep = nms(dets)
                    all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
            for idx in range(len(data)):
                boxes_this_image = [[]] + [
                    all_boxes[j][idx] for j in range(1, num_classes)
                ]
                out_im = draw_all_detection(data[idx][0].asnumpy(),
                                            boxes_this_image, classes,
                                            scales[0], cfg)
                save_image(output_dir, idx, out_im)

        print 'done'
Esempio n. 33
0
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    model = '/../model/rfcn_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_test_symbol(config)

    # set up class names
    num_classes = 31
    classes = ['airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox',
               'giant_panda', 'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit',
               'red_panda', 'sheep', 'snake', 'squirrel',
               'tiger', 'train', 'turtle', 'watercraft',
               'whale', 'zebra']

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    #

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})


    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        time += toc()
        count += 1
        print 'testing {} {:.4f}s'.format(im_name, time/count)

        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename,out_im)

    print 'done'
Esempio n. 34
0
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names; Don't count the background in, even we are treat the background as label '0'
    num_classes = 4
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    # load demo data
    image_path = './data/RoadImages/test/'
    image_names = glob.glob(image_path + '*.jpg')

    print("Image amount {}".format(len(image_names)))
    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][1]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        './output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road',
        19,
        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # test
    notation_dict = {}
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:,
                              4:8] if config.CLASS_AGNOSTIC else boxes[:, j *
                                                                       4:(j +
                                                                          1) *
                                                                       4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # notation_list.append(get_notation(im_name, dets_nms, classes, scale=1.0, gen_bbox_pic=True))
        notation_dict.update(
            get_notation(im_name,
                         dets_nms,
                         classes,
                         scale=1.0,
                         gen_bbox_pic=True))
    save_notation_file(notation_dict)
    print 'done'
Esempio n. 35
0
def main():
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    # load demo data
    image_names = []
    names_dirs = os.listdir(cur_path + '/../' + test_dir)
    for im_name in names_dirs:
        if im_name[-4:] == '.jpg' or im_name[-4:] == '.png':
            image_names.append(im_name)

    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../' + test_dir + im_name), (
            '%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../' + test_dir + im_name,
                        cv2.IMREAD_COLOR | long(128))
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        #print "before scale: "
        #print im.shape
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        #print "after scale: "
        #print im.shape
        #im_scale = 1.0
        #print "scale ratio: "
        #print im_scale
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        #print im_tensor.shape
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../' + model_dir,
                                        0,
                                        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # warm up
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)
    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, [1.0], config)
        im_shapes = [
            data_batch.data[i][0].shape[2:4]
            for i in xrange(len(data_batch.data))
        ]
        #print im_shapes

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                try:
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[0][indexes, :]
                    else:
                        raise Exception()
                except:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes[0], scores[0], num_classes, 100, im_shapes[0][1],
                im_shapes[0][0], config.TEST.NMS,
                config.TEST.MASK_MERGE_THRESH, config.BINARY_THRESH, ctx_id[0])

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(cur_path + '/../' + test_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config, 1.0 / scales[0], False)

        # Save img
        cv2.imwrite(cur_path + '/../' + result_dir + im_name,
                    cv2.cvtColor(im, cv2.COLOR_BGR2RGB))

    print 'done'