def multi_detect_input(im):
    # print(im.shape)
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    data_idx = [{'data': im_tensor, 'im_info': im_info}]
    data_idx = [[mx.nd.array(data_idx[i][name]) for name in data_names]
                for i in xrange(len(data_idx))]
    data_batch = mx.io.DataBatch(data=[data_idx[0]], label=[], pad=0, index=0,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data_idx[0])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    return data_batch, scales
def predict(self, imarray, pixel_means, pixel_stds, crop_size=512, color_scale=-1,
            feature_ratio=2.0 / 3, num_steps=1, output_name='softmax', feature_stride=8):
    im_tensor = transform(imarray, pixel_means, color_scale=color_scale, pixel_stds=pixel_stds)
    long_size = max(im_tensor.shape[2:])
    if long_size < crop_size:
        return self.predict_patch(im_tensor, feature_stride, num_steps, output_name)
def get_data_tensor_info(self, im):
    im_, im_scale = resize(im, self.target_size, self.max_size, stride=self.image_stride)
    im_tensor = transform(im_, self.pixel_means)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    data_tensor_info = [mx.nd.array(im_tensor), mx.nd.array(im_info)]
    return data_tensor_info
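# Hedged usage sketch (not from the original source): wraps the tensors returned by
# get_data_tensor_info into a DataBatch. `detector` is assumed to be an instance of
# the class above whose data names are ['data', 'im_info'].
def _demo_batch_from_tensor_info(detector, im):
    data_tensor_info = detector.get_data_tensor_info(im)
    return mx.io.DataBatch(data=[data_tensor_info], label=[], pad=0, index=0,
                           provide_data=[[(k, v.shape) for k, v in
                                          zip(['data', 'im_info'], data_tensor_info)]],
                           provide_label=[None])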
def generate_batch(im):
    """
    preprocess image, return batch
    :param im: cv2.imread returns [height, width, channel] in BGR
    :return:
    data_batch: MXNet input batch
    data_names: names in data_batch
    im_scale: float number
    """
    SHORT_SIDE = config.SCALES[0][0]
    LONG_SIDE = config.SCALES[0][1]
    PIXEL_MEANS = config.network.PIXEL_MEANS
    DATA_NAMES = ['data', 'im_info']

    im_array, im_scale = resize(im, SHORT_SIDE, LONG_SIDE)
    im_array = transform(im_array, PIXEL_MEANS)
    im_info = np.array([[im_array.shape[2], im_array.shape[3], im_scale]], dtype=np.float32)
    data = [[mx.nd.array(im_array), mx.nd.array(im_info)]]
    data_shapes = [[('data', im_array.shape), ('im_info', im_info.shape)]]
    data_batch = mx.io.DataBatch(data=data, label=[None],
                                 provide_data=data_shapes, provide_label=[None])
    return data_batch, DATA_NAMES, [im_scale]
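# Hedged usage sketch for generate_batch (not from the original source): `mod` is
# assumed to be an mx.mod.Module already bound with matching data shapes and loaded
# parameters; 'demo.jpg' is a placeholder path.
def _demo_generate_batch(mod):
    im = cv2.imread('demo.jpg')  # BGR [height, width, channel]
    data_batch, data_names, im_scale = generate_batch(im)
    mod.forward(data_batch)
    return [out.asnumpy() for out in mod.get_outputs()]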
def det(mod, fn):
    raw_img = cv2.imread(fn)
    if raw_img.shape[0] < raw_img.shape[1]:
        raw_img = cv2.copyMakeBorder(raw_img, 0, raw_img.shape[1] - raw_img.shape[0],
                                     0, 0, cv2.BORDER_CONSTANT)
    im_shape = [IMG_H, IMG_W]  # reverse order
    img = cv2.resize(raw_img, (IMG_H, IMG_W))
    raw_h = img.shape[0]
    raw_w = img.shape[1]
    im_tensor = image.transform(img, [124, 117, 104], 0.0167)
    im_info = np.array([[IMG_H, IMG_W, 4.18300658e-01]])
    batch = mx.io.DataBatch([mx.nd.array(im_tensor), mx.nd.array(im_info)])

    start = time.time()
    mod.forward(batch)
    output_names = mod.output_names
    output_tensor = mod.get_outputs()
    mod.get_outputs()[0].wait_to_read()
    print("time", time.time() - start, "secs.")

    output = dict(zip(output_names, output_tensor))
    rois = output['rois_output'].asnumpy()[:, 1:]
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    num_classes = 2
    all_cls_dets = [[] for _ in range(num_classes)]
    for j in range(1, num_classes):
        indexes = np.where(scores[:, j] > 0.1)[0]
        cls_scores = scores[indexes, j, np.newaxis]
        cls_boxes = pred_boxes[indexes, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores)).copy()
        all_cls_dets[j] = cls_dets
    for idx_class in range(1, num_classes):
        nms = py_nms_wrapper(0.3)
        keep = nms(all_cls_dets[idx_class])
        all_cls_dets[idx_class] = all_cls_dets[idx_class][keep, :]

    for i in range(all_cls_dets[1].shape[0]):
        cv2.rectangle(img,
                      (int(all_cls_dets[1][i][0]), int(all_cls_dets[1][i][1])),
                      (int(all_cls_dets[1][i][2]), int(all_cls_dets[1][i][3])),
                      (0, 0, 255), 1)
    cv2.imshow("w", img)
    cv2.waitKey()
def main(im_name, frame, score):
    data = []
    # only resize input image to target size and return scale
    im, im_scale = resize(frame, target_size, max_size, stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    data.append({'data': im_tensor, 'im_info': im_info})

    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

    tic()
    scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
    im_shapes = [data_batch.data[i][0].shape[2:4] for i in xrange(len(data_batch.data))]

    masks = masks[0][:, 1:, :, :]
    im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
    im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
    # print (im_height, im_width)
    boxes = clip_boxes(boxes[0], (im_height, im_width))
    result_masks, result_dets = cpu_mask_voting(masks, boxes, scores[0], num_classes,
                                                100, im_width, im_height,
                                                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                                                config.BINARY_THRESH)

    dets = [result_dets[j] for j in range(1, num_classes)]
    masks = [result_masks[j][:, 0, :, :] for j in range(1, num_classes)]
    print 'testing {} {:.4f}s'.format(im_name, toc())

    min_confidence = score
    # visualize
    for i in xrange(len(dets)):
        keep = np.where(dets[i][:, -1] > min_confidence)
        dets[i] = dets[i][keep]
        masks[i] = masks[i][keep]
    '''
    dets: [ numpy.ndarray([[x1 y1 x2 y2 score]]) for j in classes ]
        [x1 y1] - upper left corner of object
        [x2 y2] - lower right corner of object
    masks: [ numpy.ndarray([21, 21]) for j in classes ]
        confidence that pixel belongs to object
    '''
    for i in range(len(dets)):
        if len(dets[i]) > 0:
            for j in range(len(dets[i])):
                print('{name}: {score} ({loc})'.format(name=classes[i],
                                                       score=dets[i][j][-1],
                                                       loc=dets[i][j][:-1].tolist()))
def fcis_seg(image, classes, predictor, args):
    num_classes = len(classes) + 1
    data = []
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(image, target_size, max_size, stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    start = time.time()
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    data.append({'data': im_tensor, 'im_info': im_info})
    data_names = ['data', 'im_info']
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
    im_shapes = [data_batch.data[i][0].shape[2:4] for i in xrange(len(data_batch.data))]

    if not config.TEST.USE_MASK_MERGE:
        all_boxes = [[] for _ in xrange(num_classes)]
        all_masks = [[] for _ in xrange(num_classes)]
        nms = py_nms_wrapper(config.TEST.NMS)
        for j in range(1, num_classes):
            indexes = np.where(scores[0][:, j] > 0.7)[0]
            cls_scores = scores[0][indexes, j, np.newaxis]
            cls_masks = masks[0][indexes, 1, :, :]
            try:
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    raise Exception()
            except:
                cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            all_boxes[j] = cls_dets[keep, :]
            all_masks[j] = cls_masks[keep, :]
        dets = [all_boxes[j] for j in range(1, num_classes)]
        masks = [all_masks[j] for j in range(1, num_classes)]
    else:
        masks = masks[0][:, 1:, :, :]
        im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
        im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
        # print (im_height, im_width)
        boxes = clip_boxes(boxes[0], (im_height, im_width))
        result_masks, result_dets = gpu_mask_voting(masks, boxes, scores[0], num_classes,
                                                    100, im_width, im_height,
                                                    config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                                                    config.BINARY_THRESH, 0)
        dets = [result_dets[j] for j in range(1, num_classes)]
        masks = [result_masks[j][:, 0, :, :] for j in range(1, num_classes)]
    cods, bimsks, names = decode_mask(im, dets, masks, classes, config, args)
    return cods, bimsks, names
def main():
    # get symbol
    pprint.pprint(config)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol_rfcn(config, is_train=False)

    # load demo data
    image_names = ['000057.jpg', '000149.jpg', '000351.jpg', '002535.jpg']
    image_all = []
    # ground truth boxes
    gt_boxes_all = [np.array([[132, 52, 384, 357]]), np.array([[113, 1, 350, 360]]),
                    np.array([[0, 27, 329, 155]]), np.array([[8, 40, 499, 289]])]
    gt_classes_all = [np.array([3]), np.array([16]), np.array([7]), np.array([12])]
    data = []
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(cur_path + '/../demo/deform_psroi/' + im_name), \
            ('{} does not exist'.format('../demo/deform_psroi/' + im_name))
        im = cv2.imread(cur_path + '/../demo/deform_psroi/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        image_all.append(im)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        gt_boxes = gt_boxes_all[idx]
        gt_boxes = np.round(gt_boxes * im_scale)
        data.append({'data': im_tensor,
                     'rois': np.hstack((np.zeros((gt_boxes.shape[0], 1)), gt_boxes))})

    # get predictor
    data_names = ['data', 'rois']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/deform_psroi', 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # test
    for idx, _ in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])

        output = predictor.predict(data_batch)
        cls_offset = output[0]['rfcn_cls_offset_output'].asnumpy()

        im = image_all[idx]
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        boxes = gt_boxes_all[idx]
        show_dpsroi_offset(im, boxes, cls_offset, gt_classes_all[idx])
def main():
    # get symbol
    pprint.pprint(config)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # load demo data
    image_names = ['000240.jpg', '000437.jpg', '004072.jpg', '007912.jpg']
    image_all = []
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/deform_conv/' + im_name), \
            ('{} does not exist'.format('../demo/deform_conv/' + im_name))
        im = cv2.imread(cur_path + '/../demo/deform_conv/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        image_all.append(im)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/deform_conv', 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # test
    for idx, _ in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])

        output = predictor.predict(data_batch)
        res5a_offset = output[0]['res5a_branch2b_offset_output'].asnumpy()
        res5b_offset = output[0]['res5b_branch2b_offset_output'].asnumpy()
        res5c_offset = output[0]['res5c_branch2b_offset_output'].asnumpy()

        im = image_all[idx]
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_dconv_offset(im, [res5c_offset, res5b_offset, res5a_offset])
def _load_frame(self, idx):
    im = self.images[idx]
    target_size = cfg.SCALES[0][0]
    max_size = cfg.SCALES[0][1]
    im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
    im_tensor = transform(im, cfg.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
    return mx.nd.array(im_tensor), mx.nd.array(im_info)
def predict(self, im):
    im, im_scale = resize(im, self.scales[0], self.scales[1], stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    data = {'data': im_tensor, 'im_info': im_info}
    data = [[mx.nd.array(data[self.data_names[0]]),
             mx.nd.array(data[self.data_names[1]])]]
    data_batch = mx.io.DataBatch(data=data, label=[], pad=0, index=0,
                                 provide_data=[[(k, v.shape) for k, v in zip(self.data_names, data[0])]],
                                 provide_label=[None])
    scores, boxes, data_dict = im_detect(self.predictor, data_batch, self.data_names,
                                         [im_scale], config)

    boxes = boxes[0].astype('f')
    scores = scores[0].astype('f')
    dets_nms = []
    for j in range(1, scores.shape[1]):
        cls_scores = scores[:, j, np.newaxis]
        cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores))
        keep = self.nms(cls_dets)
        cls_dets = cls_dets[keep, :]
        cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
        dets_nms.append(cls_dets)

    res = {}
    for idx, cls in enumerate(self.classes):
        res['{}'.format(cls)] = dets_nms[idx].tolist()
    logging.debug("Predictions: {}".format(res))
    return res
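# Hedged usage sketch (not from the original source): `detector` is assumed to be an
# initialized instance of the class above, with predictor, nms, and classes set up.
# predict() returns {class_name: [[x1, y1, x2, y2, score], ...]}, keeping only
# detections with score > 0.7.
def _demo_predict(detector, image_path):
    im = cv2.imread(image_path, cv2.IMREAD_COLOR)
    res = detector.predict(im)
    for cls, dets in res.items():
        print('{}: {} detections'.format(cls, len(dets)))
    return res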
def parfetch(self, iroidb):
    # get testing data for multigpu
    data, label = get_rpn_pair_mv_batch(iroidb, self.cfg)
    data_shape = {k: v.shape for k, v in data.items()}
    del data_shape['im_info']
    del data_shape['data']
    data_shape1 = copy.deepcopy(data_shape)
    del data_shape1['eq_flag']
    del data_shape1['motion_vector']
    _, feat_shape, _ = self.feat_conv_3x3_relu.infer_shape(**data_shape1)

    # resize the motion vector to the spatial size of the conv feature map
    data['motion_vector'] = data['motion_vector'].astype('float64')
    data['motion_vector'] = cv2.resize(data['motion_vector'],
                                       (int(feat_shape[0][3]), int(feat_shape[0][2])),
                                       interpolation=cv2.INTER_AREA)
    data['motion_vector'] = transform(data['motion_vector'], [0, 0])

    data_shape = {k: v.shape for k, v in data.items()}
    del data_shape['im_info']
    del data_shape['data']
    _, feat_shape, _ = self.feat_sym.infer_shape(**data_shape)
    feat_shape = [int(i) for i in feat_shape[0]]

    # add gt_boxes to data for e2e
    data['gt_boxes'] = label['gt_boxes'][np.newaxis, :, :]

    # assign anchor for label
    label = assign_anchor(feat_shape, label['gt_boxes'], data['im_info'], self.cfg,
                          self.feat_stride, self.anchor_scales, self.anchor_ratios,
                          self.allowed_border, self.normalize_target,
                          self.bbox_mean, self.bbox_std)
    return {'data': data, 'label': label}
def loadImage(self, im, bg):
    self.im = im
    self.fg = im
    self.bg = bg
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    self.im, im_scale = resize(self.im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(self.im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    self.data = {'data': im_tensor, 'im_info': im_info}
    self.data = [mx.nd.array(self.data[name]) for name in self.data_names]
def load_test(filename, short, max_size, mean, std):
    # read and transform image
    im_orig = imdecode(filename)
    im, im_scale = resize(im_orig, short, max_size)
    height, width = im.shape[:2]
    im_info = mx.nd.array([height, width, im_scale])

    # transform into tensor and normalize
    im_tensor = transform(im, mean, std)

    # for 1-batch inference purpose, cannot use batchify (or nd.stack) to expand dims
    im_tensor = mx.nd.array(im_tensor).expand_dims(0)
    im_info = mx.nd.array(im_info).expand_dims(0)

    # transform cv2 BGR image to RGB for matplotlib
    im_orig = im_orig[:, :, (2, 1, 0)]
    return im_tensor, im_info, im_orig
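# Hedged usage sketch for load_test (not from the original source): the mean/std
# values below are the common ImageNet defaults, used here only as an illustration;
# 'demo.jpg' is a placeholder path.
def _demo_load_test():
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    im_tensor, im_info, im_orig = load_test('demo.jpg', short=600, max_size=1000,
                                            mean=mean, std=std)
    print(im_tensor.shape, im_info.shape)  # (1, 3, H, W), (1, 3)
    return im_tensor, im_info, im_orig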
def get_predictor(sym, image, arg_params, aux_params):
    data = []
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(image, target_size, max_size, stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    data.append({'data': im_tensor, 'im_info': im_info})
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    return predictor
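# Hedged usage sketch (not from the original source): builds a predictor for one image
# and runs im_detect on a batch. `sym`, `arg_params`, and `aux_params` are assumed to
# come from load_param as in the surrounding snippets, and this assumes the
# generate_batch helper defined earlier in this collection is importable here.
def _demo_get_predictor(sym, arg_params, aux_params, image_path):
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    predictor = get_predictor(sym, image, arg_params, aux_params)
    data_batch, data_names, scales = generate_batch(image)
    scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
    return scores, boxes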
def _load_frame(self, idx):
    im_name = self.images_names[idx]
    assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
    im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    if idx == 100:
        im[...] = 0
    target_size = cfg.SCALES[0][0]
    max_size = cfg.SCALES[0][1]
    im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
    im_tensor = transform(im, cfg.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
    return mx.nd.array(im_tensor), mx.nd.array(im_info)
def load_data_and_get_predictor(self, image_names):
    # load demo data
    data = []
    for im_name in image_names:
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    self.data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in self.data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(self.data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    self.predictor = Predictor(self.sym, self.data_names, label_names,
                               context=[mx.gpu(1)], max_data_shapes=max_data_shape,
                               provide_data=provide_data, provide_label=provide_label,
                               arg_params=self.arg_params, aux_params=self.aux_params)
    self.nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    return data
def generate_batch_V2(im, num_gpu):
    """
    preprocess image, return batch
    :param im: cv2.imread returns [height, width, channel] in BGR
    :return:
    data_batch: MXNet input batch
    data_names: names in data_batch
    im_scale: float number
    """
    array_list, info_list = [], []
    for idx in range(0, num_gpu):
        SHORT_SIDE = config.SCALES[0][0]
        LONG_SIDE = config.SCALES[0][1]
        PIXEL_MEANS = config.network.PIXEL_MEANS
        DATA_NAMES = ['data', 'im_info']
        im_array, im_scale = resize(im[idx], SHORT_SIDE, LONG_SIDE)
        im_array = transform(im_array, PIXEL_MEANS)
        im_info = np.array([[im_array.shape[2], im_array.shape[3], im_scale]], dtype=np.float32)
        array_list.append(im_array)
        info_list.append(im_info)
    data = [[mx.nd.array(arr), mx.nd.array(info)]
            for arr, info in zip(array_list, info_list)]
def read_data(self, im_name):
    # data (1, 3, 562, 1000)
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
    print("About to read ", im_name)
    im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    target_size = self.cfg.SCALES[0][0]
    max_size = self.cfg.SCALES[0][1]
    im, im_scale = resize(im, target_size, max_size, stride=self.cfg.network.IMAGE_STRIDE)
    im_tensor = transform(im, self.cfg.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    d = {
        'data': mx.nd.array(im_tensor),
        'im_info': mx.nd.array(im_info),
        'data_cache': mx.nd.array(im_tensor),
        'feat_cache': mx.nd.array(im_tensor)
    }
    return [d[data_name] for data_name in data_names]
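# Hedged usage sketch (not from the original source): `reader` is assumed to be an
# instance of the class above; the returned NDArrays are ordered as data_names there.
def _demo_read_data(reader, im_name):
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    tensors = reader.read_data(im_name)
    return mx.io.DataBatch(data=[tensors], label=[], pad=0, index=0,
                           provide_data=[[(k, v.shape) for k, v in zip(data_names, tensors)]],
                           provide_label=[None])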
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn_online_train'
    model = '/../model/rfcn_fgfa_flownet_vid_original'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    num_classes = 31
    classes = ['__background__', 'airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle', 'dog', 'domestic_cat', 'elephant',
               'fox', 'giant_panda', 'hamster', 'horse', 'lion', 'lizard',
               'monkey', 'motorcycle', 'rabbit', 'red_panda', 'sheep', 'snake',
               'squirrel', 'tiger', 'train', 'turtle', 'watercraft', 'whale', 'zebra']

    # load demo data
    # other candidate snippets: ILSVRC2015_val_00007016, ILSVRC2015_val_00000004,
    # ILSVRC2015_val_00044006, ILSVRC2015_val_00007010, ILSVRC2015_val_00016002,
    # ILSVRC2015_val_00010001 (zebra), ILSVRC2015_val_00037002 (cat and fox, motion blur),
    # ILSVRC2015_val_00095000 (fox, defocus)
    snippet_name = 'ILSVRC2015_val_00177001'
    image_names = glob.glob(cur_path + '/../demo/' + snippet_name + '/*.JPEG')
    image_names.sort()
    output_dir = cur_path + '/../demo/test_'
    output_dir_ginst = cur_path + '/../demo/test_ginst_'
    output_dir_linst = cur_path + '/../demo/test_linst_'
    if cfg.TEST.SEQ_NMS:
        output_dir += 'SEQ_NMS_'
        output_dir_ginst += 'SEQ_NMS_'
    output_dir += snippet_name + '/'
    output_dir_ginst += snippet_name + '/'
    output_dir_linst += snippet_name + '/'

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({'data': im_tensor, 'im_info': im_info,
                     'data_cache': im_tensor, 'feat_cache': im_tensor})

    # get predictor
    print('get-predictor')
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []
    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                 max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]),
                                       max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', ((19, cfg.network.FGFA_FEAT_DIM,
                                        np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                        np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int))))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    # add parameters for instance cls & regression (deep copies of the bbox branch)
    arg_params['rfcn_ibbox_bias'] = arg_params['rfcn_bbox_bias'].copy()
    arg_params['rfcn_ibbox_weight'] = arg_params['rfcn_bbox_weight'].copy()
    max_inst = cfg.TEST.NUM_INSTANCES

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    all_boxes_inst = [[[] for _ in range(len(data))] for _ in range(num_classes)]

    ginst_mem = []          # list for instance class
    sim_array_global = []   # similarity array list
    ginst_ID = 0

    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = True
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):
        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if idx != len(data) - 1:
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch)  # put 19 data & feat list into data_batch
                # get box result [[scores, pred_boxes, rois, data_dict, iscores, ipred_boxes, cropped_embed]]
                pred_result = im_detect_all(aggr_predictors, data_batch, data_names, scales, cfg)

                data_batch.data[0][-2] = None  # 19 frames of data possesses much memory, so clear it
                data_batch.provide_data[0][-2] = ('data_cache', None)  # also clear shape info of data
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                ginst_ID_prev = ginst_ID
                ginst_ID, out_im, out_im2, out_im_linst = process_link_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms,
                    all_boxes, all_boxes_inst, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales,
                    ginst_mem, sim_array_global, ginst_ID)
                ginst_ID_now = ginst_ID
                init_inst_params(ginst_mem, ginst_ID_prev, ginst_ID_now, max_inst,
                                 aggr_predictors, arg_params)

                total_time = time.time() - t1
                if cfg.TEST.SEQ_NMS == False:
                    if cfg.TEST.DISPLAY[0]:
                        save_image(output_dir, file_idx, out_im)
                    if cfg.TEST.DISPLAY[1]:
                        save_image(output_dir_ginst, file_idx, out_im2)
                    if cfg.TEST.DISPLAY[2]:
                        save_image(output_dir_linst, file_idx, out_im_linst)
                # testing by metric
                print('testing {} {:.4f}s'.format(str(file_idx) + '.JPEG',
                                                  total_time / (file_idx + 1)))
                file_idx += 1
        else:
            #################################################
            #           end part of a video                 #
            #################################################
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect_all(aggr_predictors, data_batch, data_names, scales, cfg)

                ginst_ID_prev = ginst_ID
                ginst_ID, out_im, out_im2, out_im_linst = process_link_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms,
                    all_boxes, all_boxes_inst, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales,
                    ginst_mem, sim_array_global, ginst_ID)
                ginst_ID_now = ginst_ID
                init_inst_params(ginst_mem, ginst_ID_prev, ginst_ID_now, max_inst,
                                 aggr_predictors, arg_params)

                total_time = time.time() - t1
                if cfg.TEST.SEQ_NMS == False:
                    if cfg.TEST.DISPLAY[0]:
                        save_image(output_dir, file_idx, out_im)
                    if cfg.TEST.DISPLAY[1]:
                        save_image(output_dir_ginst, file_idx, out_im2)
                    if cfg.TEST.DISPLAY[2]:
                        save_image(output_dir_linst, file_idx, out_im_linst)
                print('testing {} {:.4f}s'.format(str(file_idx) + '.JPEG',
                                                  total_time / (file_idx + 1)))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image,
                                        classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print('done')
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_deeplab'
    model1 = '/../model/rfcn_dff_flownet_vid'
    model2 = '/../model/deeplab_dcn_cityscapes'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    # settings
    num_classes = 19
    interv = args.interval
    num_ex = args.num_ex

    # load demo data
    image_names = sorted(glob.glob(cur_path + '/../demo/cityscapes_data/cityscapes_frankfurt_all_i'
                                   + str(interv) + '/*.png'))
    image_names = image_names[:interv * num_ex]
    label_files = sorted(glob.glob(cur_path
                                   + '/../demo/cityscapes_data/cityscapes_frankfurt_labels_all/*.png'))
    output_dir = cur_path + '/../demo/deeplab_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = interv

    data = []
    key_im_tensor = None
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        data.append({'data': im_tensor,
                     'im_info': im_info,
                     'data_key': key_im_tensor,
                     'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))})

    # get predictor
    data_names = ['data', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES]))),
                       ('data_key', (1, 3, max([v[0] for v in config.SCALES]),
                                     max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    # models: rfcn_dff_flownet_vid, deeplab_cityscapes
    arg_params, aux_params = load_param_multi(cur_path + model1, cur_path + model2, 0, process=True)
    key_predictor = Predictor(key_sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    cur_predictor = Predictor(cur_sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[j])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        if j % key_frame_interval == 0:
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy()
                          for output in output_all]
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            output_all, _ = im_segment(cur_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy()
                          for output in output_all]
    print "warmup done"

    # test
    time = 0
    count = 0
    hist = np.zeros((num_classes, num_classes))
    lb_idx = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        if idx % key_frame_interval == 0:
            print '\nframe {} (key)'.format(idx)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy()
                          for output in output_all]
        else:
            print '\nframe {} (intermediate)'.format(idx)
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            output_all, _ = im_segment(cur_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy()
                          for output in output_all]
        elapsed = toc()
        time += elapsed
        count += 1
        print 'testing {} {:.4f}s [{:.4f}s]'.format(im_name, elapsed, time / count)

        pred = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(pred)
        pallete = getpallete(256)
        segmentation_result.putpalette(pallete)
        _, im_filename = os.path.split(im_name)
        segmentation_result.save(output_dir + '/seg_' + im_filename)

        label = None
        _, lb_filename = os.path.split(label_files[lb_idx])
        im_comps = im_filename.split('_')
        lb_comps = lb_filename.split('_')
        # if annotation available for frame
        if im_comps[1] == lb_comps[1] and im_comps[2] == lb_comps[2]:
            print 'label {}'.format(lb_filename)
            label = np.asarray(Image.open(label_files[lb_idx]))
            if lb_idx < len(label_files) - 1:
                lb_idx += 1

        if label is not None:
            curr_hist = fast_hist(pred.flatten(), label.flatten(), num_classes)
            hist += curr_hist
            print 'mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(curr_hist)) * 100, 2))
            print '(cum) mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(hist)) * 100, 2))

    ious = per_class_iu(hist) * 100
    print ' '.join('{:.03f}'.format(i) for i in ious)
    print '===> final mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(ious), 2))
    print 'done'
mod_key.set_params(arg_params, aux_params)

#%%
data = []
key_im_tensor = None
for idx, im_name in enumerate(image_names):
    assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
    im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    # print(im_info.shape)
    # print im_info
    if idx % key_frame_interval == 0:
        key_im_tensor = im_tensor
    data.append({'data': im_tensor,
                 'im_info': im_info,
                 'data_key': key_im_tensor,
                 'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 36, 63))})

# get predictor
# data_names = ['data', 'im_info', 'data_key', 'feat_key']
def run_detection(im_root, result_root, conf_threshold):
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
               'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
               'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
               'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
               'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
               'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
               'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
               'hair drier', 'toothbrush']

    print('detection in {}'.format(im_root))
    im_names = sorted(os.listdir(im_root))

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = []
    for idx, im_name in enumerate(im_names[:2]):
        im_file = os.path.join(im_root, im_name)
        im = cv2.imread(im_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        config.TEST.test_epoch, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    # nms = soft_nms_wrapper(config.TEST.NMS, method=2)
    nms = gpu_soft_nms_wrapper(config.TEST.NMS, method=2, device_id=0)

    nms_t = Timer()
    for idx, im_name in enumerate(im_names):
        im_file = os.path.join(im_root, im_name)
        im = cv2.imread(im_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        origin_im = im.copy()
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)

        # input
        data = [mx.nd.array(im_tensor), mx.nd.array(im_info)]
        data_batch = mx.io.DataBatch(data=[data], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            nms_t.tic()
            keep = nms(cls_dets)
            nms_t.toc()
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.2f}ms'.format(im_name, toc() * 1000)
        print 'nms: {:.2f}ms'.format(nms_t.total_time * 1000)
        nms_t.clear()

        # save results
        person_dets = dets_nms[0]
        with open(os.path.join(result_root, '{:04d}.txt'.format(idx)), 'w') as f:
            f.write('{}\n'.format(len(person_dets)))
            for det in person_dets:
                x1, y1, x2, y2, s = det
                w = x2 - x1
                h = y2 - y1
                f.write('0 {} {} {} {} {}\n'.format(s, w, h, x1, y1))

        # visualize
        im = origin_im
        # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = show_boxes_cv2(im, dets_nms, classes, 1)
        cv2.imshow('det', im)
        cv2.waitKey(1)
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
               'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
               'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
               'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
               'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
               'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
               'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
               'hair drier', 'toothbrush']

    # load demo data
    image_names = ['COCO_test2015_000000000891.jpg', 'COCO_test2015_000000001669.jpg']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), \
            ('{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())

        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

    print 'done'
def main():
    global classes

    assert os.path.exists(args.input), ('{} does not exist'.format(args.input))
    im = cv2.imread(args.input, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    arr = np.array(im)
    origin_width, origin_height, _ = arr.shape
    portion = smart_chipping(origin_width, origin_height)

    # manually update the configuration
    # print(config.SCALES[0][0])
    # TODO: note this is hard coded and assume there are three values for the SCALE configuration
    config.SCALES[0] = (portion, portion, portion)
    # config.max_per_image =

    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_fpn_dcn_rcnn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # load demo data
    data = []
    # portion = args.chip_size
    cwn, chn = (portion, portion)
    wn, hn = (int(origin_width / cwn), int(origin_height / chn))
    padding_y = int(math.ceil(float(origin_height) / chn) * chn - origin_height)
    padding_x = int(math.ceil(float(origin_width) / cwn) * cwn - origin_width)
    print("padding_y,padding_x, origin_height, origin_width",
          padding_y, padding_x, origin_height, origin_width)
    # top, bottom, left, right - border width in number of pixels in corresponding directions
    im = cv2.copyMakeBorder(im, 0, padding_x, 0, padding_y,
                            cv2.BORDER_CONSTANT, value=[0, 0, 0])

    # the section below could be optimized. but basically the idea is to re-calculate all the values
    arr = np.array(im)
    width, height, _ = arr.shape
    cwn, chn = (portion, portion)
    wn, hn = (int(width / cwn), int(height / chn))

    image_list = chip_image(im, (portion, portion))
    for im in image_list:
        target_size = portion
        max_size = portion
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        # print("im.shape,im_scale",im.shape,im_scale)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' + ('fpn_dcn_xview_480_640_800_alltrain'), 11, process=True)
    # arg_params, aux_params = load_param(cur_path + '/../model/' + ('fpn_dcn_coco' if not args.fpn_only else 'fpn_coco'), 0, process=True)
    print("loading parameter done")
    if args.cpu_only:
        predictor = Predictor(sym, data_names, label_names,
                              context=[mx.cpu()], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
        nms = py_nms_wrapper(config.TEST.NMS)
    else:
        predictor = Predictor(sym, data_names, label_names,
                              context=[mx.gpu(args.gpu_index)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    num_preds = int(5000 * math.ceil(float(portion) / 400))
    # test
    boxes, scores, classes = generate_detections(data, data_names, predictor, config, nms,
                                                 image_list, num_preds)

    # Process boxes to be full-sized
    print("boxes shape is", boxes.shape, "wn, hn", wn, hn, "width, height", width, height)
    bfull = boxes.reshape((wn, hn, num_preds, 4))
    for i in range(wn):
        for j in range(hn):
            bfull[i, j, :, 0] += j * cwn
            bfull[i, j, :, 2] += j * cwn
            bfull[i, j, :, 1] += i * chn
            bfull[i, j, :, 3] += i * chn
            # clip values
            bfull[i, j, :, 0] = np.clip(bfull[i, j, :, 0], 0, origin_height)
            bfull[i, j, :, 2] = np.clip(bfull[i, j, :, 2], 0, origin_height)
            bfull[i, j, :, 1] = np.clip(bfull[i, j, :, 1], 0, origin_width)
            bfull[i, j, :, 3] = np.clip(bfull[i, j, :, 3], 0, origin_width)

    bfull = bfull.reshape((hn * wn, num_preds, 4))
    scores = scores.reshape((hn * wn, num_preds))
    classes = classes.reshape((hn * wn, num_preds))

    # only display boxes with confidence > .5
    # print(bfull, scores, classes)
    # bs = bfull[scores > 0.08]
    # cs = classes[scores > 0.08]
    # print("bfull.shape,scores.shape, bs.shape", bfull.shape, scores.shape, bs.shape)
    # s = im_name
    # draw_bboxes(arr, bs, cs).save("/tmp/" + s[0].split(".")[0] + ".png")
    # scoring_line_threshold = 11000
    # if bs.shape[0] > scoring_line_threshold:
    #     # too many predictions, we should trim the low confidence ones

    with open(args.output, 'w') as f:
        for i in range(bfull.shape[0]):
            for j in range(bfull[i].shape[0]):
                # box should be xmin ymin xmax ymax
                box = bfull[i, j]
                class_prediction = classes[i, j]
                score_prediction = scores[i, j]
                if int(class_prediction) != 0:
                    f.write('%d %d %d %d %d %f \n' %
                            (box[0], box[1], box[2], box[3],
                             int(class_prediction), score_prediction))
    print('done')
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_fgfa_flownet_vid'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    num_classes = 31
    classes = ['__background__', 'airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle', 'dog', 'domestic_cat', 'elephant',
               'fox', 'giant_panda', 'hamster', 'horse', 'lion', 'lizard',
               'monkey', 'motorcycle', 'rabbit', 'red_panda', 'sheep', 'snake',
               'squirrel', 'tiger', 'train', 'turtle', 'watercraft', 'whale', 'zebra']

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn_fgfa/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({'data': im_tensor, 'im_info': im_info,
                     'data_cache': im_tensor, 'feat_cache': im_tensor})

    # get predictor
    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []
    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                 max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]),
                                       max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', ((19, cfg.network.FGFA_FEAT_DIM,
                                        np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                        np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int))))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):
        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if idx != len(data) - 1:
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if cfg.TEST.SEQ_NMS == False:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
        else:
            #################################################
            #           end part of a video                 #
            #################################################
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)
                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if cfg.TEST.SEQ_NMS == False:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image,
                                        classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print 'done'
def main(tempFileList, fileOp):
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    out_dir = os.path.join(
        cur_path,
        'demo/output/terror-det-rg-data-output/terror-det-v0.9-test/JPEGImages')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # set up class names (six foreground classes; background is label 0)
    num_classes = 7
    classes = ['tibetan flag', 'guns', 'knives', 'not terror', 'islamic flag', 'isis flag']

    # load demo data
    image_names = tempFileList
    data = []
    for im_name in image_names:
        im_file = im_name
        print(im_file)
        im = cv2.imread(im_file, cv2.IMREAD_COLOR)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/demo/models/' + 'rfcn_voc', 10, process=True)
    # modify by zxt
    # mx.model.save_checkpoint('f1/final', 10, sym, arg_params, aux_params)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    # fileOp = open(os.path.join(cur_path, 'terror-det-rg-test-result.txt'), 'w')
    for idx, im_name in enumerate(image_names):
        print("beginning process %s" % (im_name))
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())

        # visualize
        im = cv2.imread(im_name)
        # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im_result = show_boxes(fileOp, im_name, im, dets_nms, classes, 1)
        cv2.imwrite(os.path.join(out_dir, im_name.split('/')[-1]), im_result)
    print 'done'
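The per-class slicing and thresholding inside the loop above recurs in every detection demo in this file. A standalone sketch follows; it assumes only an nms callable with the repo wrappers' shape (dets array in, kept row indices out), and the helper name filter_dets is an invention for illustration.

import numpy as np

def filter_dets(scores, boxes, nms, class_agnostic, score_thresh=0.7):
    # scores: (num_rois, num_classes); boxes: (num_rois, 4 * num_classes),
    # or (num_rois, 8) in the class-agnostic case (background + foreground box).
    dets_nms = []
    for j in range(1, scores.shape[1]):      # skip class 0, the background
        cls_scores = scores[:, j, np.newaxis]
        cls_boxes = boxes[:, 4:8] if class_agnostic else boxes[:, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores)).astype('f')
        keep = nms(cls_dets)
        cls_dets = cls_dets[keep, :]
        dets_nms.append(cls_dets[cls_dets[:, -1] > score_thresh, :])
    return dets_nms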
def batch_extract(self, multiple=True, gt_dir=None, epoch=0):
    """Run extraction over self.img_list in mini-batches.

    :param multiple: forwarded to self.forward()
    :param gt_dir: optional ground-truth directory, forwarded to self.forward()
    :param epoch: checkpoint epoch passed to load_param()
    """
    # ceil(len(img_list) / batch_size) mini-batches
    if len(self.img_list) % self.batch_size != 0:
        batch = len(self.img_list) / self.batch_size + 1
    else:
        batch = len(self.img_list) / self.batch_size
    for i in xrange(batch):
        if i < batch - 1:
            self.batch_list = self.img_list[i * self.batch_size:(i + 1) * self.batch_size]
        else:
            self.batch_list = self.img_list[i * self.batch_size:]
        print '\nMini-batch %d\t' % (i + 1)
        tmp_data = []
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        tic()
        for img in self.batch_list:
            assert os.path.exists(img), '{} does not exist.'.format(img)
            im = cv2.imread(img, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            # im_info: height, width, scale
            im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
            tmp_data.append({self.data_names[0]: im_tensor, self.data_names[1]: im_info})
        # NOTE: comprehension variables renamed so they cannot clobber the
        # outer loop variable `i` under Python 2, which the `if i == 0` warm-up
        # check below depends on
        self.ctx = [int(g) for g in config.gpus.split(',')]
        self.data = [[mx.nd.array(tmp_data[k][name], mx.gpu(self.ctx[0])) for name in self.data_names]
                     for k in xrange(len(tmp_data))]
        max_data_shape = [[(self.data_names[0], (1, 3, max([v[0] for v in config.SCALES]),
                                                 max([v[1] for v in config.SCALES])))]]
        provide_data = [[(k, v.shape) for k, v in zip(self.data_names, self.data[n])]
                        for n in xrange(len(self.data))]
        provide_label = [None for _ in xrange(len(self.data))]
        arg_params, aux_params = load_param(self.model_dir, epoch, process=True)
        self.predictor = Predictor(self.sym, self.data_names, self.label_name,
                                   context=[mx.gpu(self.ctx[0])], max_data_shapes=max_data_shape,
                                   provide_data=provide_data, provide_label=provide_label,
                                   arg_params=arg_params, aux_params=aux_params)
        print 'preparation: %.4fs' % toc()
        if i == 0:
            self.warmup()
        self.forward(multiple=multiple, gt_dir=gt_dir)
        self.cleaner()
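The two-branch batch count at the top of batch_extract is integer ceil division. A tiny sketch of the equivalent one-liner (the helper name is illustrative; `//` keeps it correct under both Python 2 and 3):

def num_batches(n_items, batch_size):
    # ceil(n_items / batch_size) using only integer arithmetic
    return (n_items + batch_size - 1) // batch_size

assert num_batches(10, 4) == 3
assert num_batches(8, 4) == 2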
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_deeplab_dcn' if not args.deeplab_only else 'resnet_v1_101_deeplab'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 19

    # load demo data
    image_names = ['frankfurt_000001_073088_leftImg8bit.png', 'lindau_000024_000019_leftImg8bit.png']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), \
            '{} does not exist'.format('../demo/' + im_name)
        im = cv2.imread(cur_path + '/../demo/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data']
    label_names = ['softmax_label']
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' + ('deeplab_dcn_cityscapes' if not args.deeplab_only else 'deeplab_cityscapes'),
        0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        output_all = predictor.predict(data_batch)
        output_all = [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy() for output in output_all]

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        tic()
        output_all = predictor.predict(data_batch)
        output_all = [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy() for output in output_all]
        pallete = getpallete(256)
        segmentation_result = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(segmentation_result)
        segmentation_result.putpalette(pallete)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        pure_im_name, ext_im_name = os.path.splitext(im_name)
        segmentation_result.save(cur_path + '/../demo/seg_' + pure_im_name + '.png')

        # visualize
        im_raw = cv2.imread(cur_path + '/../demo/' + im_name)
        seg_res = cv2.imread(cur_path + '/../demo/seg_' + pure_im_name + '.png')
        cv2.imshow('Raw Image', im_raw)
        cv2.imshow('segmentation_result', seg_res)
        cv2.waitKey(0)
    print 'done'
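A minimal sketch of the argmax-to-palette-PNG step above in isolation, assuming a flat [r0, g0, b0, r1, g1, b1, ...] palette list like the one getpallete(256) returns; the label map here is random stand-in data, and the three colours are illustrative.

import numpy as np
from PIL import Image

label_map = np.random.randint(0, 19, size=(512, 1024))  # hypothetical argmax output
pallete = [0] * (256 * 3)
pallete[:9] = [128, 64, 128, 244, 35, 232, 70, 70, 70]  # colours for classes 0..2

seg = Image.fromarray(np.uint8(label_map))  # mode 'L': one byte per pixel
seg.putpalette(pallete)                     # bytes become palette indices (mode 'P')
seg.save('seg_demo.png')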
def im_detect_bbox_aug(net, nms_wrapper, img_path, scales, pixel_means, bbox_stds,
                       ctx, threshold=1e-3, viz=False):
    all_bboxes = []
    all_scores = []
    img_ori = cv2.imread(img_path.encode("utf-8"))
    for scale_min, scale_max in scales:
        fscale = 1.0 * scale_min / min(img_ori.shape[:2])
        img_resized = cv2.resize(img_ori, (0, 0), fx=fscale, fy=fscale)
        h, w, c = img_resized.shape
        # pad to multiples of 32 so the backbone strides divide evenly
        h_padded = h if h % 32 == 0 else h + 32 - h % 32
        w_padded = w if w % 32 == 0 else w + 32 - w % 32
        img_padded = np.zeros(shape=(h_padded, w_padded, c), dtype=img_resized.dtype)
        img_padded[:h, :w, :] = img_resized
        img = transform(img_padded, pixel_means=pixel_means)
        im_info = nd.array([[h_padded, w_padded, 1.0]], ctx=ctx[0])
        data = nd.array(img, ctx=ctx[0])

        # original orientation
        rois, scores, bbox_deltas = net(data, im_info)
        rois = rois[:, 1:].asnumpy()
        bbox_deltas = bbox_deltas[0].asnumpy()
        bbox_deltas = pre_compute_deltas(bbox_deltas, bbox_stds=bbox_stds)
        bbox = bbox_pred(rois, bbox_deltas)
        bbox = clip_boxes(bbox, data.shape[2:4])
        bbox /= fscale
        all_bboxes.append(bbox)
        all_scores.append(scores[0].asnumpy())

        # hflip
        rois, scores, bbox_deltas = net(data[:, :, :, ::-1], im_info)
        rois = rois[:, 1:].asnumpy()
        bbox_deltas = bbox_deltas[0].asnumpy()
        bbox_deltas = pre_compute_deltas(bbox_deltas, bbox_stds=bbox_stds)
        bbox = bbox_pred(rois, bbox_deltas)
        bbox = clip_boxes(bbox, data.shape[2:4])
        tmp = bbox[:, 0::4].copy()
        bbox[:, 0::4] = data.shape[3] - bbox[:, 2::4] - 1  # x0' = w - x1 - 1
        bbox[:, 2::4] = data.shape[3] - tmp - 1            # x1' = w - x0 - 1
        bbox /= fscale
        all_bboxes.append(bbox)
        all_scores.append(scores[0].asnumpy())

        # vflip
        rois, scores, bbox_deltas = net(data[:, :, ::-1], im_info)
        rois = rois[:, 1:].asnumpy()
        bbox_deltas = bbox_deltas[0].asnumpy()
        bbox_deltas = pre_compute_deltas(bbox_deltas, bbox_stds=bbox_stds)
        bbox = bbox_pred(rois, bbox_deltas)
        bbox = clip_boxes(bbox, data.shape[2:4])
        tmp = bbox[:, 1::4].copy()
        bbox[:, 1::4] = data.shape[2] - bbox[:, 3::4] - 1  # y0' = h - y1 - 1
        bbox[:, 3::4] = data.shape[2] - tmp - 1            # y1' = h - y0 - 1
        bbox /= fscale
        all_bboxes.append(bbox)
        all_scores.append(scores[0].asnumpy())

        # vhflip
        rois, scores, bbox_deltas = net(data[:, :, ::-1, ::-1], im_info)
        rois = rois[:, 1:].asnumpy()
        bbox_deltas = bbox_deltas[0].asnumpy()
        bbox_deltas = pre_compute_deltas(bbox_deltas, bbox_stds=bbox_stds)
        bbox = bbox_pred(rois, bbox_deltas)
        bbox = clip_boxes(bbox, data.shape[2:4])
        tmp = bbox[:, 1::4].copy()
        bbox[:, 1::4] = data.shape[2] - bbox[:, 3::4] - 1  # y0' = h - y1 - 1
        bbox[:, 3::4] = data.shape[2] - tmp - 1            # y1' = h - y0 - 1
        tmp = bbox[:, 0::4].copy()
        bbox[:, 0::4] = data.shape[3] - bbox[:, 2::4] - 1  # x0' = w - x1 - 1
        bbox[:, 2::4] = data.shape[3] - tmp - 1            # x1' = w - x0 - 1
        bbox /= fscale
        all_bboxes.append(bbox)
        all_scores.append(scores[0].asnumpy())

    all_bboxes = np.concatenate(all_bboxes, axis=0)
    all_scores = np.concatenate(all_scores, axis=0)
    pred_bboxes = []
    pred_scores = []
    pred_clsid = []
    for j in range(1, all_scores.shape[1]):
        cls_scores = all_scores[:, j, np.newaxis]
        cls_boxes = all_bboxes[:, 4:8] if config.CLASS_AGNOSTIC else all_bboxes[:, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores))
        keep = nms_wrapper(cls_dets.astype('f'))
        cls_dets = cls_dets[keep, :]
        cls_dets = cls_dets[cls_dets[:, -1] > threshold, :]
        pred_bboxes.append(cls_dets[:, :4])
        pred_scores.append(cls_dets[:, 4])
        pred_clsid.append(j * np.ones(shape=(cls_dets.shape[0],), dtype=np.int))
    pred_bboxes = np.concatenate(pred_bboxes, axis=0)
    pred_scores = np.concatenate(pred_scores, axis=0)
    pred_clsid = np.concatenate(pred_clsid, axis=0)
    if viz:
        import gluoncv
        import matplotlib.pyplot as plt
        gluoncv.utils.viz.plot_bbox(img_ori[:, :, ::-1], bboxes=pred_bboxes,
                                    scores=pred_scores, labels=pred_clsid, thresh=.5)
        plt.show()
    return pred_bboxes, pred_scores, pred_clsid
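The flip un-mapping is the subtle part of this test-time augmentation: boxes detected on the mirrored image must be mapped back with x0' = W - x1 - 1, x1' = W - x0 - 1 (and analogously in y). A self-contained check of the horizontal case on a toy box; unflip_h is an illustrative helper name.

import numpy as np

def unflip_h(bbox, width):
    # map boxes detected on a horizontally mirrored image back to the original
    bbox = bbox.copy()
    tmp = bbox[:, 0::4].copy()
    bbox[:, 0::4] = width - bbox[:, 2::4] - 1
    bbox[:, 2::4] = width - tmp - 1
    return bbox

# a box at the left edge of a mirrored 100-px-wide image maps to the right edge
print(unflip_h(np.array([[0., 10., 19., 20.]]), 100))  # -> [[80. 10. 99. 20.]]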
def predict(self, images, feat_output, aggr_feat_output):
    model = self.model
    all_frame_interval = self.all_frame_interval
    feat_sym = self.feat_sym
    aggr_sym = self.aggr_sym
    num_classes = self.num_classes
    classes = self.classes
    max_per_image = self.max_per_image

    output_dir = cur_path + '/../demo/rfcn_fgfa_{}/'.format(self.index)
    self.index += 1
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im in images:
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({'data': im_tensor, 'im_info': im_info,
                     'data_cache': im_tensor, 'feat_cache': im_tensor})

    # get predictor
    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []
    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (11, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', ((11, cfg.network.FGFA_FEAT_DIM,
                                        np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                        np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int))))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):
        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if idx != len(data) - 1:
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result, aggr_feat = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)
                assert len(aggr_feat) == 1

                # drop the cache entries so the batch does not pin GPU memory
                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result, aggr_feat = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)
                assert len(aggr_feat) == 1
                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print 'done'
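A hypothetical usage sketch for the method above. The wrapper class name FGFAPredictor and its no-argument constructor are assumptions (the source only shows the method body), and feat_output / aggr_feat_output are passed as None because the body never reads them.

import cv2
import glob

frames = [cv2.imread(fn) for fn in sorted(glob.glob('video_frames/*.JPEG'))]
detector = FGFAPredictor()      # hypothetical: loads cfg, symbols, class names
detector.predict(frames, None, None)
# annotated frames are written to ../demo/rfcn_fgfa_<index>/ as a side effect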
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    model = '/../model/rfcn_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_test_symbol(config)

    # set up class names
    num_classes = 31
    classes = ['airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
               'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
               'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle',
               'rabbit', 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger',
               'train', 'turtle', 'watercraft', 'whale', 'zebra']

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    elapsed = 0  # running total; named so it does not shadow the time module
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        elapsed += toc()
        count += 1
        print 'testing {} {:.4f}s'.format(im_name, elapsed / count)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)
    print 'done'
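The warm-up loop above exists because the first forward passes pay one-off costs (cuDNN autotuning, memory-pool growth) that would distort a running average. A minimal sketch of the same measurement pattern around any forward callable; avg_latency is an illustrative helper, not part of the repo.

import time

def avg_latency(forward, batches, warmup=2):
    for b in batches[:warmup]:
        forward(b)                  # discard warm-up iterations
    start = time.time()
    for b in batches[warmup:]:
        forward(b)
    return (time.time() - start) / max(1, len(batches) - warmup)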
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names; the background is not counted here, even though the
    # network treats it as label '0'
    num_classes = 4
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    # load demo data
    image_path = './data/RoadImages/test/'
    image_names = glob.glob(image_path + '*.jpg')
    print("Image amount {}".format(len(image_names)))
    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][1]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        './output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road', 19, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # test
    notation_dict = {}
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # notation_list.append(get_notation(im_name, dets_nms, classes, scale=1.0, gen_bbox_pic=True))
        notation_dict.update(get_notation(im_name, dets_nms, classes, scale=1.0, gen_bbox_pic=True))
    save_notation_file(notation_dict)
    print 'done'
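The resize / transform / im_info triple above is duplicated in every loader in this file. A sketch of it factored into one helper, assuming the repo's own resize() and transform() utilities, a config-like object, and the usual short-side/long-side convention for SCALES; the helper name preprocess is an invention.

import numpy as np

def preprocess(im, cfg):
    target_size, max_size = cfg.SCALES[0][0], cfg.SCALES[0][1]
    im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
    im_tensor = transform(im, cfg.network.PIXEL_MEANS)
    # im_info holds (height, width, scale) of the padded tensor
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    return {'data': im_tensor, 'im_info': im_info}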
def main():
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
               'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
               'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
               'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
               'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
               'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
               'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
               'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
               'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
               'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
               'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    # load demo data
    image_names = []
    names_dirs = os.listdir(cur_path + '/../' + test_dir)
    for im_name in names_dirs:
        if im_name[-4:] == '.jpg' or im_name[-4:] == '.png':
            image_names.append(im_name)

    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../' + test_dir + im_name), \
            '{} does not exist'.format(test_dir + im_name)
        # 128 == cv2.IMREAD_IGNORE_ORIENTATION
        im = cv2.imread(cur_path + '/../' + test_dir + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../' + model_dir, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(ctx_id[0])], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # warm up
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, [1.0], config)
        im_shapes = [data_batch.data[i][0].shape[2:4] for i in xrange(len(data_batch.data))]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes[0], scores[0], num_classes, 100,
                im_shapes[0][1], im_shapes[0][0],
                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])
            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [result_masks[j][:, 0, :, :] for j in range(1, num_classes)]
        print 'testing {} {:.4f}s'.format(im_name, toc())

        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(cur_path + '/../' + test_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config, 1.0 / scales[0], False)
        # save img (convert back to BGR for cv2.imwrite)
        cv2.imwrite(cur_path + '/../' + result_dir + im_name,
                    cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    print 'done'
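A tiny self-contained version of the joint box/mask filtering in the visualize step above: the masks must be sliced with the same indices as the detections so the two arrays stay aligned. filter_by_score is an illustrative helper name and the arrays are toy stand-ins.

import numpy as np

def filter_by_score(dets, masks, thresh=0.7):
    # dets: (N, 5) with score in the last column; masks: (N, H, W)
    keep = np.where(dets[:, -1] > thresh)[0]
    return dets[keep], masks[keep]

dets = np.array([[0, 0, 10, 10, 0.9], [5, 5, 20, 20, 0.3]])
masks = np.random.rand(2, 21, 21)
dets, masks = filter_by_score(dets, masks)
print(dets.shape, masks.shape)   # (1, 5) (1, 21, 21)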