def get_batch(self):
    """Load one Fast R-CNN training mini-batch into self.data / self.label."""
    # Slice the roidb entries that belong to the current cursor position.
    start = self.cur
    stop = min(start + self.batch_size, self.size)
    batch_roidb = [self.roidb[self.index[i]] for i in range(start, stop)]

    # Decide how the batch is partitioned across devices.
    workload = self.work_load_list
    devices = self.ctx
    if workload is None:
        workload = [1] * len(devices)
    assert isinstance(workload, list) and len(workload) == len(devices), \
        "Invalid settings for work load. "
    slices = _split_input_slice(self.batch_size, workload)

    # Build a data/label dict per device slice.
    data_list = []
    label_list = []
    for device_slice in slices:
        sub_roidb = [batch_roidb[i] for i in range(device_slice.start, device_slice.stop)]
        device_data, device_label = get_rcnn_batch(sub_roidb, self.cfg)
        data_list.append(device_data)
        label_list.append(device_label)

    # Stack the per-device pieces back into whole-batch arrays.
    all_data = {key: tensor_vstack([piece[key] for piece in data_list])
                for key in data_list[0].keys()}
    all_label = {key: tensor_vstack([piece[key] for piece in label_list])
                 for key in label_list[0].keys()}

    self.data = [mx.nd.array(all_data[name]) for name in self.data_name]
    self.label = [mx.nd.array(all_label[name]) for name in self.label_name]
def get_batch(self):
    """Fill self.data / self.label with one RPN (end-to-end) training mini-batch.

    Loads images per device slice via get_rpn_batch, pads images to a common
    shape, infers the feature-map shape from self.feat_sym, then converts
    gt_boxes into per-anchor classification/regression targets with
    assign_anchor.
    """
    # slice roidb
    cur_from = self.cur
    cur_to = min(cur_from + self.batch_size, self.size)
    roidb = [self.roidb[self.index[i]] for i in range(cur_from, cur_to)]

    # decide multi device slice
    work_load_list = self.work_load_list
    ctx = self.ctx
    if work_load_list is None:
        work_load_list = [1] * len(ctx)
    assert isinstance(work_load_list, list) and len(work_load_list) == len(ctx), \
        "Invalid settings for work load. "
    slices = _split_input_slice(self.batch_size, work_load_list)

    # get testing data for multigpu
    data_list = []
    label_list = []
    for islice in slices:
        iroidb = [roidb[i] for i in range(islice.start, islice.stop)]
        data, label = get_rpn_batch(iroidb, self.cfg)
        data_list.append(data)
        label_list.append(label)

    # pad data first and then assign anchor (read label)
    data_tensor = tensor_vstack([batch['data'] for batch in data_list])
    for data, data_pad in zip(data_list, data_tensor):
        # re-add the batch axis that tensor_vstack collapsed
        data['data'] = data_pad[np.newaxis, :]

    new_label_list = []
    for data, label in zip(data_list, label_list):
        # infer label shape from the (padded) input shapes
        data_shape = {k: v.shape for k, v in data.items()}
        del data_shape['im_info']  # im_info is not an input of feat_sym
        _, feat_shape, _ = self.feat_sym.infer_shape(**data_shape)
        feat_shape = [int(i) for i in feat_shape[0]]

        # add gt_boxes to data for e2e
        data['gt_boxes'] = label['gt_boxes'][np.newaxis, :, :]

        # assign anchor for label
        label = assign_anchor(feat_shape, label['gt_boxes'], data['im_info'], self.cfg,
                              self.feat_stride, self.anchor_scales,
                              self.anchor_ratios, self.allowed_border,
                              self.normalize_target, self.bbox_mean, self.bbox_std)
        new_label_list.append(label)

    all_data = dict()
    for key in self.data_name:
        all_data[key] = tensor_vstack([batch[key] for batch in data_list])

    all_label = dict()
    for key in self.label_name:
        # 'label' is padded with -1 (ignore index); everything else with 0
        pad = -1 if key == 'label' else 0
        all_label[key] = tensor_vstack([batch[key] for batch in new_label_list], pad=pad)

    self.data = [mx.nd.array(all_data[key]) for key in self.data_name]
    self.label = [mx.nd.array(all_label[key]) for key in self.label_name]
def get_rcnn_batch(roidb, cfg):
    """
    return a dict of multiple images
    :param roidb: a list of dict, whose length controls batch size
    ['images', 'flipped'] + ['gt_boxes', 'boxes', 'gt_overlap'] => ['bbox_targets']
    :return: data, label
    """
    num_images = len(roidb)
    imgs, roidb = get_image(roidb, cfg)
    im_array = tensor_vstack(imgs)

    assert cfg.TRAIN.BATCH_ROIS == -1 or cfg.TRAIN.BATCH_ROIS % cfg.TRAIN.BATCH_IMAGES == 0, \
        'BATCHIMAGES {} must divide BATCH_ROIS {}'.format(cfg.TRAIN.BATCH_IMAGES, cfg.TRAIN.BATCH_ROIS)

    if cfg.TRAIN.BATCH_ROIS == -1:
        # keep every proposal; all of them may be sampled as foreground
        rois_per_image = np.sum([iroidb['boxes'].shape[0] for iroidb in roidb])
        fg_rois_per_image = rois_per_image
    else:
        # BUGFIX: use floor division so the RoI count stays an int under
        # Python 3 (plain '/' yields a float; identical result on Python 2).
        rois_per_image = cfg.TRAIN.BATCH_ROIS // cfg.TRAIN.BATCH_IMAGES
        fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image).astype(int)

    rois_array = list()
    labels_array = list()
    bbox_targets_array = list()
    bbox_weights_array = list()

    for im_i in range(num_images):
        roi_rec = roidb[im_i]

        # infer num_classes from gt_overlaps
        num_classes = roi_rec['gt_overlaps'].shape[1]

        # label = class RoI has max overlap with
        rois = roi_rec['boxes']
        labels = roi_rec['max_classes']
        overlaps = roi_rec['max_overlaps']
        bbox_targets = roi_rec['bbox_targets']

        im_rois, labels, bbox_targets, bbox_weights = \
            sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                        labels, overlaps, bbox_targets)

        # project im_rois; do not round roi
        rois = im_rois
        # prepend the batch index so each RoI row is [batch_idx, x1, y1, x2, y2]
        batch_index = im_i * np.ones((rois.shape[0], 1))
        rois_array_this_image = np.hstack((batch_index, rois))
        rois_array.append(rois_array_this_image)

        # add labels
        labels_array.append(labels)
        bbox_targets_array.append(bbox_targets)
        bbox_weights_array.append(bbox_weights)

    rois_array = np.array(rois_array)
    labels_array = np.array(labels_array)
    bbox_targets_array = np.array(bbox_targets_array)
    bbox_weights_array = np.array(bbox_weights_array)

    data = {'data': im_array,
            'rois': rois_array}
    label = {'label': labels_array,
             'bbox_target': bbox_targets_array,
             'bbox_weight': bbox_weights_array}

    return data, label
def get_rcnn_batch(roidb, cfg):
    """
    return a dict of multiple images
    :param roidb: a list of dict, whose length controls batch size
    ['images', 'flipped'] + ['gt_boxes', 'boxes', 'gt_overlap'] => ['bbox_targets']
    :return: data, label
    """
    num_images = len(roidb)
    imgs, roidb = get_image(roidb, cfg)
    im_array = tensor_vstack(imgs)

    assert cfg.TRAIN.BATCH_ROIS == -1 or cfg.TRAIN.BATCH_ROIS % cfg.TRAIN.BATCH_IMAGES == 0, \
        'BATCHIMAGES {} must divide BATCH_ROIS {}'.format(cfg.TRAIN.BATCH_IMAGES, cfg.TRAIN.BATCH_ROIS)

    if cfg.TRAIN.BATCH_ROIS == -1:
        # keep every proposal; all of them may be sampled as foreground
        rois_per_image = np.sum([iroidb['boxes'].shape[0] for iroidb in roidb])
        fg_rois_per_image = rois_per_image
    else:
        # BUGFIX: use floor division so the RoI count stays an int under
        # Python 3 (plain '/' yields a float; identical result on Python 2).
        rois_per_image = cfg.TRAIN.BATCH_ROIS // cfg.TRAIN.BATCH_IMAGES
        fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image).astype(int)

    rois_array = list()
    labels_array = list()
    bbox_targets_array = list()
    bbox_weights_array = list()

    for im_i in range(num_images):
        roi_rec = roidb[im_i]

        # infer num_classes from gt_overlaps
        num_classes = roi_rec['gt_overlaps'].shape[1]

        # label = class RoI has max overlap with
        rois = roi_rec['boxes']
        labels = roi_rec['max_classes']
        overlaps = roi_rec['max_overlaps']
        bbox_targets = roi_rec['bbox_targets']

        im_rois, labels, bbox_targets, bbox_weights = \
            sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                        labels, overlaps, bbox_targets)

        # project im_rois; do not round roi
        rois = im_rois
        # prepend the batch index so each RoI row is [batch_idx, x1, y1, x2, y2]
        batch_index = im_i * np.ones((rois.shape[0], 1))
        rois_array_this_image = np.hstack((batch_index, rois))
        rois_array.append(rois_array_this_image)

        # add labels
        labels_array.append(labels)
        bbox_targets_array.append(bbox_targets)
        bbox_weights_array.append(bbox_weights)

    rois_array = np.array(rois_array)
    labels_array = np.array(labels_array)
    bbox_targets_array = np.array(bbox_targets_array)
    bbox_weights_array = np.array(bbox_weights_array)

    data = {'data': im_array,
            'rois': rois_array}
    label = {'label': labels_array,
             'bbox_target': bbox_targets_array,
             'bbox_weight': bbox_weights_array}

    return data, label
def get_batch(self):
    """Fill self.data / self.label with one FPN-RPN training mini-batch.

    Same flow as the single-level loader, but infers feature shapes for the
    P3/P4/P5 pyramid levels and passes all three (plus each level's stride,
    scales and ratios) to assign_anchor.
    """
    # slice roidb
    cur_from = self.cur
    cur_to = min(cur_from + self.batch_size, self.size)
    roidb = [self.roidb[self.index[i]] for i in range(cur_from, cur_to)]

    # decide multi device slice
    work_load_list = self.work_load_list
    ctx = self.ctx
    if work_load_list is None:
        work_load_list = [1] * len(ctx)
    assert isinstance(work_load_list, list) and len(work_load_list) == len(ctx), \
        "Invalid settings for work load. "
    slices = _split_input_slice(self.batch_size, work_load_list)

    # get testing data for multigpu
    data_list = []
    label_list = []
    for islice in slices:
        iroidb = [roidb[i] for i in range(islice.start, islice.stop)]
        data, label = get_rpn_batch(iroidb, self.cfg)
        data_list.append(data)
        label_list.append(label)

    # pad data first and then assign anchor (read label)
    data_tensor = tensor_vstack([batch['data'] for batch in data_list])
    for data, data_pad in zip(data_list, data_tensor):
        # re-add the batch axis that tensor_vstack collapsed
        data['data'] = data_pad[np.newaxis, :]

    new_label_list = []
    for data, label in zip(data_list, label_list):
        # infer label shape for each pyramid level from the padded input shapes
        data_shape = {k: v.shape for k, v in data.items()}
        del data_shape['im_info']  # im_info is not an input of the feat symbols
        _, feat_shape_p3, _ = self.feat_sym_p3.infer_shape(**data_shape)
        feat_shape_p3 = [int(i) for i in feat_shape_p3[0]]
        _, feat_shape_p4, _ = self.feat_sym_p4.infer_shape(**data_shape)
        feat_shape_p4 = [int(i) for i in feat_shape_p4[0]]
        _, feat_shape_p5, _ = self.feat_sym_p5.infer_shape(**data_shape)
        feat_shape_p5 = [int(i) for i in feat_shape_p5[0]]

        # add gt_boxes to data for e2e
        data['gt_boxes'] = label['gt_boxes'][np.newaxis, :, :]

        # assign anchor for label, one stride/scale/ratio triple per level
        label = assign_anchor(feat_shape_p3, feat_shape_p4, feat_shape_p5,
                              label['gt_boxes'], data['im_info'], self.cfg,
                              self.feat_stride_p3, self.anchor_scales_p3, self.anchor_ratios_p3,
                              self.feat_stride_p4, self.anchor_scales_p4, self.anchor_ratios_p4,
                              self.feat_stride_p5, self.anchor_scales_p5, self.anchor_ratios_p5,
                              self.allowed_border)
        new_label_list.append(label)

    all_data = dict()
    for key in self.data_name:
        all_data[key] = tensor_vstack([batch[key] for batch in data_list])

    all_label = dict()
    for key in self.label_name:
        # 'label' is padded with -1 (ignore index); everything else with 0
        pad = -1 if key == 'label' else 0
        all_label[key] = tensor_vstack([batch[key] for batch in new_label_list], pad=pad)

    self.data = [mx.nd.array(all_data[key]) for key in self.data_name]
    self.label = [mx.nd.array(all_label[key]) for key in self.label_name]
def parfetch(config, crop_width, crop_height, isegdb):
    """Fetch one segmentation training sample and build its data/label dicts.

    :param config: experiment config (dataset, network and TRAIN sections)
    :param crop_width: crop width in pixels (used when TRAIN.enable_crop)
    :param crop_height: crop height in pixels
    :param isegdb: one segdb record
    :return: {'data': datas, 'label': labels}
    """
    # get testing data for multigpu
    if config.dataset.dataset == "PascalVOC" or config.dataset.dataset == "ADE20K":
        # VOC/ADE20K path: no cropping, optional metric labels only.
        datas = {}
        labels = {}
        datas['data'], labels['label'] = get_segmentation_image_voc(isegdb, config)
        if config.network.use_metric:
            labels['metric_label'] = generate_metric_label(labels['label'])
            if config.TRAIN.use_mult_metric:
                for i in [1, 2, 4]:
                    labels['metric_label_' + str(i)] = generate_metric_label(
                        labels['label'], skip_step=i)
        return {'data': datas, 'label': labels}
    else:
        datas, labels = get_segmentation_train_batch(isegdb, config)
        feature_stride = config.network.LABEL_STRIDE
        network_ratio = config.network.ratio
        if config.TRAIN.enable_crop:
            datas_internal = datas['data']
            labels_internal = labels['label']
            # pick a random top-left corner so the crop fits inside the image
            sx = math.floor(random.random() * (datas_internal.shape[3] - crop_width + 1))
            sy = math.floor(random.random() * (datas_internal.shape[2] - crop_height + 1))
            sx = (int)(sx)
            sy = (int)(sy)
            assert (sx >= 0 and sx < datas_internal.shape[3] - crop_width + 1)
            assert (sy >= 0 and sy < datas_internal.shape[2] - crop_height + 1)
            ex = (int)(sx + crop_width - 1)
            ey = (int)(sy + crop_height - 1)
            datas_internal = datas_internal[:, :, sy:ey + 1, sx:ex + 1]
            labels_internal = labels_internal[:, :, sy:ey + 1, sx:ex + 1]
            if config.network.use_crop_context:
                # Build a zero-padded canvas around the crop so a larger
                # "context" window (crop size / crop_context_scale) can be cut.
                crop_context_scale = config.network.crop_context_scale
                scale_width = make_divisible(
                    int(float(crop_width) / crop_context_scale), feature_stride)
                scale_height = make_divisible(
                    int(float(crop_height) / crop_context_scale), feature_stride)
                # NOTE(review): Python-2 style int division; under Python 3
                # these pads become floats (hence the int() casts below).
                pad_width = int(scale_width - crop_width) / 2
                pad_height = int(scale_height - crop_height) / 2
                datas['origin_data'] = np.zeros(
                    (datas['data'].shape[0], datas['data'].shape[1],
                     datas['data'].shape[2] + 2 * int(pad_height),
                     datas['data'].shape[3] + 2 * int(pad_width)))
                datas['origin_data'][:, :,
                                     int(pad_height):datas['data'].shape[2] + int(pad_height),
                                     int(pad_width):datas['data'].shape[3] + int(pad_width)] = datas['data']
                # label canvas is filled with 255 (ignore value)
                labels['origin_label'] = np.full(
                    (labels['label'].shape[0], labels['label'].shape[1],
                     labels['label'].shape[2] + 2 * int(pad_height),
                     labels['label'].shape[3] + 2 * int(pad_width)), 255)
                labels['origin_label'][:, :,
                                       int(pad_height):labels['label'].shape[2] + int(pad_height),
                                       int(pad_width):labels['label'].shape[3] + int(pad_width)] = labels['label']
                datas_origin = datas['origin_data'][:, :, sy:sy + scale_height, sx:sx + scale_width]
                labels_origin = labels['origin_label'][:, :, sy:sy + scale_height, sx:sx + scale_width]
                datas['origin_data'] = datas_origin
                labels['origin_label'] = labels_origin
                rois = []
                for i, im_info in zip(xrange(datas_internal.shape[0]), datas['im_info']):
                    # RoI rows are [batch_idx, x1, y1, x2, y2] in context-window coords
                    rois.append(
                        np.array([
                            i, pad_width, pad_height, pad_width + crop_width,
                            pad_height + crop_height
                        ]).reshape((1, 5)))
                datas['rois'] = tensor_vstack(rois)
                datas['data'] = datas_internal
                labels['label'] = labels_internal
            else:
                rois = []
                for i, im_info in zip(xrange(datas_internal.shape[0]), datas['im_info']):
                    # map the crop window back to original-image coordinates
                    # (im_info[2] presumably holds the resize scale — verify)
                    scale = im_info[2]
                    rois.append(
                        np.array([
                            i, sx * network_ratio / scale, sy * network_ratio / scale,
                            (ex + 1) * network_ratio / scale,
                            (ey + 1) * network_ratio / scale
                        ]).reshape((1, 5)))
                datas['rois'] = tensor_vstack(rois)
                datas['data'] = datas_internal
                labels['label'] = labels_internal
                assert (datas['data'].shape[2] == crop_height) and (datas['data'].shape[3] == crop_width)
        else:
            datas_internal = datas['data']
            rois = []
            for i, im_info in zip(xrange(datas_internal.shape[0]), datas['im_info']):
                im_size = im_info[:2]
                rois.append(
                    np.array([
                        i, 0, 0, im_size[1] * network_ratio,
                        im_size[0] * network_ratio
                    ]).reshape((1, 5)))
            datas['rois'] = tensor_vstack(rois)
        labels_in = dict()
        # NOTE(review): labels['origin_label'] is only created in the
        # use_crop_context branch above; the other branches would raise
        # KeyError here — confirm those configs are never combined.
        labels_in['origin_label'] = labels['origin_label']
        # downsample the label map to the network's output resolution
        labels_in['label'] = np.zeros(
            (labels['label'].shape[0], labels['label'].shape[1],
             labels['label'].shape[2] // feature_stride,
             labels['label'].shape[3] // feature_stride))
        # to reshape the label to the network label (nearest-neighbour resize)
        for i, label in enumerate(labels['label']):
            label_im = Image.fromarray(
                np.squeeze(label.astype(np.uint8, copy=False))).resize(
                    (labels['label'].shape[3] // feature_stride,
                     labels['label'].shape[2] // feature_stride), Image.NEAREST)
            label = np.array(label_im)
            labels_in['label'][i, 0, :, :] = label
        labels = labels_in
        if config.TRAIN.enable_ignore_border:
            # mark a border strip with 255 so the loss ignores it
            labels['label'] = border_ignore_label(
                labels['label'], config.TRAIN.ignore_border_size, 255.0)
        if config.network.use_metric:
            labels['metric_label'] = generate_metric_label(labels['label'])
            if config.TRAIN.use_mult_metric:
                scale_name = ['a', 'b', 'c']
                if config.network.scale_list == [1, 2, 4]:
                    scale_name = ['', '', '']
                for idx, i in enumerate(config.network.scale_list):
                    labels['metric_label_' + str(i) + scale_name[idx]] = generate_metric_label(
                        labels['label'], skip_step=i)
        return {'data': datas, 'label': labels}
def _dispatch_rois_to_level(feat_id, level, im_rois, labels, gt_lables,
                            bbox_targets, bbox_weights):
    """Pick the RoIs (and matching labels/targets/weights) assigned to one
    pyramid level. If the level receives no RoI, return a single dummy
    all-zero RoI with label -1 so every level always contributes one row.
    """
    # NOTE: always take [0] from np.where — the original code omitted it for
    # levels 2 and 3 (tuple indexing happened to be equivalent, but it was
    # inconsistent with levels 0 and 1).
    idx = np.where(feat_id == level)[0]
    rois = im_rois[idx]
    if len(rois) == 0:
        return (np.zeros((1, 4)),
                -np.ones((1,)),
                -np.ones((1,)),
                np.zeros((1, bbox_targets.shape[1])),
                np.zeros((1, bbox_weights.shape[1])))
    return rois, labels[idx], gt_lables[idx], bbox_targets[idx], bbox_weights[idx]


def get_rcnn_batch(roidb, cfg):
    """
    return a dict of multiple images
    :param roidb: a list of dict, whose length controls batch size
    ['images', 'flipped'] + ['gt_boxes', 'boxes', 'gt_overlap'] => ['bbox_targets']
    :return: data, label
    """
    num_images = len(roidb)
    imgs, roidb = get_image(roidb, cfg)
    im_array = tensor_vstack(imgs)

    assert cfg.TRAIN.BATCH_ROIS == -1 or cfg.TRAIN.BATCH_ROIS % cfg.TRAIN.BATCH_IMAGES == 0, \
        'BATCHIMAGES {} must divide BATCH_ROIS {}'.format(cfg.TRAIN.BATCH_IMAGES, cfg.TRAIN.BATCH_ROIS)

    if cfg.TRAIN.BATCH_ROIS == -1:
        # keep every proposal; all of them may be sampled as foreground
        rois_per_image = np.sum([iroidb['boxes'].shape[0] for iroidb in roidb])
        fg_rois_per_image = rois_per_image
    else:
        # BUGFIX: floor division keeps the RoI count an int under Python 3
        # (plain '/' yields a float; identical result on Python 2).
        rois_per_image = cfg.TRAIN.BATCH_ROIS // cfg.TRAIN.BATCH_IMAGES
        fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image).astype(int)

    if cfg.network.ROIDispatch:
        # one RoI list per pyramid level 0..3
        rois_array_by_level = [list(), list(), list(), list()]
    else:
        rois_array = list()
    gt_labels_array = list()
    labels_array = list()
    bbox_targets_array = list()
    bbox_weights_array = list()

    for im_i in range(num_images):
        roi_rec = roidb[im_i]

        # infer num_classes from gt_overlaps
        num_classes = roi_rec['gt_overlaps'].shape[1]

        # label = class RoI has max overlap with
        rois = roi_rec['boxes']
        labels = roi_rec['max_classes']
        overlaps = roi_rec['max_overlaps']
        bbox_targets = roi_rec['bbox_targets']
        gt_lables = roi_rec['is_gt']

        if cfg.TRAIN.BATCH_ROIS == -1:
            im_rois, labels_t, bbox_targets, bbox_weights = \
                sample_rois_v2(rois, num_classes, cfg, labels=labels, overlaps=overlaps,
                               bbox_targets=bbox_targets, gt_boxes=None)
            # sample_rois_v2 with all RoIs kept must not reorder anything
            assert np.abs(im_rois - rois).max() < 1e-3
            assert np.abs(labels_t - labels).max() < 1e-3
        else:
            im_rois, labels, bbox_targets, bbox_weights, gt_lables = \
                sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                            labels, overlaps, bbox_targets, gt_lables=gt_lables)

        # project im_rois; do not round roi
        if cfg.network.ROIDispatch:
            # assign each RoI to a pyramid level by its area, clipped to [0, 3]
            w = im_rois[:, 2] - im_rois[:, 0] + 1
            h = im_rois[:, 3] - im_rois[:, 1] + 1
            feat_id = np.clip(np.floor(2 + np.log2(np.sqrt(w * h) / 224)), 0, 3).astype(int)

            level_labels = []
            level_gt_labels = []
            level_targets = []
            level_weights = []
            for level in range(4):
                l_rois, l_label, l_gt, l_tgt, l_wgt = _dispatch_rois_to_level(
                    feat_id, level, im_rois, labels, gt_lables, bbox_targets, bbox_weights)
                # stack batch index: each row becomes [batch_idx, x1, y1, x2, y2]
                rois_array_by_level[level].append(
                    np.hstack((im_i * np.ones((l_rois.shape[0], 1)), l_rois)))
                level_labels.append(l_label)
                level_gt_labels.append(l_gt)
                level_targets.append(l_tgt)
                level_weights.append(l_wgt)

            # re-assemble labels/targets in level order 0,1,2,3
            labels = np.concatenate(level_labels, axis=0)
            gt_lables = np.concatenate(level_gt_labels, axis=0)
            bbox_targets = np.concatenate(level_targets, axis=0)
            bbox_weights = np.concatenate(level_weights, axis=0)
        else:
            rois = im_rois
            batch_index = im_i * np.ones((rois.shape[0], 1))
            rois_array_this_image = np.hstack((batch_index, rois))
            rois_array.append(rois_array_this_image)

        # add labels
        gt_labels_array.append(gt_lables)
        labels_array.append(labels)
        bbox_targets_array.append(bbox_targets)
        bbox_weights_array.append(bbox_weights)

    gt_labels_array = np.array(gt_labels_array)
    nongt_index_array = np.where(gt_labels_array == 0)[1]
    labels_array = np.array(labels_array)
    bbox_targets_array = np.array(bbox_targets_array)
    bbox_weights_array = np.array(bbox_weights_array)

    if cfg.network.USE_NONGT_INDEX:
        label = {'label': labels_array,
                 'nongt_index': nongt_index_array,
                 'bbox_target': bbox_targets_array,
                 'bbox_weight': bbox_weights_array}
    else:
        label = {'label': labels_array,
                 'bbox_target': bbox_targets_array,
                 'bbox_weight': bbox_weights_array}

    if cfg.network.ROIDispatch:
        data = {'data': im_array,
                'rois_0': np.array(rois_array_by_level[0]),
                'rois_1': np.array(rois_array_by_level[1]),
                'rois_2': np.array(rois_array_by_level[2]),
                'rois_3': np.array(rois_array_by_level[3])}
    else:
        rois_array = np.array(rois_array)
        data = {'data': im_array,
                'rois': rois_array}

    if cfg.TRAIN.LEARN_NMS:
        # im info
        im_info = np.array([roidb[0]['im_info']], dtype=np.float32)
        # gt_boxes rows are [x1, y1, x2, y2, class]
        if roidb[0]['gt_classes'].size > 0:
            gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0]
            gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
            gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :]
            gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]
        else:
            gt_boxes = np.empty((0, 5), dtype=np.float32)
        data['im_info'] = im_info
        data['gt_boxes'] = gt_boxes

    return data, label
def get_segmentation_test_batch(segdb, config, is_train=False, has_label=True, scale=1.0):
    """
    return a dict of train batch
    :param segdb: ['image', 'flipped']
    :param config: the config setting
    :return: data, label, im_info
    """
    imgs, seg_cls_gts, segdb, origin_ims, origin_labels = get_segmentation_image(
        segdb, config, is_train=is_train, has_label=has_label, scale=scale)
    im_array = tensor_vstack(imgs)
    if has_label:
        seg_cls_gt = tensor_vstack(seg_cls_gts)
    else:
        seg_cls_gt = []

    im_info = tensor_vstack(
        [np.array([isegdb['im_info']], dtype=np.float32) for isegdb in segdb])
    origin_im = tensor_vstack(origin_ims)

    rois = []
    if config.network.use_crop_context:
        crop_context_scale = config.network.crop_context_scale
        crop_height, crop_width = config.TRAIN.crop_size
        feature_stride = config.network.LABEL_STRIDE
        scale_width = make_divisible(
            int(float(crop_width) / crop_context_scale), feature_stride)
        scale_height = make_divisible(
            int(float(crop_height) / crop_context_scale), feature_stride)
        # BUGFIX: floor division keeps the pads integral under Python 3
        # (same value as the old Python-2 '/' on ints)
        pad_width = int(scale_width - crop_width) // 2
        pad_height = int(scale_height - crop_height) // 2

        # (removed: a zero-padded 'origin_data' canvas was built here but never
        # read or returned — dead computation)

        # BUGFIX: do not reuse 'im_info' as the loop variable — the original
        # overwrote the batch im_info tensor with its last row.
        for i, info in enumerate(im_info):
            im_size = info[:2]
            # RoI rows are [batch_idx, x1, y1, x2, y2]
            # BUGFIX: y2 uses pad_height (the original added pad_width to the
            # height, mismatching the training-side RoI construction).
            rois.append(
                np.array([
                    i, pad_width, pad_height, pad_width + im_size[1],
                    pad_height + im_size[0]
                ]).reshape((1, 5)))
        rois = tensor_vstack(rois)
    else:
        network_ratio = config.network.ratio
        for i, info in enumerate(im_info):
            im_size = info[:2]
            rois.append(
                np.array([
                    i, 0, 0, im_size[1] * network_ratio,
                    im_size[0] * network_ratio
                ]).reshape((1, 5)))
        rois = tensor_vstack(rois)
        print(rois)

    data = {
        'data': im_array,
        'im_info': im_info,
        'origin_data': origin_im,
        'rois': rois
    }
    label = {'label': seg_cls_gt}

    return {'data': data, 'label': label}
def pred_eval(predictor, test_data, imdb, vis=False, ignore_cache=None, logger=None):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param ignore_cache: ignore the saved cache file
    :param logger: the logger instance
    :return:
    """
    res_file = os.path.join(imdb.result_path, imdb.name + '_segmentations.pkl')
    # NOTE(review): 'output_median' is not defined in this function or its
    # parameters — presumably a module-level flag; verify it exists.
    if output_median:
        output_median_dir = os.path.join(imdb.result_path, 'numpy_output')
        if not os.path.exists(output_median_dir):
            os.makedirs(output_median_dir)

    # short-circuit: reuse a cached evaluation result if present
    if os.path.exists(res_file) and not ignore_cache:
        with open(res_file, 'rb') as fid:
            evaluation_results = cPickle.load(fid)
        meanIU = evaluation_results['meanIU']
        IU_array = evaluation_results['IU_array']
        logger.info('evaluate segmentation: \n')
        logger.info('class IU_array:\n')
        logger.info(str(IU_array * 100))
        logger.info('meanIU:%.5f' % (meanIU * 100))
        return

    assert vis or not test_data.shuffle

    if test_data.has_label:
        scorer = ScoreUpdater(np.arange(config.dataset.NUM_CLASSES),
                              config.dataset.NUM_CLASSES, test_data.size, logger)
        scorer.reset()

    all_segmentation_result = [[] for _ in xrange(imdb.num_images)]
    num_steps = config.TEST.num_steps
    use_flipping = config.TEST.use_flipping
    num_class = config.dataset.NUM_CLASSES
    label_stride = config.network.LABEL_STRIDE
    idx = 0
    save_feature = 0
    if config.network.use_metric:
        output_name = "FUSION_softmax_output"
    else:
        output_name = "softmax_output"
    # output_name = "FUSION_softmax_output"
    name_i = 0

    for index, data_batch in enumerate(test_data):
        # provide_data entries look like [('data', (N, C, H, W))]
        origin_data_shapes = [(data_shape[0][1][0], data_shape[0][1][1],
                               data_shape[0][1][2], data_shape[0][1][3])
                              for data_shape in data_batch.provide_data]
        softmax_outputs_scales = []
        logger.info("#####################################")
        batch_size = 0
        for data_shape in origin_data_shapes:
            batch_size += data_shape[0]

        # NOTE(review): predict() is called without the data_batch here,
        # unlike the multiscale variant of this function — confirm the
        # Predictor caches the batch internally.
        softmax_batch_predictions = predictor.predict()

        # 3. get the final label prediction and save the softmax to the h5 format
        label_predictions = []
        for batch_softmax_output in softmax_batch_predictions:
            # per-pixel argmax over the class axis
            label_predictions.extend([np.argmax(softmax_output, axis=0)
                                      for softmax_output in batch_softmax_output])
        if config.TEST.save_h5py:
            # NOTE(review): 'idx:idx++test_data.batch_size - ...' parses as
            # idx + (+batch_size) - pad, i.e. a harmless double-plus typo.
            save_batch_softmax_ouputs(imdb.result_path,
                                      test_data.segdb[idx:idx++test_data.batch_size - data_batch.pad],
                                      softmax_batch_predictions)

        # 4. crop the prediction and the ground truth back to image size
        label_predictions_new = []
        for j, label_prediction in zip(xrange(len(label_predictions)), label_predictions):
            seg_rec = test_data.segdb[index * test_data.batch_size + j]
            imh, imw = (seg_rec['height'], seg_rec['width'])
            label_prediction = label_prediction[:imh, :imw]
            label_predictions_new.append(label_prediction)
        label_predictions = label_predictions_new

        # 5. update the online prediction score
        if test_data.has_label:
            labels_gt = [label[0].asnumpy() for label in test_data.label]
            labels_gt = tensor_vstack(labels_gt)
            labels_gt = [label for label in labels_gt]
            for j, label_prediction, label_gt in zip(xrange(len(label_predictions)),
                                                     label_predictions, labels_gt):
                seg_rec = test_data.segdb[index * test_data.batch_size + j]
                imh, imw = (seg_rec['height'], seg_rec['width'])
                label_gt = np.squeeze(label_gt[:, :imh, :imw])
                if Debug:
                    print label_prediction.shape, label_gt.shape
                assert label_prediction.shape == label_gt.shape
                scorer.update(pred_label=label_prediction, label=label_gt,
                              i=index * test_data.batch_size + j)

        # store predictions, excluding the padded tail of the last batch
        all_segmentation_result[idx: idx + test_data.batch_size - data_batch.pad] = \
            [output.astype('int8') for output in label_predictions]
        logger.info('Done {}/{}'.format(idx + batch_size, test_data.size))
        idx += test_data.batch_size - data_batch.pad

    # total results
    logger.info('-------------------------------------------------------')
    if test_data.has_label:
        evaluation_results = imdb.evaluate_segmentations(all_segmentation_result)
        with open(res_file, 'wb') as f:
            cPickle.dump(evaluation_results, f, protocol=cPickle.HIGHEST_PROTOCOL)
        meanIU = evaluation_results['meanIU']
        IU_array = evaluation_results['IU_array']
        logger.info('evaluate segmentation:')
        logger.info('class IU_array:')
        logger.info(str(IU_array * 100))
        logger.info('meanIU:%.5f' % (meanIU * 100))
    else:
        imdb.write_segmentation_result(all_segmentation_result)
        logger.info("write the result done!")
def pred_eval(predictor, test_data, imdb, vis=False, ignore_cache=None, logger=None): """ wrapper for calculating offline validation for faster data analysis in this example, all threshold are set by hand :param predictor: Predictor :param test_data: data iterator, must be non-shuffle :param imdb: image database :param vis: controls visualization :param ignore_cache: ignore the saved cache file :param logger: the logger instance :return: """ res_file = os.path.join(imdb.result_path, imdb.name + '_segmentations.pkl') output_median_dir = os.path.join(imdb.result_path, 'numpy_output') if not os.path.exists(output_median_dir): os.makedirs(output_median_dir) if os.path.exists(res_file) and not ignore_cache: with open(res_file, 'rb') as fid: evaluation_results = cPickle.load(fid) meanIU = evaluation_results['meanIU'] IU_array = evaluation_results['IU_array'] logger.info('evaluate segmentation: \n') logger.info('class IU_array:\n') logger.info(str(IU_array * 100)) logger.info('meanIU:%.5f' % (meanIU * 100)) return assert vis or not test_data.shuffle if test_data.has_label: scorer = ScoreUpdater(np.arange(config.dataset.NUM_CLASSES), config.dataset.NUM_CLASSES, test_data.size, logger) scorer.reset() all_segmentation_result = [[] for _ in xrange(imdb.num_images)] num_steps = config.TEST.num_steps use_flipping = config.TEST.use_flipping num_class = config.dataset.NUM_CLASSES label_stride = config.network.LABEL_STRIDE if config.dataset.dataset == 'cityscapes': feature_stride = label_stride else: feature_stride = label_stride * 4 idx = 0 save_feature = 0 if config.network.use_metric and not config.TRAIN.use_crl_ses: output_name = "FUSION_softmax_output" else: output_name = "softmax_output" #output_name = "FUSION_softmax_output" name_i = 0 for index, data_batch in enumerate(test_data): origin_data_shapes = [(data_shape[0][1][0], data_shape[0][1][1], data_shape[0][1][2], data_shape[0][1][3]) for data_shape in data_batch.provide_data] softmax_outputs_scales = [] 
logger.info("#####################################") for scale in config.TEST.ms_array: logger.info("Now Scale: %.2f" % scale) test_data.get_batch(scale, True) scale_data_shapes = [(data_shape[0][1][0], data_shape[0][1][1], data_shape[0][1][2], data_shape[0][1][3]) for data_shape in test_data.provide_data] data_shapes = [(scale_data_shape[0], scale_data_shape[1], make_divisible(scale_data_shape[2], feature_stride), make_divisible(scale_data_shape[3], feature_stride)) for scale_data_shape in scale_data_shapes] batch_size = 0 for data_shape in data_shapes: batch_size += data_shape[0] data_batch.provide_data = [[('data', data_shape)] for data_shape in data_shapes] canva_softmax_outputs = [ np.zeros((data_shape[0], num_class, data_shape[2] // label_stride * num_steps, data_shape[3] // label_stride * num_steps)) for data_shape in data_shapes ] canva_datas = [ np.zeros((data_shape[0], data_shape[1], data_shape[2] + label_stride, data_shape[3] + label_stride)) for data_shape in data_shapes ] sy = sx = label_stride // 2 for canva_data, origin_data, data_shape in zip( canva_datas, test_data.data, data_shapes): canva_data[:, :, sy:sy + data_shape[2], sx:sx + data_shape[3]] = resize_batch_target( origin_data[0].asnumpy(), data_shape[3], data_shape[2]) # prepare the start of the strides prediction_stride = label_stride // num_steps sy = sx = prediction_stride // 2 + np.arange( num_steps) * prediction_stride # operation of mult_step for ix in xrange(num_steps): for iy in xrange(num_steps): data_batch.data = [[ mx.nd.array(canva_data[:, :, sy[iy]:sy[iy] + data_shape[2], sx[ix]:sx[ix] + data_shape[3]]) ] for canva_data, data_shape in zip( canva_datas, data_shapes)] output_all = predictor.predict(data_batch) softmax_outputs = [ output[output_name].asnumpy() for output in output_all ] for canva_softmax_output, softmax_output in zip( canva_softmax_outputs, softmax_outputs): canva_softmax_output[:, :, iy::num_steps, ix::num_steps] = softmax_output if use_flipping: data_batch.data = 
[[ mx.nd.array( canva_data[:, :, sy[iy]:sy[iy] + data_shape[2], sx[ix]:sx[ix] + data_shape[3]][:, :, :, ::-1]) ] for canva_data, data_shape in zip( canva_datas, data_shapes)] output_all = predictor.predict(data_batch) softmax_outputs = [ output[output_name].asnumpy() for output in output_all ] for canva_softmax_output, softmax_output in zip( canva_softmax_outputs, softmax_outputs): canva_softmax_output[:, :, iy::num_steps, ix::num_steps] = 0.5 * ( canva_softmax_output[:, :, iy:: num_steps, ix:: num_steps] + softmax_output[:, :, :, :: -1]) # resize the data inputs and crop the scale inputs final_canva_softmax_outputs = [ np.zeros((scale_data_shape[0], num_class, scale_data_shape[2], scale_data_shape[3])) for scale_data_shape in scale_data_shapes ] for data_shape, scale_data_shape, canva_softmax_output, final_canva_softmax_output in zip( data_shapes, scale_data_shapes, canva_softmax_outputs, final_canva_softmax_outputs): final_canva_softmax_output[:, :, :, :] = resize_batch_softmax_output( canva_softmax_output, scale_data_shape[2:]) softmax_outputs_scales.append(final_canva_softmax_outputs) if output_median: for zi, output_all_batch in enumerate(output_all): output_all_batch_numpy = output_all_batch[ output_median_name].asnumpy() for zj, output_numpy_one in enumerate(output_all_batch_numpy): f = file( os.path.join( output_median_dir, output_median_name + '_' + str(name_i) + ".npy"), "wb") np.save(f, output_numpy_one) name_i += 1 print test_data.segdb[0] print "name", name_i #1.resize the data shape softmax_outputs_scales_new = [] for canva_softmax_outputs in softmax_outputs_scales: batch_softmax_output_list = [] for data_shape, batch_softmax_output in zip( origin_data_shapes, canva_softmax_outputs): if Debug: print "#1:batch_softmax_output ", batch_softmax_output.shape print "#2:target shape ", data_shape[2:] target_size = data_shape[2:] batch_softmax_output = resize_batch_softmax_output( batch_softmax_output, target_size) 
batch_softmax_output_list.append(batch_softmax_output) softmax_outputs_scales_new.append(batch_softmax_output_list) #2.get the avg softmax prediction softmax_batch_predictions = [ np.zeros((data_shape[0], num_class, data_shape[2], data_shape[3])) for data_shape in origin_data_shapes ] for i in xrange(len(softmax_outputs_scales_new)): for j in xrange(len(data_batch.provide_data)): softmax_batch_predictions[j] += softmax_outputs_scales_new[i][ j] for j in xrange(len(data_batch.provide_data)): softmax_batch_predictions[j] /= float( len(softmax_outputs_scales_new)) #3.get the final label prediction and save the softmax to the h5 format label_predictions = [] for batch_softmax_output in softmax_batch_predictions: label_predictions.extend([ np.argmax(softmax_output, axis=0) for softmax_output in batch_softmax_output ]) if config.TEST.save_h5py: save_batch_softmax_ouputs( imdb.result_path, test_data.segdb[idx:idx + +test_data.batch_size - data_batch.pad], softmax_batch_predictions) #4.crop the prediction and the ground truth label_predictions_new = [] for j, label_prediction in zip(xrange(len(label_predictions)), label_predictions): seg_rec = test_data.segdb[index * test_data.batch_size + j] imh, imw = (seg_rec['height'], seg_rec['width']) label_prediction = label_prediction[:imh, :imw] label_predictions_new.append(label_prediction) label_predictions = label_predictions_new #5.update the online prediction if test_data.has_label: labels_gt = [label[0].asnumpy() for label in test_data.label] labels_gt = tensor_vstack(labels_gt) labels_gt = [label for label in labels_gt] for j, label_prediction, label_gt in zip( xrange(len(label_predictions)), label_predictions, labels_gt): seg_rec = test_data.segdb[index * test_data.batch_size + j] imh, imw = (seg_rec['height'], seg_rec['width']) label_gt = np.squeeze(label_gt[:, :imh, :imw]) if Debug: print label_prediction.shape, label_gt.shape assert label_prediction.shape == label_gt.shape scorer.update(pred_label=label_prediction, 
label=label_gt, i=index * test_data.batch_size + j) all_segmentation_result[idx:idx + test_data.batch_size - data_batch.pad] = [ output.astype('int8') for output in label_predictions ] logger.info('Done {}/{}'.format(idx + batch_size, test_data.size)) idx += test_data.batch_size - data_batch.pad #total results logger.info('-------------------------------------------------------') if test_data.has_label: evaluation_results = imdb.evaluate_segmentations( all_segmentation_result) with open(res_file, 'wb') as f: cPickle.dump(evaluation_results, f, protocol=cPickle.HIGHEST_PROTOCOL) meanIU = evaluation_results['meanIU'] IU_array = evaluation_results['IU_array'] logger.info('evaluate segmentation:') logger.info('class IU_array:') logger.info(str(IU_array * 100)) logger.info('meanIU:%.5f' % (meanIU * 100)) else: imdb.write_segmentation_result(all_segmentation_result) logger.info("write the result done!")