def get_batch(self):
        # slice roidb
        cur_from = self.cur
        cur_to = min(cur_from + self.batch_size, self.size)
        roidb = [self.roidb[self.index[i]] for i in range(cur_from, cur_to)]

        # decide the per-device slices
        work_load_list = self.work_load_list
        ctx = self.ctx
        if work_load_list is None:
            work_load_list = [1] * len(ctx)
        assert isinstance(work_load_list, list) and len(work_load_list) == len(ctx), \
            "Invalid settings for work load. "
        slices = _split_input_slice(self.batch_size, work_load_list)
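        # slices: one Python slice per device, assumed to partition
        # [0, batch_size) in proportion to work_load_list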

        # get data and label for each device
        data_list = []
        label_list = []
        for islice in slices:
            iroidb = [roidb[i] for i in range(islice.start, islice.stop)]
            data, label = get_rcnn_batch(iroidb, self.cfg)
            data_list.append(data)
            label_list.append(label)

        all_data = dict()
        for key in data_list[0].keys():
            all_data[key] = tensor_vstack([batch[key] for batch in data_list])

        all_label = dict()
        for key in label_list[0].keys():
            all_label[key] = tensor_vstack([batch[key] for batch in label_list])

        self.data = [mx.nd.array(all_data[name]) for name in self.data_name]
        self.label = [mx.nd.array(all_label[name]) for name in self.label_name]
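
# tensor_vstack, used throughout these examples, merges per-device blobs.
# Its implementation is not part of this listing; below is a minimal numpy
# sketch of the assumed contract: concatenate along axis 0 and pad every
# trailing dimension to the batch-wide maximum with `pad`.
import numpy as np

def tensor_vstack_sketch(tensor_list, pad=0):
    ndim = tensor_list[0].ndim
    # output shape: sum of sizes along axis 0, per-dimension max elsewhere
    out_shape = [sum(t.shape[0] for t in tensor_list)]
    out_shape += [max(t.shape[d] for t in tensor_list) for d in range(1, ndim)]
    out = np.full(out_shape, pad, dtype=tensor_list[0].dtype)
    row = 0
    for t in tensor_list:
        # copy each tensor into the top-left corner of its slot
        region = (slice(row, row + t.shape[0]),)
        region += tuple(slice(0, s) for s in t.shape[1:])
        out[region] = t
        row += t.shape[0]
    return out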
    def get_batch(self):
        # slice roidb
        cur_from = self.cur
        cur_to = min(cur_from + self.batch_size, self.size)
        roidb = [self.roidb[self.index[i]] for i in range(cur_from, cur_to)]

        # decide the per-device slices
        work_load_list = self.work_load_list
        ctx = self.ctx
        if work_load_list is None:
            work_load_list = [1] * len(ctx)
        assert isinstance(work_load_list, list) and len(work_load_list) == len(ctx), \
            "Invalid settings for work load. "
        slices = _split_input_slice(self.batch_size, work_load_list)

        # get data and label for each device slice
        data_list = []
        label_list = []
        for islice in slices:
            iroidb = [roidb[i] for i in range(islice.start, islice.stop)]
            data, label = get_rpn_batch(iroidb, self.cfg)
            data_list.append(data)
            label_list.append(label)

        # pad data first and then assign anchor (read label)
        data_tensor = tensor_vstack([batch['data'] for batch in data_list])
        for data, data_pad in zip(data_list, data_tensor):
            data['data'] = data_pad[np.newaxis, :]

        new_label_list = []
        for data, label in zip(data_list, label_list):
            # infer label shape
            data_shape = {k: v.shape for k, v in data.items()}
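            # im_info is not an input of the feature symbol, so drop it
            # before shape inference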
            del data_shape['im_info']
            _, feat_shape, _ = self.feat_sym.infer_shape(**data_shape)
            feat_shape = [int(i) for i in feat_shape[0]]

            # add gt_boxes to data for e2e
            data['gt_boxes'] = label['gt_boxes'][np.newaxis, :, :]

            # assign anchor for label
            label = assign_anchor(feat_shape, label['gt_boxes'], data['im_info'], self.cfg,
                                  self.feat_stride, self.anchor_scales,
                                  self.anchor_ratios, self.allowed_border,
                                  self.normalize_target, self.bbox_mean, self.bbox_std)
            new_label_list.append(label)

        all_data = dict()
        for key in self.data_name:
            all_data[key] = tensor_vstack([batch[key] for batch in data_list])

        all_label = dict()
        for key in self.label_name:
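            # pad the anchor classification target with -1 ("ignore") so that
            # padded positions do not contribute to the loss; other label
            # blobs are zero-padded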
            pad = -1 if key == 'label' else 0
            all_label[key] = tensor_vstack([batch[key] for batch in new_label_list], pad=pad)

        self.data = [mx.nd.array(all_data[key]) for key in self.data_name]
        self.label = [mx.nd.array(all_label[key]) for key in self.label_name]
def get_rcnn_batch(roidb, cfg):
    """
    return a dict of multiple images
    :param roidb: a list of dict, whose length controls batch size
    ['image', 'flipped'] + ['gt_boxes', 'boxes', 'gt_overlaps'] => ['bbox_targets']
    :param cfg: config object
    :return: data, label
    """
    num_images = len(roidb)
    imgs, roidb = get_image(roidb, cfg)
    im_array = tensor_vstack(imgs)

    assert cfg.TRAIN.BATCH_ROIS == -1 or cfg.TRAIN.BATCH_ROIS % cfg.TRAIN.BATCH_IMAGES == 0, \
        'BATCH_IMAGES {} must divide BATCH_ROIS {}'.format(cfg.TRAIN.BATCH_IMAGES, cfg.TRAIN.BATCH_ROIS)

    if cfg.TRAIN.BATCH_ROIS == -1:
        rois_per_image = np.sum([iroidb['boxes'].shape[0] for iroidb in roidb])
        fg_rois_per_image = rois_per_image
    else:
        rois_per_image = cfg.TRAIN.BATCH_ROIS // cfg.TRAIN.BATCH_IMAGES
        fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image).astype(int)

    rois_array = list()
    labels_array = list()
    bbox_targets_array = list()
    bbox_weights_array = list()

    for im_i in range(num_images):
        roi_rec = roidb[im_i]

        # infer num_classes from gt_overlaps
        num_classes = roi_rec['gt_overlaps'].shape[1]

        # label = class RoI has max overlap with
        rois = roi_rec['boxes']
        labels = roi_rec['max_classes']
        overlaps = roi_rec['max_overlaps']
        bbox_targets = roi_rec['bbox_targets']

        im_rois, labels, bbox_targets, bbox_weights = \
            sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                        labels, overlaps, bbox_targets)

        # project im_rois
        # do not round roi
        rois = im_rois
        batch_index = im_i * np.ones((rois.shape[0], 1))
        rois_array_this_image = np.hstack((batch_index, rois))
        rois_array.append(rois_array_this_image)

        # add labels
        labels_array.append(labels)
        bbox_targets_array.append(bbox_targets)
        bbox_weights_array.append(bbox_weights)

    rois_array = np.array(rois_array)
    labels_array = np.array(labels_array)
    bbox_targets_array = np.array(bbox_targets_array)
    bbox_weights_array = np.array(bbox_weights_array)

    data = {'data': im_array,
            'rois': rois_array}
    label = {'label': labels_array,
             'bbox_target': bbox_targets_array,
             'bbox_weight': bbox_weights_array}

    return data, label
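
# keys consumed from each roidb record by get_rcnn_batch above:
#   'boxes', 'max_classes', 'max_overlaps', 'bbox_targets', 'gt_overlaps'
# (num_classes is inferred from the second axis of 'gt_overlaps')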
    def get_batch(self):
        # slice roidb
        cur_from = self.cur
        cur_to = min(cur_from + self.batch_size, self.size)
        roidb = [self.roidb[self.index[i]] for i in range(cur_from, cur_to)]

        # decide the per-device slices
        work_load_list = self.work_load_list
        ctx = self.ctx
        if work_load_list is None:
            work_load_list = [1] * len(ctx)
        assert isinstance(work_load_list, list) and len(work_load_list) == len(ctx), \
            "Invalid settings for work load. "
        slices = _split_input_slice(self.batch_size, work_load_list)

        # get data and label for each device slice
        data_list = []
        label_list = []
        for islice in slices:
            iroidb = [roidb[i] for i in range(islice.start, islice.stop)]
            data, label = get_rpn_batch(iroidb, self.cfg)
            data_list.append(data)
            label_list.append(label)

        # pad data first and then assign anchor (read label)
        data_tensor = tensor_vstack([batch['data'] for batch in data_list])
        for data, data_pad in zip(data_list, data_tensor):
            data['data'] = data_pad[np.newaxis, :]

        new_label_list = []
        for data, label in zip(data_list, label_list):
            # infer label shape
            data_shape = {k: v.shape for k, v in data.items()}
            del data_shape['im_info']

            _, feat_shape_p3, _ = self.feat_sym_p3.infer_shape(**data_shape)
            feat_shape_p3 = [int(i) for i in feat_shape_p3[0]]
            _, feat_shape_p4, _ = self.feat_sym_p4.infer_shape(**data_shape)
            feat_shape_p4 = [int(i) for i in feat_shape_p4[0]]
            _, feat_shape_p5, _ = self.feat_sym_p5.infer_shape(**data_shape)
            feat_shape_p5 = [int(i) for i in feat_shape_p5[0]]

            # add gt_boxes to data for e2e
            data['gt_boxes'] = label['gt_boxes'][np.newaxis, :, :]

            # assign anchor for label
            label = assign_anchor(feat_shape_p3, feat_shape_p4, feat_shape_p5,
                                  label['gt_boxes'], data['im_info'], self.cfg,
                                  self.feat_stride_p3, self.anchor_scales_p3,
                                  self.anchor_ratios_p3, self.feat_stride_p4,
                                  self.anchor_scales_p4, self.anchor_ratios_p4,
                                  self.feat_stride_p5, self.anchor_scales_p5,
                                  self.anchor_ratios_p5, self.allowed_border)
            new_label_list.append(label)

        all_data = dict()
        for key in self.data_name:
            all_data[key] = tensor_vstack([batch[key] for batch in data_list])

        all_label = dict()
        for key in self.label_name:
            pad = -1 if key == 'label' else 0
            all_label[key] = tensor_vstack(
                [batch[key] for batch in new_label_list], pad=pad)

        self.data = [mx.nd.array(all_data[key]) for key in self.data_name]
        self.label = [mx.nd.array(all_label[key]) for key in self.label_name]
def parfetch(config, crop_width, crop_height, isegdb):
    # build the data and label blobs for a single segdb record

    if config.dataset.dataset == "PascalVOC" or config.dataset.dataset == "ADE20K":
        datas = {}
        labels = {}
        datas['data'], labels['label'] = get_segmentation_image_voc(
            isegdb, config)
        if config.network.use_metric:
            labels['metric_label'] = generate_metric_label(labels['label'])
        if config.TRAIN.use_mult_metric:
            for i in [1, 2, 4]:
                labels['metric_label_' + str(i)] = generate_metric_label(
                    labels['label'], skip_step=i)

        return {'data': datas, 'label': labels}
    else:
        datas, labels = get_segmentation_train_batch(isegdb, config)
        feature_stride = config.network.LABEL_STRIDE
        network_ratio = config.network.ratio
        if config.TRAIN.enable_crop:
            datas_internal = datas['data']
            labels_internal = labels['label']
            # sample a random top-left corner for the crop
            sx = int(math.floor(random.random() *
                                (datas_internal.shape[3] - crop_width + 1)))
            sy = int(math.floor(random.random() *
                                (datas_internal.shape[2] - crop_height + 1)))

            assert 0 <= sx < datas_internal.shape[3] - crop_width + 1
            assert 0 <= sy < datas_internal.shape[2] - crop_height + 1

            ex = sx + crop_width - 1
            ey = sy + crop_height - 1

            datas_internal = datas_internal[:, :, sy:ey + 1, sx:ex + 1]
            labels_internal = labels_internal[:, :, sy:ey + 1, sx:ex + 1]

            if config.network.use_crop_context:
                crop_context_scale = config.network.crop_context_scale

                scale_width = make_divisible(
                    int(float(crop_width) / crop_context_scale),
                    feature_stride)
                scale_height = make_divisible(
                    int(float(crop_height) / crop_context_scale),
                    feature_stride)
                pad_width = (scale_width - crop_width) // 2
                pad_height = (scale_height - crop_height) // 2

                datas['origin_data'] = np.zeros(
                    (datas['data'].shape[0], datas['data'].shape[1],
                     datas['data'].shape[2] + 2 * pad_height,
                     datas['data'].shape[3] + 2 * pad_width))
                datas['origin_data'][:, :,
                                     pad_height:datas['data'].shape[2] + pad_height,
                                     pad_width:datas['data'].shape[3] + pad_width] = datas['data']

                labels['origin_label'] = np.full(
                    (labels['label'].shape[0], labels['label'].shape[1],
                     labels['label'].shape[2] + 2 * pad_height,
                     labels['label'].shape[3] + 2 * pad_width), 255)
                labels['origin_label'][:, :,
                                       pad_height:labels['label'].shape[2] + pad_height,
                                       pad_width:labels['label'].shape[3] + pad_width] = labels['label']

                datas_origin = datas['origin_data'][:, :, sy:sy + scale_height,
                                                    sx:sx + scale_width]

                labels_origin = labels['origin_label'][:, :,
                                                       sy:sy + scale_height,
                                                       sx:sx + scale_width]

                datas['origin_data'] = datas_origin
                labels['origin_label'] = labels_origin

                # labels_origin_in = np.zeros((labels['origin_label'].shape[0],labels['origin_label'].shape[1],
                #                   labels['origin_label'].shape[2]//feature_stride,labels['origin_label'].shape[3]//feature_stride))
                # for i, label in enumerate(labels['origin_label']):
                #     label_im = Image.fromarray(np.squeeze(label.astype(np.uint8, copy=False))).resize(
                #         (labels['origin_label'].shape[3] // feature_stride,
                #          labels['origin_label'].shape[2] // feature_stride), Image.NEAREST)
                #     label = np.array(label_im)
                #     labels_origin_in[i, 0, :, :] = label
                #
                # labels['origin_label']=labels_origin_in

                rois = []
                for i, im_info in zip(xrange(datas_internal.shape[0]),
                                      datas['im_info']):
                    rois.append(
                        np.array([
                            i, pad_width, pad_height, pad_width + crop_width,
                            pad_height + crop_height
                        ]).reshape((1, 5)))
                datas['rois'] = tensor_vstack(rois)
                # print rois

                datas['data'] = datas_internal
                labels['label'] = labels_internal

            else:
                rois = []
                for i, im_info in zip(xrange(datas_internal.shape[0]),
                                      datas['im_info']):
                    scale = im_info[2]
                    rois.append(
                        np.array([
                            i, sx * network_ratio / scale,
                            sy * network_ratio / scale,
                            (ex + 1) * network_ratio / scale,
                            (ey + 1) * network_ratio / scale
                        ]).reshape((1, 5)))
                datas['rois'] = tensor_vstack(rois)

                datas['data'] = datas_internal
                labels['label'] = labels_internal
                assert datas['data'].shape[2] == crop_height and \
                    datas['data'].shape[3] == crop_width
        else:
            datas_internal = datas['data']
            rois = []
            for i, im_info in zip(xrange(datas_internal.shape[0]),
                                  datas['im_info']):
                im_size = im_info[:2]
                rois.append(
                    np.array([
                        i, 0, 0, im_size[1] * network_ratio,
                        im_size[0] * network_ratio
                    ]).reshape((1, 5)))
            datas['rois'] = tensor_vstack(rois)

        # if feature_stride == 1:
        #     assert (labels['label'].shape[2] == crop_height) and (labels['label'].shape[3] == crop_width)
        # else:

        labels_in = dict()
        # 'origin_label' only exists when enable_crop + use_crop_context
        # produced it above; guard to avoid a KeyError on the other paths
        if 'origin_label' in labels:
            labels_in['origin_label'] = labels['origin_label']
        labels_in['label'] = np.zeros(
            (labels['label'].shape[0], labels['label'].shape[1],
             labels['label'].shape[2] // feature_stride,
             labels['label'].shape[3] // feature_stride))

        # to reshape the label to the network label
        for i, label in enumerate(labels['label']):
            label_im = Image.fromarray(
                np.squeeze(label.astype(np.uint8, copy=False))).resize(
                    (labels['label'].shape[3] // feature_stride,
                     labels['label'].shape[2] // feature_stride),
                    Image.NEAREST)
            label = np.array(label_im)
            labels_in['label'][i, 0, :, :] = label

        labels = labels_in

        if config.TRAIN.enable_ignore_border:
            labels['label'] = border_ignore_label(
                labels['label'], config.TRAIN.ignore_border_size, 255.0)

        if config.network.use_metric:
            labels['metric_label'] = generate_metric_label(labels['label'])

        if config.TRAIN.use_mult_metric:
            scale_name = ['a', 'b', 'c']
            if config.network.scale_list == [1, 2, 4]:
                scale_name = ['', '', '']
            for idx, i in enumerate(config.network.scale_list):
                labels['metric_label_' + str(i) +
                       scale_name[idx]] = generate_metric_label(
                           labels['label'], skip_step=i)

        return {'data': datas, 'label': labels}
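
# make_divisible (used above for crop-context sizing) is not defined in this
# listing; a one-line sketch, assuming it rounds v up to the nearest multiple
# of divisor (math imported as in the examples above):
def make_divisible_sketch(v, divisor):
    return int(math.ceil(float(v) / divisor) * divisor)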
def get_rcnn_batch(roidb, cfg):
    """
    return a dict of multiple images
    :param roidb: a list of dict, whose length controls batch size
    ['image', 'flipped'] + ['gt_boxes', 'boxes', 'gt_overlaps'] => ['bbox_targets']
    :param cfg: config object
    :return: data, label
    """
    num_images = len(roidb)
    imgs, roidb = get_image(roidb, cfg)
    im_array = tensor_vstack(imgs)

    assert cfg.TRAIN.BATCH_ROIS == -1 or cfg.TRAIN.BATCH_ROIS % cfg.TRAIN.BATCH_IMAGES == 0, \
        'BATCH_IMAGES {} must divide BATCH_ROIS {}'.format(cfg.TRAIN.BATCH_IMAGES, cfg.TRAIN.BATCH_ROIS)

    if cfg.TRAIN.BATCH_ROIS == -1:
        rois_per_image = np.sum([iroidb['boxes'].shape[0] for iroidb in roidb])
        fg_rois_per_image = rois_per_image
    else:
        rois_per_image = cfg.TRAIN.BATCH_ROIS // cfg.TRAIN.BATCH_IMAGES
        fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image).astype(int)

    if cfg.network.ROIDispatch:
        rois_array_0 = list()
        rois_array_1 = list()
        rois_array_2 = list()
        rois_array_3 = list()
    else:
        rois_array = list()

    gt_labels_array = list()
    labels_array = list()
    bbox_targets_array = list()
    bbox_weights_array = list()

    for im_i in range(num_images):
        roi_rec = roidb[im_i]

        # infer num_classes from gt_overlaps
        num_classes = roi_rec['gt_overlaps'].shape[1]

        # label = class RoI has max overlap with
        rois = roi_rec['boxes']
        labels = roi_rec['max_classes']
        overlaps = roi_rec['max_overlaps']
        bbox_targets = roi_rec['bbox_targets']
        gt_lables = roi_rec['is_gt']

        if cfg.TRAIN.BATCH_ROIS == -1:
            im_rois, labels_t, bbox_targets, bbox_weights = \
                sample_rois_v2(rois, num_classes, cfg, labels=labels, overlaps=overlaps, bbox_targets=bbox_targets, gt_boxes=None)

            assert np.abs(im_rois - rois).max() < 1e-3
            assert np.abs(labels_t - labels).max() < 1e-3
        else:
            im_rois, labels, bbox_targets, bbox_weights, gt_lables =  \
                sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                            labels, overlaps, bbox_targets, gt_lables=gt_lables)

        # project im_rois
        # do not round roi
        if cfg.network.ROIDispatch:
            w = im_rois[:, 2] - im_rois[:, 0] + 1
            h = im_rois[:, 3] - im_rois[:, 1] + 1
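            # FPN-style level assignment (Lin et al., Eq. 1):
            # k = floor(2 + log2(sqrt(w * h) / 224)), clipped to the
            # four pyramid levels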
            feat_id = np.clip(np.floor(2 + np.log2(np.sqrt(w * h) / 224)), 0, 3).astype(int)

            rois_0_idx = np.where(feat_id == 0)[0]
            rois_0 = im_rois[rois_0_idx]
            if len(rois_0) == 0:
                rois_0 = np.zeros((1, 4))
                label_0 = -np.ones((1,))
                gt_label_0 = -np.ones((1,))
                bbox_targets_0 = np.zeros((1, bbox_targets.shape[1]))
                bbox_weights_0 = np.zeros((1, bbox_weights.shape[1]))
            else:
                label_0 = labels[rois_0_idx]
                gt_label_0 = gt_lables[rois_0_idx]
                bbox_targets_0 = bbox_targets[rois_0_idx]
                bbox_weights_0 = bbox_weights[rois_0_idx]

            rois_1_idx = np.where(feat_id == 1)[0]
            rois_1 = im_rois[rois_1_idx]
            if len(rois_1) == 0:
                rois_1 = np.zeros((1, 4))
                label_1 = -np.ones((1,))
                gt_label_1 = -np.ones((1,))
                bbox_targets_1 = np.zeros((1, bbox_targets.shape[1]))
                bbox_weights_1 = np.zeros((1, bbox_weights.shape[1]))
            else:
                label_1 = labels[rois_1_idx]
                gt_label_1 = gt_lables[rois_1_idx]
                bbox_targets_1 = bbox_targets[rois_1_idx]
                bbox_weights_1 = bbox_weights[rois_1_idx]

            rois_2_idx = np.where(feat_id == 2)[0]
            rois_2 = im_rois[rois_2_idx]
            if len(rois_2) == 0:
                rois_2 = np.zeros((1, 4))
                label_2 = -np.ones((1,))
                gt_label_2 = -np.ones((1,))
                bbox_targets_2 = np.zeros((1, bbox_targets.shape[1]))
                bbox_weights_2 = np.zeros((1, bbox_weights.shape[1]))
            else:
                label_2 = labels[rois_2_idx]
                gt_label_2 = gt_lables[rois_2_idx]
                bbox_targets_2 = bbox_targets[rois_2_idx]
                bbox_weights_2 = bbox_weights[rois_2_idx]

            rois_3_idx = np.where(feat_id == 3)[0]
            rois_3 = im_rois[rois_3_idx]
            if len(rois_3) == 0:
                rois_3 = np.zeros((1, 4))
                label_3 = -np.ones((1,))
                gt_label_3 = -np.ones((1,))
                bbox_targets_3 = np.zeros((1, bbox_targets.shape[1]))
                bbox_weights_3 = np.zeros((1, bbox_weights.shape[1]))
            else:
                label_3 = labels[rois_3_idx]
                gt_label_3 = gt_lables[rois_3_idx]
                bbox_targets_3 = bbox_targets[rois_3_idx]
                bbox_weights_3 = bbox_weights[rois_3_idx]

            # stack batch index
            rois_array_0.append(np.hstack((im_i * np.ones((rois_0.shape[0], 1)), rois_0)))
            rois_array_1.append(np.hstack((im_i * np.ones((rois_1.shape[0], 1)), rois_1)))
            rois_array_2.append(np.hstack((im_i * np.ones((rois_2.shape[0], 1)), rois_2)))
            rois_array_3.append(np.hstack((im_i * np.ones((rois_3.shape[0], 1)), rois_3)))

            labels = np.concatenate([label_0, label_1, label_2, label_3], axis=0)
            gt_lables = np.concatenate([gt_label_0, gt_label_1, gt_label_2, gt_label_3], axis=0)
            bbox_targets = np.concatenate([bbox_targets_0, bbox_targets_1, bbox_targets_2, bbox_targets_3], axis=0)
            bbox_weights = np.concatenate([bbox_weights_0, bbox_weights_1, bbox_weights_2, bbox_weights_3], axis=0)
        else:
            rois = im_rois
            batch_index = im_i * np.ones((rois.shape[0], 1))
            rois_array_this_image = np.hstack((batch_index, rois))
            rois_array.append(rois_array_this_image)

        # add labels
        gt_labels_array.append(gt_lables)
        labels_array.append(labels)
        bbox_targets_array.append(bbox_targets)
        bbox_weights_array.append(bbox_weights)

    gt_labels_array = np.array(gt_labels_array)
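    # indices (along the per-image RoI axis) of sampled RoIs that are not
    # ground-truth boxes; consumed downstream when USE_NONGT_INDEX is set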
    nongt_index_array = np.where(gt_labels_array == 0)[1]
    labels_array = np.array(labels_array)
    bbox_targets_array = np.array(bbox_targets_array)
    bbox_weights_array = np.array(bbox_weights_array)

    if cfg.network.USE_NONGT_INDEX:

        label = {'label': labels_array,
                 'nongt_index': nongt_index_array,
                 'bbox_target': bbox_targets_array,
                 'bbox_weight': bbox_weights_array}

    else:
        label = {'label': labels_array,
                 'bbox_target': bbox_targets_array,
                 'bbox_weight': bbox_weights_array}

    if cfg.network.ROIDispatch:
        rois_array_0 = np.array(rois_array_0)
        rois_array_1 = np.array(rois_array_1)
        rois_array_2 = np.array(rois_array_2)
        rois_array_3 = np.array(rois_array_3)
        # rois_concate = np.concatenate((rois_array_0, rois_array_1, rois_array_2, rois_array_3), axis=1)
        # gt_rois_t = rois_concate[:, gt_labels_array[0,:] > 0]
        data = {'data': im_array,
                'rois_0': rois_array_0,
                'rois_1': rois_array_1,
                'rois_2': rois_array_2,
                'rois_3': rois_array_3}
    else:
        rois_array = np.array(rois_array)
        data = {'data': im_array,
                'rois': rois_array}

    if cfg.TRAIN.LEARN_NMS:
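        # note: this path reads only roidb[0], i.e. it assumes one image per batch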
        # im info
        im_info = np.array([roidb[0]['im_info']], dtype=np.float32)
        # gt_boxes
        if roidb[0]['gt_classes'].size > 0:
            gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0]
            gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
            gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :]
            gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]
        else:
            gt_boxes = np.empty((0, 5), dtype=np.float32)
        data['im_info'] = im_info
        data['gt_boxes'] = gt_boxes

    return data, label
def get_segmentation_test_batch(segdb,
                                config,
                                is_train=False,
                                has_label=True,
                                scale=1.0):
    """
    return a dict holding one test batch
    :param segdb: ['image', 'flipped']
    :param config: the config setting
    :return: {'data': data, 'label': label}
    """
    imgs, seg_cls_gts, segdb, origin_ims, origin_labels = get_segmentation_image(
        segdb, config, is_train=is_train, has_label=has_label, scale=scale)

    im_array = tensor_vstack(imgs)
    if has_label:
        seg_cls_gt = tensor_vstack(seg_cls_gts)
    else:
        seg_cls_gt = []
    im_info = tensor_vstack(
        [np.array([isegdb['im_info']], dtype=np.float32) for isegdb in segdb])
    origin_im = tensor_vstack(origin_ims)
    rois = []
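    # each roi row is [batch_index, x1, y1, x2, y2]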

    if config.network.use_crop_context:
        crop_context_scale = config.network.crop_context_scale
        crop_height, crop_width = config.TRAIN.crop_size
        feature_stride = config.network.LABEL_STRIDE
        scale_width = make_divisible(
            int(float(crop_width) / crop_context_scale), feature_stride)
        scale_height = make_divisible(
            int(float(crop_height) / crop_context_scale), feature_stride)
        pad_width = (scale_width - crop_width) // 2
        pad_height = (scale_height - crop_height) // 2

        origin_data = np.zeros((im_array.shape[0], im_array.shape[1],
                                im_array.shape[2] + 2 * pad_height,
                                im_array.shape[3] + 2 * pad_width))
        origin_data[:, :,
                    pad_height:im_array.shape[2] + pad_height,
                    pad_width:im_array.shape[3] + pad_width] = im_array

        # use a distinct loop variable so the stacked im_info array is not
        # clobbered before it is packed into `data` below
        for i, iminfo in enumerate(im_info):
            im_size = iminfo[:2]
            rois.append(
                np.array([
                    i, pad_width, pad_height, pad_width + im_size[1],
                    pad_height + im_size[0]
                ]).reshape((1, 5)))
        rois = tensor_vstack(rois)
        # print rois

    else:
        network_ratio = config.network.ratio
        for i, iminfo in enumerate(im_info):
            im_size = iminfo[:2]
            rois.append(
                np.array([
                    i, 0, 0, im_size[1] * network_ratio,
                    im_size[0] * network_ratio
                ]).reshape((1, 5)))
        rois = tensor_vstack(rois)
        # print rois

    data = {
        'data': im_array,
        'im_info': im_info,
        'origin_data': origin_im,
        'rois': rois
    }

    label = {'label': seg_cls_gt}

    return {'data': data, 'label': label}
def pred_eval(predictor, test_data, imdb, vis=False, ignore_cache=None, logger=None):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all thresholds are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param ignore_cache: ignore the saved cache file
    :param logger: the logger instance
    :return:
    """
    res_file = os.path.join(imdb.result_path, imdb.name + '_segmentations.pkl')

    # output_median is assumed to be a module-level debug flag defined elsewhere
    if output_median:
        output_median_dir = os.path.join(imdb.result_path, 'numpy_output')
        if not os.path.exists(output_median_dir):
            os.makedirs(output_median_dir)

    if os.path.exists(res_file) and not ignore_cache:
        with open(res_file, 'rb') as fid:
            evaluation_results = cPickle.load(fid)
            meanIU = evaluation_results['meanIU']
            IU_array = evaluation_results['IU_array']
            logger.info('evaluate segmentation:')
            logger.info('class IU_array:')
            logger.info(str(IU_array * 100))
            logger.info('meanIU:%.5f' % (meanIU * 100))
        return

    assert vis or not test_data.shuffle

    if test_data.has_label:
        scorer = ScoreUpdater(np.arange(config.dataset.NUM_CLASSES), config.dataset.NUM_CLASSES,
                              test_data.size, logger)
        scorer.reset()

    all_segmentation_result = [[] for _ in xrange(imdb.num_images)]

    num_steps = config.TEST.num_steps
    use_flipping = config.TEST.use_flipping
    num_class = config.dataset.NUM_CLASSES
    label_stride = config.network.LABEL_STRIDE

    idx = 0
    save_feature = 0
    if config.network.use_metric:
        output_name = "FUSION_softmax_output"
    else:
        output_name = "softmax_output"
    # output_name = "FUSION_softmax_output"
    name_i = 0
    for index, data_batch in enumerate(test_data):

        origin_data_shapes = [(data_shape[0][1][0], data_shape[0][1][1],
                               data_shape[0][1][2], data_shape[0][1][3])
                              for data_shape in data_batch.provide_data]

        softmax_outputs_scales = []
        logger.info("#####################################")

        batch_size = 0
        for data_shape in origin_data_shapes:
            batch_size += data_shape[0]

        # predictor.predict is assumed to accept the current batch here; the
        # fuller variant below shows the full multi-scale prediction path
        softmax_batch_predictions = predictor.predict(data_batch)

        # 3. get the final label prediction and save the softmax to the h5 format
        label_predictions = []
        for batch_softmax_output in softmax_batch_predictions:
            label_predictions.extend([np.argmax(softmax_output, axis=0)
                                      for softmax_output in batch_softmax_output])

        if config.TEST.save_h5py:
            save_batch_softmax_ouputs(
                imdb.result_path,
                test_data.segdb[idx:idx + test_data.batch_size - data_batch.pad],
                softmax_batch_predictions)

        # 4. crop the prediction and the ground truth
        label_predictions_new = []
        for j, label_prediction in enumerate(label_predictions):
            seg_rec = test_data.segdb[index * test_data.batch_size + j]
            imh, imw = (seg_rec['height'], seg_rec['width'])
            label_prediction = label_prediction[:imh, :imw]
            label_predictions_new.append(label_prediction)
        label_predictions = label_predictions_new

        # 5. update the online prediction
        if test_data.has_label:
            labels_gt = [label[0].asnumpy() for label in test_data.label]
            labels_gt = tensor_vstack(labels_gt)
            labels_gt = [label for label in labels_gt]
            for j, (label_prediction, label_gt) in enumerate(
                    zip(label_predictions, labels_gt)):
                seg_rec = test_data.segdb[index * test_data.batch_size + j]
                imh, imw = (seg_rec['height'], seg_rec['width'])
                label_gt = np.squeeze(label_gt[:, :imh, :imw])
                if Debug:
                    print label_prediction.shape, label_gt.shape
                assert label_prediction.shape == label_gt.shape
                scorer.update(pred_label=label_prediction, label=label_gt,
                              i=index * test_data.batch_size + j)

        all_segmentation_result[idx:idx + test_data.batch_size - data_batch.pad] = \
            [output.astype('int8') for output in label_predictions]
        logger.info('Done {}/{}'.format(idx + batch_size, test_data.size))
        idx += test_data.batch_size - data_batch.pad

    # total results
    logger.info('-------------------------------------------------------')
    if test_data.has_label:
        evaluation_results = imdb.evaluate_segmentations(all_segmentation_result)
        with open(res_file, 'wb') as f:
            cPickle.dump(evaluation_results, f, protocol=cPickle.HIGHEST_PROTOCOL)
            meanIU = evaluation_results['meanIU']
            IU_array = evaluation_results['IU_array']
            logger.info('evaluate segmentation:')
            logger.info('class IU_array:')
            logger.info(str(IU_array*100))
            logger.info('meanIU:%.5f' % (meanIU*100))

    else:
        imdb.write_segmentation_result(all_segmentation_result)
        logger.info("write the result done!")
def pred_eval(predictor,
              test_data,
              imdb,
              vis=False,
              ignore_cache=None,
              logger=None):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all thresholds are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param ignore_cache: ignore the saved cache file
    :param logger: the logger instance
    :return:
    """
    res_file = os.path.join(imdb.result_path, imdb.name + '_segmentations.pkl')
    output_median_dir = os.path.join(imdb.result_path, 'numpy_output')
    if not os.path.exists(output_median_dir):
        os.makedirs(output_median_dir)

    if os.path.exists(res_file) and not ignore_cache:
        with open(res_file, 'rb') as fid:
            evaluation_results = cPickle.load(fid)
            meanIU = evaluation_results['meanIU']
            IU_array = evaluation_results['IU_array']
            logger.info('evaluate segmentation:')
            logger.info('class IU_array:')
            logger.info(str(IU_array * 100))
            logger.info('meanIU:%.5f' % (meanIU * 100))
        return

    assert vis or not test_data.shuffle

    if test_data.has_label:
        scorer = ScoreUpdater(np.arange(config.dataset.NUM_CLASSES),
                              config.dataset.NUM_CLASSES, test_data.size,
                              logger)
        scorer.reset()

    all_segmentation_result = [[] for _ in xrange(imdb.num_images)]

    num_steps = config.TEST.num_steps
    use_flipping = config.TEST.use_flipping
    num_class = config.dataset.NUM_CLASSES
    label_stride = config.network.LABEL_STRIDE

    if config.dataset.dataset == 'cityscapes':
        feature_stride = label_stride
    else:
        feature_stride = label_stride * 4

    idx = 0
    save_feature = 0
    if config.network.use_metric and not config.TRAIN.use_crl_ses:
        output_name = "FUSION_softmax_output"
    else:
        output_name = "softmax_output"
    #output_name = "FUSION_softmax_output"
    name_i = 0
    for index, data_batch in enumerate(test_data):

        origin_data_shapes = [(data_shape[0][1][0], data_shape[0][1][1],
                               data_shape[0][1][2], data_shape[0][1][3])
                              for data_shape in data_batch.provide_data]

        softmax_outputs_scales = []
        logger.info("#####################################")

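        # multi-scale testing: run the network once per scale in
        # config.TEST.ms_array and average the softmax outputs afterwards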
        for scale in config.TEST.ms_array:
            logger.info("Now Scale: %.2f" % scale)

            test_data.get_batch(scale, True)

            scale_data_shapes = [(data_shape[0][1][0], data_shape[0][1][1],
                                  data_shape[0][1][2], data_shape[0][1][3])
                                 for data_shape in test_data.provide_data]

            data_shapes = [(scale_data_shape[0], scale_data_shape[1],
                            make_divisible(scale_data_shape[2],
                                           feature_stride),
                            make_divisible(scale_data_shape[3],
                                           feature_stride))
                           for scale_data_shape in scale_data_shapes]

            batch_size = 0
            for data_shape in data_shapes:
                batch_size += data_shape[0]

            data_batch.provide_data = [[('data', data_shape)]
                                       for data_shape in data_shapes]

            canva_softmax_outputs = [
                np.zeros((data_shape[0], num_class,
                          data_shape[2] // label_stride * num_steps,
                          data_shape[3] // label_stride * num_steps))
                for data_shape in data_shapes
            ]

            canva_datas = [
                np.zeros((data_shape[0], data_shape[1],
                          data_shape[2] + label_stride,
                          data_shape[3] + label_stride))
                for data_shape in data_shapes
            ]

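            # center each resized image on a canvas padded by label_stride so
            # the shifted crops below stay inside the canvas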
            sy = sx = label_stride // 2
            for canva_data, origin_data, data_shape in zip(
                    canva_datas, test_data.data, data_shapes):
                canva_data[:, :, sy:sy + data_shape[2],
                           sx:sx + data_shape[3]] = resize_batch_target(
                               origin_data[0].asnumpy(), data_shape[3],
                               data_shape[2])
            # prepare the start of the strides
            prediction_stride = label_stride // num_steps
            sy = sx = prediction_stride // 2 + np.arange(
                num_steps) * prediction_stride

            # operation of mult_step
            for ix in xrange(num_steps):
                for iy in xrange(num_steps):
                    data_batch.data = [[
                        mx.nd.array(canva_data[:, :,
                                               sy[iy]:sy[iy] + data_shape[2],
                                               sx[ix]:sx[ix] + data_shape[3]])
                    ] for canva_data, data_shape in zip(
                        canva_datas, data_shapes)]

                    output_all = predictor.predict(data_batch)
                    softmax_outputs = [
                        output[output_name].asnumpy() for output in output_all
                    ]
                    for canva_softmax_output, softmax_output in zip(
                            canva_softmax_outputs, softmax_outputs):
                        canva_softmax_output[:, :, iy::num_steps,
                                             ix::num_steps] = softmax_output

                    if use_flipping:
                        data_batch.data = [[
                            mx.nd.array(
                                canva_data[:, :, sy[iy]:sy[iy] + data_shape[2],
                                           sx[ix]:sx[ix] + data_shape[3]][:, :, :, ::-1])
                        ] for canva_data, data_shape in zip(canva_datas, data_shapes)]
                        output_all = predictor.predict(data_batch)
                        softmax_outputs = [
                            output[output_name].asnumpy()
                            for output in output_all
                        ]
                        # average the flipped prediction back into the canvas
                        for canva_softmax_output, softmax_output in zip(
                                canva_softmax_outputs, softmax_outputs):
                            canva_softmax_output[:, :, iy::num_steps, ix::num_steps] = 0.5 * (
                                canva_softmax_output[:, :, iy::num_steps, ix::num_steps]
                                + softmax_output[:, :, :, ::-1])

            # resize the data inputs and crop the scale inputs
            final_canva_softmax_outputs = [
                np.zeros((scale_data_shape[0], num_class, scale_data_shape[2],
                          scale_data_shape[3]))
                for scale_data_shape in scale_data_shapes
            ]
            for data_shape, scale_data_shape, canva_softmax_output, final_canva_softmax_output in zip(
                    data_shapes, scale_data_shapes, canva_softmax_outputs,
                    final_canva_softmax_outputs):
                final_canva_softmax_output[:, :, :, :] = resize_batch_softmax_output(
                    canva_softmax_output, scale_data_shape[2:])
            softmax_outputs_scales.append(final_canva_softmax_outputs)

        if output_median:
            # output_median / output_median_name are assumed to be
            # module-level debug settings defined elsewhere
            for zi, output_all_batch in enumerate(output_all):
                output_all_batch_numpy = output_all_batch[
                    output_median_name].asnumpy()
                for zj, output_numpy_one in enumerate(output_all_batch_numpy):
                    # name_i is fixed within this batch, so later arrays
                    # overwrite earlier ones at the same path
                    with open(os.path.join(
                            output_median_dir,
                            output_median_name + '_' + str(name_i) + ".npy"),
                            "wb") as f:
                        np.save(f, output_numpy_one)
            name_i += 1
            print test_data.segdb[0]
            print "name", name_i

        # 1. resize the data shape
        softmax_outputs_scales_new = []
        for canva_softmax_outputs in softmax_outputs_scales:
            batch_softmax_output_list = []
            for data_shape, batch_softmax_output in zip(
                    origin_data_shapes, canva_softmax_outputs):
                if Debug:
                    print "#1:batch_softmax_output ", batch_softmax_output.shape
                    print "#2:target shape ", data_shape[2:]
                target_size = data_shape[2:]
                batch_softmax_output = resize_batch_softmax_output(
                    batch_softmax_output, target_size)
                batch_softmax_output_list.append(batch_softmax_output)
            softmax_outputs_scales_new.append(batch_softmax_output_list)

        # 2. get the avg softmax prediction
        softmax_batch_predictions = [
            np.zeros((data_shape[0], num_class, data_shape[2], data_shape[3]))
            for data_shape in origin_data_shapes
        ]
        for i in xrange(len(softmax_outputs_scales_new)):
            for j in xrange(len(data_batch.provide_data)):
                softmax_batch_predictions[j] += softmax_outputs_scales_new[i][j]

        for j in xrange(len(data_batch.provide_data)):
            softmax_batch_predictions[j] /= float(
                len(softmax_outputs_scales_new))

        # 3. get the final label prediction and save the softmax to the h5 format
        label_predictions = []
        for batch_softmax_output in softmax_batch_predictions:
            label_predictions.extend([
                np.argmax(softmax_output, axis=0)
                for softmax_output in batch_softmax_output
            ])

        if config.TEST.save_h5py:
            save_batch_softmax_ouputs(
                imdb.result_path,
                test_data.segdb[idx:idx + test_data.batch_size -
                                data_batch.pad], softmax_batch_predictions)

        # 4. crop the prediction and the ground truth
        label_predictions_new = []
        for j, label_prediction in enumerate(label_predictions):
            seg_rec = test_data.segdb[index * test_data.batch_size + j]
            imh, imw = (seg_rec['height'], seg_rec['width'])
            label_prediction = label_prediction[:imh, :imw]
            label_predictions_new.append(label_prediction)
        label_predictions = label_predictions_new

        # 5. update the online prediction
        if test_data.has_label:
            labels_gt = [label[0].asnumpy() for label in test_data.label]
            labels_gt = tensor_vstack(labels_gt)
            labels_gt = [label for label in labels_gt]
            for j, (label_prediction, label_gt) in enumerate(
                    zip(label_predictions, labels_gt)):
                seg_rec = test_data.segdb[index * test_data.batch_size + j]
                imh, imw = (seg_rec['height'], seg_rec['width'])
                label_gt = np.squeeze(label_gt[:, :imh, :imw])
                if Debug:
                    print label_prediction.shape, label_gt.shape
                assert label_prediction.shape == label_gt.shape
                scorer.update(pred_label=label_prediction,
                              label=label_gt,
                              i=index * test_data.batch_size + j)

        all_segmentation_result[idx:idx + test_data.batch_size - data_batch.pad] = \
            [output.astype('int8') for output in label_predictions]
        logger.info('Done {}/{}'.format(idx + batch_size, test_data.size))
        idx += test_data.batch_size - data_batch.pad

    # total results
    logger.info('-------------------------------------------------------')
    if test_data.has_label:
        evaluation_results = imdb.evaluate_segmentations(
            all_segmentation_result)
        with open(res_file, 'wb') as f:
            cPickle.dump(evaluation_results,
                         f,
                         protocol=cPickle.HIGHEST_PROTOCOL)
            meanIU = evaluation_results['meanIU']
            IU_array = evaluation_results['IU_array']
            logger.info('evaluate segmentation:')
            logger.info('class IU_array:')
            logger.info(str(IU_array * 100))
            logger.info('meanIU:%.5f' % (meanIU * 100))

    else:
        imdb.write_segmentation_result(all_segmentation_result)
        logger.info("write the result done!")