Code Example #1
File: func.py Project: chr5tphr/ecGAN
def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1, ctx=None):
    # First figure out what the size of the output should be
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0
    assert (W + 2 * padding - field_width) % stride == 0
    out_height = int((H + 2 * padding - field_height) / stride + 1)
    out_width = int((W + 2 * padding - field_width) / stride + 1)

    i0 = nd.repeat(nd.arange(field_height, ctx=ctx), field_width)
    i0 = nd.tile(i0, C)
    i1 = stride * nd.repeat(nd.arange(out_height, ctx=ctx), out_width)
    j0 = nd.tile(nd.arange(field_width, ctx=ctx), field_height * C)
    j1 = stride * nd.tile(nd.arange(out_width, ctx=ctx), out_height)
    i = i0.reshape((-1, 1)) + i1.reshape((1, -1))
    j = j0.reshape((-1, 1)) + j1.reshape((1, -1))

    k = nd.repeat(nd.arange(C, ctx=ctx), field_height * field_width).reshape((-1, 1))

    return (k.astype('int32'), i.astype('int32'), j.astype('int32'))
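A quick shape check (a sketch, assuming `from mxnet import nd` and the default CPU context) shows how the returned index triplet maps a 4x4 single-channel image onto im2col columns:

from mxnet import nd

k, i, j = get_im2col_indices((1, 1, 4, 4), field_height=2, field_width=2,
                             padding=0, stride=2)
# k: (4, 1) channel index; i, j: (4, 4) row/column indices,
# one row per patch element, one column per output location
print(k.shape, i.shape, j.shape)  # (4, 1) (4, 4) (4, 4)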
Code Example #2
File: image.py Project: xiayongtao/gluon-cv
def resize_contain(src, size, fill=0):
    """Resize the image to fit in the given area while keeping aspect ratio.

    If both the height and the width in `size` are larger than
    the height and the width of input image, the image is placed on
    the center with an appropriate padding to match `size`.
    Otherwise, the input image is scaled to fit in a canvas whose size
    is `size` while preserving aspect ratio.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        The original image with HWC format.
    size : tuple
        Tuple of length 2 as (width, height).
    fill : int or float or array-like
        The value(s) for the padded borders. If `fill` is a numerical type, all
        channels are padded with that single value. Otherwise `fill` must have
        the same length as the number of image channels, resulting in
        per-channel padding values.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    tuple
        Tuple of (offset_x, offset_y, scaled_x, scaled_y)

    """
    h, w, c = src.shape
    ow, oh = size
    scale_h = oh / h
    scale_w = ow / w
    scale = min(min(scale_h, scale_w), 1)
    scaled_x = int(w * scale)
    scaled_y = int(h * scale)
    if scale < 1:
        src = mx.image.imresize(src, scaled_x, scaled_y)

    off_y = (oh - scaled_y) // 2 if scaled_y < oh else 0
    off_x = (ow - scaled_x) // 2 if scaled_x < ow else 0

    # make canvas
    if isinstance(fill, numeric_types):
        dst = nd.full(shape=(oh, ow, c), val=fill, dtype=src.dtype)
    else:
        fill = nd.array(fill, ctx=src.context)
        if not c == fill.size:
            raise ValueError("Channel and fill size mismatch, {} vs {}".format(c, fill.size))
        dst = nd.repeat(fill, repeats=oh * ow).reshape((oh, ow, c))

    dst[off_y:off_y+scaled_y, off_x:off_x+scaled_x, :] = src
    return dst, (off_x, off_y, scaled_x, scaled_y)
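A minimal usage sketch (assuming `import mxnet as mx`, `from mxnet import nd`, and `from mxnet.base import numeric_types`, which this snippet depends on):

import mxnet as mx
from mxnet import nd
from mxnet.base import numeric_types

img = nd.zeros((40, 60, 3), dtype='uint8')   # a 60x40 HWC image
dst, (off_x, off_y, sx, sy) = resize_contain(img, (100, 100), fill=128)
print(dst.shape, off_x, off_y)  # (100, 100, 3) 20 30: centered on a gray canvas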
Code Example #3
    def hybrid_forward(
        self, F, score_gt, kernel_gt, score_pred, training_masks, *args, **kwargs
    ):
        """
        kernels map's order: [1, ..., 0.5]
        """
        C_pred = score_pred[:, 0, :, :]
        self.pixel_acc = batch_pix_accuracy(C_pred, score_gt)
        # classification loss
        eps = 1e-5
        intersection = F.sum(score_gt * C_pred * training_masks, axis=(1, 2))
        union = (
            F.sum(training_masks * score_gt * score_gt, axis=(1, 2))
            + F.sum(training_masks * C_pred * C_pred, axis=(1, 2))
            + eps
        )

        C_dice_loss = 1.0 - (2 * intersection) / (union)

        # loss for kernel
        kernel_mask = F.where(
            training_masks * C_pred > 0.5, F.ones_like(C_pred), F.zeros_like(C_pred)
        )
        kernel_mask = F.expand_dims(kernel_mask, axis=1)
        kernel_mask = F.repeat(kernel_mask, repeats=self.num_kernels - 1, axis=1)
        self.kernel_acc = batch_pix_accuracy(
            score_pred[:, 1, :, :] * score_gt, kernel_gt[:, 0, :, :]
        )
        kernel_intersection = F.sum(
            kernel_gt * score_pred[:, 1:, :, :] * kernel_mask, axis=(2, 3)
        )
        kernel_union = (
            F.sum(kernel_gt * kernel_gt * kernel_mask, axis=(2, 3))
            + F.sum(
                score_pred[:, 1:, :, :] * score_pred[:, 1:, :, :] * kernel_mask,
                axis=(2, 3),
            )
            + eps
        )
        kernel_dice = 1.0 - (2 * kernel_intersection) / kernel_union

        kernel_dice_loss = F.mean(kernel_dice, axis=1)
        self.C_loss = C_dice_loss
        self.kernel_loss = kernel_dice_loss

        loss = self.lam * C_dice_loss + (1.0 - self.lam) * kernel_dice_loss

        return loss
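The classification term is a standard Dice loss, 1 - 2|X∩Y| / (|X| + |Y|); a toy check of the formula with plain NDArrays (a sketch, assuming `from mxnet import nd`):

from mxnet import nd

pred = nd.array([[0.9, 0.8, 0.1], [0.2, 0.7, 0.6]])
gt = nd.array([[1.0, 1.0, 0.0], [0.0, 1.0, 1.0]])
eps = 1e-5
inter = nd.sum(gt * pred, axis=1)
union = nd.sum(gt * gt, axis=1) + nd.sum(pred * pred, axis=1) + eps
print(1.0 - 2 * inter / union)  # near 0 for good overlap, near 1 for none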
Code Example #4
 def Route(self, x):
     # Alternative init: b_mat = nd.repeat(self.b_mat.data(), repeats=x.shape[0], axis=0),
     # optionally wrapped in nd.stop_gradient(...)
     b_mat = nd.zeros((x.shape[0], 1, self.num_cap, self.num_locations),
                      ctx=x.context)
     x_expand = nd.expand_dims(nd.expand_dims(x, axis=2), 2)
     w_expand = nd.repeat(nd.expand_dims(self.w_ij.data(x.context), axis=0),
                          repeats=x.shape[0],
                          axis=0)
     u_ = w_expand * x_expand
     u = nd.sum(u_, axis=1)
     for i in range(self.route_num):
         c_mat = nd.softmax(b_mat, axis=2)
         s = nd.sum(u * c_mat, axis=-1)
         v = squash(s, 1)
         v1 = nd.expand_dims(v, axis=-1)
         update_term = nd.sum(u * v1, axis=1, keepdims=True)
         b_mat = b_mat + update_term
     return v
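The routing loop calls a squash helper that is not defined in this snippet; a minimal sketch of the capsule-network squashing non-linearity it presumably refers to (an assumption about the author's definition):

from mxnet import nd

def squash(s, axis):
    # v = (|s|^2 / (1 + |s|^2)) * (s / |s|), norms taken along `axis`
    norm = nd.sqrt(nd.sum(nd.square(s), axis=axis, keepdims=True))
    return (norm ** 2 / (1 + norm ** 2)) * s / (norm + 1e-9)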
Code Example #5
File: coder.py Project: zzdang/cascade_rcnn_gluon
 def forward(self, samples, matches, anchors, refs):
     """Forward"""
     F = nd
     # TODO(zhreshold): batch_pick, take multiple elements?
     ref_boxes = nd.repeat(refs.reshape((0, 1, -1, 4)), axis=1, repeats=matches.shape[1])
     ref_boxes = nd.split(ref_boxes, axis=-1, num_outputs=4, squeeze_axis=True)
     ref_boxes = nd.concat(*[F.pick(ref_boxes[i], matches, axis=2).reshape((0, -1, 1)) \
         for i in range(4)], dim=2)
     g = self.corner_to_center(ref_boxes)
     a = self.corner_to_center(anchors)
     t0 = ((g[0] - a[0]) / a[2] - self._means[0]) / self._stds[0]
     t1 = ((g[1] - a[1]) / a[3] - self._means[1]) / self._stds[1]
     t2 = (F.log(g[2] / a[2]) - self._means[2]) / self._stds[2]
     t3 = (F.log(g[3] / a[3]) - self._means[3]) / self._stds[3]
     codecs = F.concat(t0, t1, t2, t3, dim=2)
     temp = F.tile(samples.reshape((0, -1, 1)), reps=(1, 1, 4)) > 0.5
     targets = F.where(temp, codecs, F.zeros_like(codecs))
     masks = F.where(temp, F.ones_like(temp), F.zeros_like(temp))
     return targets, masks
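The encoder relies on self.corner_to_center, which is not shown; a minimal sketch of the conversion it implies (an assumption based on how g and a are indexed as 4-tuples):

from mxnet import nd

def corner_to_center(boxes):
    # (xmin, ymin, xmax, ymax) -> tuple of (cx, cy, w, h), each shape (..., 1)
    xmin, ymin, xmax, ymax = nd.split(boxes, axis=-1, num_outputs=4)
    width = xmax - xmin
    height = ymax - ymin
    return xmin + width / 2, ymin + height / 2, width, height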
Code Example #6
    def forward(self, scores, offsets, anchors, img):
        # Training and inference use different pre/post-NMS settings
        if autograd.is_training():
            pre_nms = self._train_pre_nms
            post_nms = self._train_post_nms
        else:
            pre_nms = self._test_pre_nms
            post_nms = self._test_post_nms
        with autograd.pause():
            # Add the predicted offsets to the anchors
            rois = self._bbox_decoder(offsets, self._bbox_tocenter(anchors))
            rois = self._cliper(rois, img)

            # Remove all ROIs smaller than the configured minimum size
            x_min, y_min, x_max, y_max = nd.split(rois, num_outputs=4, axis=-1)
            width = x_max - x_min
            height = y_max - y_min
            invalid_mask = (width < self._min_size) + (height < self._min_size)

            # Set the scores at those positions to -1
            scores = nd.where(invalid_mask, nd.ones_like(scores) * -1, scores)
            invalid_mask = nd.repeat(invalid_mask, repeats=4, axis=-1)
            rois = nd.where(invalid_mask, nd.ones_like(rois) * -1, rois)

            # Run NMS
            pre = nd.concat(scores, rois, dim=-1)
            pre = nd.contrib.box_nms(pre,
                                     overlap_thresh=self._nms_thresh,
                                     topk=pre_nms,
                                     coord_start=1,
                                     score_index=0,
                                     id_index=-1,
                                     force_suppress=True)
            # Keep the top post_nms proposals
            result = nd.slice_axis(pre, axis=1, begin=0, end=post_nms)
            rpn_score = nd.slice_axis(result, axis=-1, begin=0, end=1)
            rpn_bbox = nd.slice_axis(result, axis=-1, begin=1, end=None)

        return rpn_score, rpn_bbox
Code Example #7
    def sample_neighbours(self, data, query_network):
        num_stored_samples = self.key_memory.shape[0]
        batch_size = data[0].shape[0]

        query = query_network(*data).as_in_context(mx.cpu())

        vec1 = nd.repeat(query, repeats=num_stored_samples, axis=0)
        vec2 = nd.tile(self.key_memory, reps=(batch_size, 1))
        diff = nd.subtract(vec1, vec2)
        sq = nd.square(diff)
        batch_sum = nd.sum(sq, exclude=1, axis=0)
        sqrt = nd.sqrt(batch_sum)

        dist = nd.reshape(sqrt, shape=(batch_size, num_stored_samples))

        sample_ind = nd.topk(dist, k=self.k, axis=1, ret_typ="indices")
        num_outputs = len(self.label_memory)

        sample_labels = [self.label_memory[i][sample_ind] for i in range(num_outputs)]
        sample_batches = [[self.value_memory[j][sample_ind] for j in range(len(self.value_memory))], sample_labels]

        return sample_batches
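A toy run of the repeat/tile pairing used above (a sketch, assuming `from mxnet import nd`): every query row is compared against every stored key, then the flat distances are reshaped back to (batch, num_samples).

from mxnet import nd

q = nd.array([[0, 0], [1, 1]])        # batch_size=2 queries
keys = nd.array([[0, 0], [3, 4]])     # num_stored_samples=2 keys
v1 = nd.repeat(q, repeats=2, axis=0)  # q0, q0, q1, q1
v2 = nd.tile(keys, reps=(2, 1))       # k0, k1, k0, k1
dist = nd.sqrt(nd.sum(nd.square(v1 - v2), axis=1)).reshape((2, 2))
print(dist)  # [[0. 5.], [1.414 3.606]]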
Code Example #8
File: latwalk.py Project: chr5tphr/ecGAN
def render(gfunc, stepsize=0.1, momentum=0.9, maxstep=24000):
    K = 10
    num = 30
    bbox = config.data.bbox
    cond = nd.one_hot(nd.repeat(nd.arange(K, ctx=ctx), (num-1)//K+1)[:num], K).reshape((num, K, 1, 1))
    anoi = nd.random.normal(shape=(num,100,1,1), ctx=ctx)
    bnoi = nd.random.normal(shape=(num,100,1,1), ctx=ctx)
    slast = 0.
    for step in range(maxstep):
        snoi = anoi - bnoi

        sdist = snoi.norm(axis=1,keepdims=True)
        if sdist.min().asscalar() < .5:
            anoi = nd.random.normal(shape=(num,100,1,1), ctx=ctx)
        snoi /= sdist
        slast = stepsize*snoi + momentum*slast
        bnoi += slast

        gen = gfunc(noise=bnoi, cond=cond)
        indat = ((gen - bbox[0]) * 255/(bbox[1]-bbox[0])).asnumpy().clip(0, 255).astype(np.uint8)
        indat = align_images(indat, 5, 6, 32, 32, 3)
        yield indat
Code Example #9
File: box.py Project: ydlstartx/MultiDet
def getwh(scales, ratios, fw, fh, srmode):
    if srmode == 'few':
        num = scales.size + ratios.size - 1
        width = nd.zeros((num,))
        height = nd.zeros((num,))
        
        sqt_ratios = nd.sqrt(ratios)
        width[:ratios.size] = scales[0] * sqt_ratios
        height[:ratios.size] = width[:ratios.size] / ratios
        
        width[ratios.size:] = scales[1:] * sqt_ratios[0]
        height[ratios.size:] = width[ratios.size:] / ratios[0]
    else:
        rscales = nd.repeat(scales, ratios.size)
        rratios = nd.tile(ratios, scales.size)
        
        width = rscales * nd.sqrt(rratios)
        height = width / rratios
        
    width = width * fw
    height = height * fh
    
    return width, height
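A usage sketch for the dense branch (the 'all' mode string here is illustrative; any value other than 'few' selects it), assuming `from mxnet import nd`:

from mxnet import nd

scales = nd.array([0.1, 0.2, 0.4])
ratios = nd.array([1.0, 2.0])
w, h = getwh(scales, ratios, fw=300, fh=300, srmode='all')
print(w.shape)  # (6,): every scale paired with every ratio
# srmode='few' would instead produce scales.size + ratios.size - 1 = 4 boxes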
Code Example #10
def repeat_emb(param, emb):
    """Maybe repeat an embedding."""
    res = nd.expand_dims(emb, 0)
    param.repeated = nd.repeat(res, repeats=param.n_repeats, axis=0)
    param.repeated.attach_grad()
    return param.repeated
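A small usage sketch (the param object here is a stand-in exposing only the n_repeats attribute the function reads):

from types import SimpleNamespace
from mxnet import nd

param = SimpleNamespace(n_repeats=4)
emb = nd.ones((3, 8))              # e.g. 3 tokens, embedding dim 8
rep = repeat_emb(param, emb)       # shape (4, 3, 8), with gradient attached
print(rep.shape)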
Code Example #11
def test_repeat():
    x = create_vector(size=LARGE_X // 2)
    y = nd.repeat(x, repeats=2, axis=0)
    assert y.shape[0] == LARGE_X
    assert y[1] == 0
    assert y[LARGE_X - 1] == LARGE_X // 2 - 1
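The test leans on LARGE_X and create_vector from MXNet's large-tensor test utilities; a stand-in sketch so the snippet runs in isolation (the constant is a placeholder, the real suite exercises sizes past the 32-bit index boundary):

from mxnet import nd

LARGE_X = 100000000  # placeholder size (assumption)

def create_vector(size, dtype='int64'):
    # [0, 1, ..., size-1], matching the asserts above
    return nd.arange(0, size, dtype=dtype)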
Code Example #12
File: transformer.py Project: cheesama/transformer
 def hybrid_forward(self, F, x, a, b):
     mean = x.mean(axis = -1) # batch * _in_seq_len
     _mean = nd.repeat(mean.expand_dims(axis = -1), repeats = x.shape[-1], axis = -1) # batch * _in_seq_len * embedding_dim
     std = nd.sqrt(nd.sum(nd.power((x - _mean), 2), axis = -1) / x.shape[-1]) # batch * _in_seq_len
     _std = nd.repeat(std.expand_dims(axis = -1), repeats = x.shape[-1], axis = -1) # batch * _in_seq_len * embedding_dim
     return F.elemwise_div(F.multiply((x - _mean), a), (_std  + self.eps)) + b
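A numerically equivalent sketch using broadcasting instead of nd.repeat, which avoids materializing the repeated mean/std tensors:

from mxnet import nd

def layer_norm(x, a, b, eps=1e-6):
    # normalize over the embedding dim, then scale by a and shift by b
    mean = x.mean(axis=-1, keepdims=True)
    std = nd.sqrt(((x - mean) ** 2).mean(axis=-1, keepdims=True))
    return a * (x - mean) / (std + eps) + b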
Code Example #13
def predict_transform(prediction, input_dim, anchors):
    # get the anchor boxes in context
    ctx = prediction.context
    if not isinstance(anchors, nd.NDArray):
        anchors = nd.array(anchors, ctx=ctx)

    # get the batch size, anchor boxes per pyramid, and size of feature maps
    batch_size = prediction.shape[0]
    anchors_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    strides = [13, 26, 52]

    # TODO this can automatically be calculated
    step = [(0, 507), (507, 2535), (2535, 10647)]
    for i in range(3):
        stride = strides[i]
        grid = np.arange(stride)

        # basically repeats the above arange both vertically (a) and
        # horizontally (b)
        a, b = np.meshgrid(grid, grid)
        x_offset = nd.array(a.reshape((-1, 1)), ctx=ctx)
        y_offset = nd.array(b.reshape((-1, 1)), ctx=ctx)

        # creates coordinate pairs, three per grid cell:
        # ((0,0), (0,0), (0,0), (0,1), ..., (12,12))
        x_y_offset = \
            nd.repeat(
                nd.expand_dims(
                    nd.repeat(
                        nd.concat(
                            x_offset, y_offset, dim=1), repeats=3, axis=0
                    ).reshape((-1, 2)),
                    0
                ),
                repeats=batch_size, axis=0
            )

        # projects the anchor box sizes to match with the previous x_y_offset
        # grid setup
        tmp_anchors = \
            nd.repeat(
                nd.expand_dims(
                    nd.repeat(
                        nd.expand_dims(
                            anchors[anchors_masks[i]], 0
                        ),
                        repeats=stride * stride, axis=0
                    ).reshape((-1, 2)),
                    0
                ),
                repeats=batch_size, axis=0
            )

        # add the x,y offset to the xy of the prediction to get coordinates
        # relative to the feature map origin instead of the grid cell origin
        prediction[:, step[i][0]:step[i][1], :2] += x_y_offset

        # Scale the current feature map to match the input image size
        prediction[:, step[i][0]:step[i][1], :2] *= (float(input_dim) / stride)

        # scale the hw of the prediction to be relative to the anchorboxes
        prediction[:, step[i][0]:step[i][1], 2:4] = \
            nd.exp(prediction[:, step[i][0]:step[i][1], 2:4]) * tmp_anchors

    return prediction
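A shape-level usage sketch (assumptions: batch of 2, 80 classes so 85 channels, and 10647 = (13*13 + 26*26 + 52*52) * 3 rows, matching the hardcoded steps above):

from mxnet import nd

anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
           (59, 119), (116, 90), (156, 198), (373, 326)]
pred = nd.random.uniform(shape=(2, 10647, 85))     # fake network output
out = predict_transform(pred, input_dim=416, anchors=anchors)
print(out.shape)  # (2, 10647, 85), xywh now in input-image coordinates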
Code Example #14
def prep_final_label(labels, num_classes, input_dim=416):
    # expected format for labels:
    # [[x, y, w, h, objectivity, class0, class1, ...],
    #  [x, y, w, h, objectivity, class0, class1, ...],
    #  ...
    #  [x, y, w, h, objectivity, class0, class1, ...]]
    # Shape: (30, 5 + num_classes)
    # TODO The number of labels is hardcoded to 30, I think this is the max
    # number of items in a single image in the dataset. I should check on that
    ctx = labels.context

    # These anchors are borrowed from those calculated on the COCO dataset in
    # the YOLOv3 paper
    anchors = nd.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                        (59, 119), (116, 90), (156, 198), (373, 326)],
                       ctx=ctx)

    # This determines which bounding boxes to use at the different pyramids
    # Looks like the idea is to locate the larger anchor boxes at the
    # smaller feature maps i.e. further downstream of network
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    # create empty labels that will eventually contain ground-truth labels with
    # dimensions relative to the feature map anchor boxes
    label_1 = nd.zeros(shape=(13, 13, 3, num_classes + 5),
                       dtype="float32",
                       ctx=ctx)
    label_2 = nd.zeros(shape=(26, 26, 3, num_classes + 5),
                       dtype="float32",
                       ctx=ctx)
    label_3 = nd.zeros(shape=(52, 52, 3, num_classes + 5),
                       dtype="float32",
                       ctx=ctx)

    # create empty labels that will eventually contain ground-truth labels with
    # dimensions relative to the source input image
    true_label_1 = nd.zeros(shape=(13, 13, 3, 5), dtype="float32", ctx=ctx)
    true_label_2 = nd.zeros(shape=(26, 26, 3, 5), dtype="float32", ctx=ctx)
    true_label_3 = nd.zeros(shape=(52, 52, 3, 5), dtype="float32", ctx=ctx)

    label_list = [label_1, label_2, label_3]
    true_label_list = [true_label_1, true_label_2, true_label_3]

    # loop over the individual labels in this hardcoded label file
    for x_box in range(labels.shape[0]):
        # if the objectivity score is 0, then we don't care about this label
        if labels[x_box, 4] == 0.0:
            break

        # loop from 0-2 to handle the different pyramid scales
        for i in range(3):
            # stride == The size of the current feature map
            stride = 2**i * 13

            # the anchor boxes to reference at this pyramid level
            tmp_anchors = anchors[anchors_mask[i]]

            # scale the xywh to the current feature map size so the coordinates
            # are relative to the feature map
            # then repeat those values across dimension 0 so we can determine
            # which bounding box has the highest IoU
            tmp_xywh = nd.repeat(nd.expand_dims(labels[x_box, :4] * stride,
                                                axis=0),
                                 repeats=tmp_anchors.shape[0],
                                 axis=0)

            # copy the previous tensor and plug in the bounding box height and
            # width. This allows us to retain the correct bounding box centers
            # and only change the bounding box size
            # Note that we are scaling the bounding box wh so that they are also
            # relative to the size of the feature map
            anchor_xywh = tmp_xywh.copy()
            anchor_xywh[:, 2:4] = tmp_anchors / input_dim * stride

            # determine which of these bounding boxes has the highest IoU and
            # thus is the best anchorbox for this label
            best_anchor = nd.argmax(bbox_iou(tmp_xywh, anchor_xywh), axis=0)
            label = labels[x_box].copy()

            # scale the offsets again (TODO why do this again?), make sure
            # we have nice round numbers
            tmp_idx = nd.floor(label[:2] * stride)

            # TODO We don't need to calculate this twice
            label[:2] = label[:2] * stride

            # subtract the floored values so that we just get an offset from the
            # origin of this feature location scaled 0-1
            label[:2] -= tmp_idx
            tmp_idx = tmp_idx.astype("int")

            # calculate the offset of the ground truth from our best fit anchor
            # box based on equation `p * e ^ (t) where `p` is the anchor box and
            # `t` is the ground truth label

            label[2:4] = nd.log(label[2:4] * input_dim /
                                tmp_anchors[best_anchor].reshape(-1) + 1e-12)

            # flip the x and y coordinates for some reason (TODO why? does this
            # work the other way?) and assign to correct grid location and
            # anchor box. (TODO this doesn't allow for multiple objects in the
            # same grid location with the same bounding box. what do in that
            # case?)
            label_list[i][tmp_idx[1], tmp_idx[0], best_anchor] = label

            # scale what we just figured out to the size of the original image
            # for convenience of display, I guess??? TODO why do this?
            true_xywhs = labels[x_box, :5] * input_dim
            true_xywhs[4] = 1.0
            true_label_list[i][tmp_idx[1], tmp_idx[0],
                               best_anchor] = true_xywhs

    # reshape our network label so it is shape (num_bounding_boxes, 5 + class_count)
    t_y = nd.concat(label_1.reshape((-1, num_classes + 5)),
                    label_2.reshape((-1, num_classes + 5)),
                    label_3.reshape((-1, num_classes + 5)),
                    dim=0)

    # reshape our human labels so it is shape(num_bounding_boxes, 5)
    t_xywhs = nd.concat(true_label_1.reshape((-1, 5)),
                        true_label_2.reshape((-1, 5)),
                        true_label_3.reshape((-1, 5)),
                        dim=0)

    return t_y, t_xywhs
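prep_final_label (here and in the variants below) calls a bbox_iou helper that is not shown. A minimal center-format IoU sketch consistent with how it is called here on two (N, 4) xywh tensors; this is an assumption, not the repo's actual implementation, and the numpy variant used in Code Example #17 additionally takes a transform flag for corner-format boxes:

from mxnet import nd

def bbox_iou(box1, box2):
    # corners from center-format (x, y, w, h) boxes
    b1x1, b1y1 = box1[:, 0] - box1[:, 2] / 2, box1[:, 1] - box1[:, 3] / 2
    b1x2, b1y2 = box1[:, 0] + box1[:, 2] / 2, box1[:, 1] + box1[:, 3] / 2
    b2x1, b2y1 = box2[:, 0] - box2[:, 2] / 2, box2[:, 1] - box2[:, 3] / 2
    b2x2, b2y2 = box2[:, 0] + box2[:, 2] / 2, box2[:, 1] + box2[:, 3] / 2
    # intersection area, clipped at zero
    iw = nd.maximum(nd.minimum(b1x2, b2x2) - nd.maximum(b1x1, b2x1), 0)
    ih = nd.maximum(nd.minimum(b1y2, b2y2) - nd.maximum(b1y1, b2y1), 0)
    inter = iw * ih
    union = (b1x2 - b1x1) * (b1y2 - b1y1) + (b2x2 - b2x1) * (b2y2 - b2y1) - inter
    return inter / (union + 1e-12)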
Code Example #15
def prep_final_label(labels, num_classes, input_dim=416):
    ctx = labels.context
    anchors = nd.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                        (59, 119), (116, 90), (156, 198), (373, 326)],
                       ctx=ctx)
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    label_1 = nd.zeros(shape=(13, 13, 3, num_classes + 5),
                       dtype="float32",
                       ctx=ctx)
    label_2 = nd.zeros(shape=(26, 26, 3, num_classes + 5),
                       dtype="float32",
                       ctx=ctx)
    label_3 = nd.zeros(shape=(52, 52, 3, num_classes + 5),
                       dtype="float32",
                       ctx=ctx)

    true_label_1 = nd.zeros(shape=(13, 13, 3, 5), dtype="float32", ctx=ctx)
    true_label_2 = nd.zeros(shape=(26, 26, 3, 5), dtype="float32", ctx=ctx)
    true_label_3 = nd.zeros(shape=(52, 52, 3, 5), dtype="float32", ctx=ctx)

    label_list = [label_1, label_2, label_3]
    true_label_list = [true_label_1, true_label_2, true_label_3]
    for x_box in range(labels.shape[0]):
        if labels[x_box, 4] == 0.0:
            break
        for i in range(3):
            stride = 2**i * 13
            tmp_anchors = anchors[anchors_mask[i]]
            tmp_xywh = nd.repeat(nd.expand_dims(labels[x_box, :4] * stride,
                                                axis=0),
                                 repeats=tmp_anchors.shape[0],
                                 axis=0)
            anchor_xywh = tmp_xywh.copy()
            anchor_xywh[:, 2:4] = tmp_anchors / input_dim * stride
            best_anchor = nd.argmax(bbox_iou(tmp_xywh, anchor_xywh), axis=0)
            label = labels[x_box].copy()
            tmp_idx = nd.floor(label[:2] * stride)
            label[:2] = label[:2] * stride
            label[:2] -= tmp_idx
            tmp_idx = tmp_idx.astype("int")
            label[2:4] = nd.log(label[2:4] * input_dim /
                                tmp_anchors[best_anchor].reshape(-1) + 1e-12)

            label_list[i][tmp_idx[1], tmp_idx[0], best_anchor] = label

            true_xywhs = labels[x_box, :5] * input_dim
            true_xywhs[4] = 1.0
            true_label_list[i][tmp_idx[1], tmp_idx[0],
                               best_anchor] = true_xywhs

    t_y = nd.concat(label_1.reshape((-1, num_classes + 5)),
                    label_2.reshape((-1, num_classes + 5)),
                    label_3.reshape((-1, num_classes + 5)),
                    dim=0)
    t_xywhs = nd.concat(true_label_1.reshape((-1, 5)),
                        true_label_2.reshape((-1, 5)),
                        true_label_3.reshape((-1, 5)),
                        dim=0)

    return t_y, t_xywhs
Code Example #16
def prep_final_label(labels, num_classes, input_dim=416):
    '''
        Inputs:
            labels : label rows after resizing to 416 [30 rows, [x,y,w,h,pc,c0-c23] = 5+24 = 29]
            num_classes : number of classes (24 here)
            input_dim : the unified input size fed to the network
        Outputs:
            t_y: [8,10647,7] = [batch_num, 13x13x3+26x26x3+52x52x3, [tx,ty,tw,th,pc,c1,c2]]
            t_xywhs: [8,10647,5] = [batch_num, 13x13x3+26x26x3+52x52x3, [x,y,w,h,pc]]
    '''
    ctx = labels.context
    # define the 9 anchor box sizes
    anchors = nd.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                        (59, 119), (116, 90), (156, 198), (373, 326)],
                       ctx=ctx)
    # define the 3 anchor groups
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    # define label tensors for the 3 grid sizes = (size, size, 3 anchors, 5+num_classes)
    label_1 = nd.zeros(shape=(13, 13, 3, num_classes + 5),
                       dtype="float32",
                       ctx=ctx)
    label_2 = nd.zeros(shape=(26, 26, 3, num_classes + 5),
                       dtype="float32",
                       ctx=ctx)
    label_3 = nd.zeros(shape=(52, 52, 3, num_classes + 5),
                       dtype="float32",
                       ctx=ctx)

    # define true-label tensors for the 3 grid sizes = (size, size, 3, 5)
    true_label_1 = nd.zeros(shape=(13, 13, 3, 5), dtype="float32", ctx=ctx)
    true_label_2 = nd.zeros(shape=(26, 26, 3, 5), dtype="float32", ctx=ctx)
    true_label_3 = nd.zeros(shape=(52, 52, 3, 5), dtype="float32", ctx=ctx)

    # lists holding the labels and true labels for the 3 scales
    label_list = [label_1, label_2, label_3]
    true_label_list = [true_label_1, true_label_2, true_label_3]

    # process the rows [x,y,w,h,pc,c0,...,c23] one by one
    m, n = labels.shape
    for x_box in range(m):
        ##        print(u'Processing object {}'.format(x_box))
        if labels[x_box, 4].asscalar() == 0.0:
            ##            print(u'labels[{},4]==0.0, stop processing this image.'.format(x_box))
            break

        # loop over strides 13, 26, 52 = the three grid sizes 13x13, 26x26, 52x52
        for i in range(3):
            stride = 2**i * 13

            tmp_anchors = anchors[anchors_mask[i]]  # one group of 3 anchor sizes, shape [3,2]

            # ground-truth xywh scaled by the grid size, repeated once per anchor
            tmp_xywh = nd.repeat(
                nd.expand_dims(labels[x_box, :4] * stride, axis=0),
                repeats=tmp_anchors.shape[0],
                axis=0)  # [3,4]: one row per anchor, columns are [x,y,w,h]*stride

            # [3, [x, y, anchor_w*stride/416, anchor_h*stride/416]]
            anchor_xywh = tmp_xywh.copy()
            anchor_xywh[:, 2:4] = tmp_anchors / input_dim * stride  # [3, 4]

            # index of the best-matching anchor
            best_anchor = nd.argmax(bbox_iou(tmp_xywh, anchor_xywh), axis=0)
            ##            print(u'best anchor index at this scale [13,26,52]: {}'.format(best_anchor))

            # compute the grid-cell index of the box
            label = labels[x_box].copy()  # resized label, shape [1,29], row = [x,y,w,h,pc,c0,...,c23]
            k = nd.floor(label[:2] * stride)
            label[:2] = label[:2] * stride - k  # [x,y]*stride minus its floor = fractional offset
            ##            print(u'offset within the cell: {}'.format(label[:2]))

            tmp_idx = k  # floor of [x,y]*stride = the cell index
            tmp_idx = tmp_idx.astype("int")
            ##            print(u'cell index: {}'.format(tmp_idx))
            label[2:4] = nd.log(label[2:4] * input_dim /
                                tmp_anchors[best_anchor].reshape(-1) + 1e-12)

            true_xywhs = labels[x_box, :5] * input_dim
            true_xywhs[4] = 1.0

            label_list[i][tmp_idx[1], tmp_idx[0], best_anchor] = label  # here
            ##            print('sum(true_xywhs[4]==1):{}'.format(nd.sum(true_xywhs[4]==1)))
            true_label_list[i][tmp_idx[1], tmp_idx[0],
                               best_anchor] = true_xywhs

    t_y = nd.concat(label_list[0].reshape((-1, num_classes + 5)),
                    label_list[1].reshape((-1, num_classes + 5)),
                    label_list[2].reshape((-1, num_classes + 5)),
                    dim=0)

    t_xywhs = nd.concat(true_label_list[0].reshape((-1, 5)),
                        true_label_list[1].reshape((-1, 5)),
                        true_label_list[2].reshape((-1, 5)),
                        dim=0)
    return t_y, t_xywhs
Code Example #17
def write_results(prediction, num_classes, confidence=0.5, nms_conf=0.4):

    ### 
    box_confidence = nd.repeat(nd.expand_dims(prediction[:,:,4],2),num_classes,axis=2)
    prediction[:,:,5:5+num_classes] = prediction[:,:,5:5+num_classes] * box_confidence
    ###
    #conf_mask = (prediction[:, :, 4] > confidence).expand_dims(2)
    conf_mask = prediction[:, :, 5:5+num_classes] > confidence
    prediction[:, :, 5:5+num_classes]  = prediction[:, :, 5:5+num_classes] * conf_mask

    batch_size = prediction.shape[0]

    box_corner = nd.zeros(prediction.shape, dtype="float32")
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]
    #pdb.set_trace()
    output = None

    for ind in range(batch_size):
        image_pred = prediction[ind]

        max_conf = nd.max(image_pred[:, 5:5 + num_classes], axis=1)
        max_conf_score = nd.argmax(image_pred[:, 5:5 + num_classes], axis=1)
        max_conf = max_conf.astype("float32").expand_dims(1)
        max_conf_score = max_conf_score.astype("float32").expand_dims(1)
        image_pred = nd.concat(image_pred[:, :5], max_conf, max_conf_score, dim=1).asnumpy()
        non_zero_ind = np.nonzero(image_pred[:, 5])
        try:
            image_pred_ = image_pred[non_zero_ind, :].reshape((-1, 7))
        except Exception as e:
            print(e)
            continue
        if image_pred_.shape[0] == 0:
            continue
        # Get the various classes detected in the image
        img_classes = np.unique(image_pred_[:, -1])
        # -1 index holds the class index

        for cls in img_classes:
            # get the detections with one particular class
            cls_mask = image_pred_ * np.expand_dims(image_pred_[:, -1] == cls, axis=1)
            class_mask_ind = np.nonzero(cls_mask[:, -2])
            image_pred_class = image_pred_[class_mask_ind].reshape((-1, 7))

            # sort the detections such that the entry with the highest class
            # confidence (already scaled by objectness) is at the top
            conf_sort_index = np.argsort(image_pred_class[:, 5])[::-1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.shape[0]

            for i in range(idx):
                # Get the IOUs of all boxes that come after the one we are looking at
                # in the loop
                try:
                    box1 = np.expand_dims(image_pred_class[i], 0)
                    box2 = image_pred_class[i + 1:]
                    if len(box2) == 0:
                        break
                    box1 = np.repeat(box1, repeats=box2.shape[0], axis=0)
                    ious = bbox_iou(box1, box2, transform=False)
                except ValueError:
                    break
                except IndexError:
                    break

                # Zero out all the detections that have IoU > threshold
                iou_mask = np.expand_dims(ious < nms_conf, 1).astype(np.float32)
                image_pred_class[i + 1:] *= iou_mask

                # Remove the non-zero entries
                non_zero_ind = np.nonzero(image_pred_class[:, 5])
                image_pred_class = image_pred_class[non_zero_ind].reshape((-1, 7))

            batch_ind = np.ones((image_pred_class.shape[0], 1)) * ind

            seq = nd.concat(nd.array(batch_ind), nd.array(image_pred_class), dim=1)

            if output is None:
                output = seq
            else:
                output = nd.concat(output, seq, dim=0)
    return output
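The inner loop implements greedy NMS by zeroing suppressed rows and then compacting; a self-contained toy version of that masking step in numpy, with a fake IoU vector standing in for bbox_iou:

import numpy as np

# rows: [x1, y1, x2, y2, objectness, class_conf, class_idx], sorted by confidence
dets = np.array([[0., 0., 10., 10., 0.9, 0.9, 0.],
                 [1., 1., 11., 11., 0.8, 0.8, 0.],    # overlaps the first box
                 [50., 50., 60., 60., 0.7, 0.7, 0.]])
ious = np.array([0.85, 0.0])                  # IoU of row 0 against rows 1..2
iou_mask = np.expand_dims(ious < 0.4, 1).astype(np.float32)
dets[1:] *= iou_mask                          # zero out suppressed detections
dets = dets[np.nonzero(dets[:, 5])].reshape((-1, 7))
print(dets.shape)  # (2, 7): the overlapping box was removed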