Beispiel #1
0
def yolo2_forward(x, num_class, anchor_scales):
    """Split raw YOLOv2 convolution output into detections and raw parts.

    The channel axis is moved last and then split into per-anchor groups of
    ``num_class + 5`` values: class scores, one objectness score, two centre
    values and two size values.

    Args:
        x: convolution output with the channel axis at position 1.
        num_class: number of object classes.
        anchor_scales: anchor sizes, forwarded to ``transform_size``.

    Returns:
        A 4-tuple ``(output, cls_pred, score, xywh)`` where ``output`` is the
        concatenation ``[class id, score, left, top, right, bottom]`` along
        axis 4, ``cls_pred`` the raw class scores, ``score`` the sigmoid
        objectness, and ``xywh`` the sigmoid centre joined with the raw size.
    """
    values_per_anchor = num_class + 5
    # Channels last, then (batch, m, n, anchors, values_per_anchor).
    feat = x.transpose((0, 2, 3, 1)).reshape((0, 0, 0, -1, values_per_anchor))

    def _channels(first, last):
        # Slice a contiguous range of the per-anchor value axis.
        return feat.slice_axis(begin=first, end=last, axis=-1)

    cls_pred = _channels(0, num_class)
    score = nd.sigmoid(_channels(num_class, num_class + 1))
    # Sigmoid keeps the centre prediction inside (0, 1) for each grid cell.
    xy = nd.sigmoid(_channels(num_class + 1, num_class + 3))
    wh = _channels(num_class + 3, num_class + 5)

    # Helpers defined elsewhere; per the original notes they map the centre
    # and size to values relative to the whole image.
    center_x, center_y = transform_center(xy)
    box_w, box_h = transform_size(wh, anchor_scales)

    # Predicted class id is the arg-max over the class scores.
    cid = nd.argmax(cls_pred, axis=-1, keepdims=True)

    # Centre/size -> corner format, clipped to the unit square.
    dx = box_w / 2
    dy = box_h / 2
    left = nd.clip(center_x - dx, 0, 1)
    top = nd.clip(center_y - dy, 0, 1)
    right = nd.clip(center_x + dx, 0, 1)
    bottom = nd.clip(center_y + dy, 0, 1)

    # Open question from the original author: why do `left` and `top`
    # contain so many zeros?
    output = nd.concat(cid, score, left, top, right, bottom, dim=4)
    return output, cls_pred, score, nd.concat(xy, wh, dim=4)
Beispiel #2
0
def bgr2hsi(x):
    """Convert a batch of BGR images to the HSI colour space.

    Args:
        x: NDArray laid out as (n, c, w, h) whose channel axis is ordered
            blue, green, red.

    Returns:
        NDArray laid out as (n, 3, w, h) with channels hue (radians, in
        [0, 2*pi]), saturation (clipped to [0, 1]) and intensity (mean of
        the three colour channels, in the input's units).

    Note:
        Relies on a module-level ``eps`` used to avoid division by zero.
    """
    pixels = x.astype('float32')
    sum_RGB = nd.sum(pixels, axis=1)
    # The input is BGR, so blue is channel 0 and red is channel 2.  The
    # previous code read channel 0 as red, which swapped red and blue in the
    # hue computation (saturation and intensity are symmetric and were
    # unaffected).
    B = pixels[:, 0, :, :]
    G = pixels[:, 1, :, :]
    R = pixels[:, 2, :, :]

    # Chromaticity coordinates; eps keeps black pixels finite.
    r = (R + eps) / (sum_RGB + 3 * eps)
    g = (G + eps) / (sum_RGB + 3 * eps)
    b = (B + eps) / (sum_RGB + 3 * eps)

    # cos(theta) of the hue angle; clipped so rounding error cannot push the
    # argument of arccos outside [-1, 1].
    cossita = (2 * r - g - b) / (2 * ((r - g)**2 + (r - b) *
                                      (g - b))**(1.0 / 2) + eps)
    cossita_cilp = nd.clip(cossita, -1.0, 1.0)

    sita = nd.arccos(cossita_cilp)

    # Hue is theta when g >= b, otherwise the reflex angle.
    h = (nd.where(g >= b, sita, 2 * math.pi - sita)).expand_dims(axis=1)

    s = (1 - 3 * nd.minimum(nd.minimum(r, g), b)).expand_dims(axis=1)
    s = nd.clip(s, 0., 1.)

    i = ((R + G + B) / 3).expand_dims(axis=1)

    return nd.concat(h, s, i, dim=1)
Beispiel #3
0
def check_tbox(image, label):
    """Plot the target box that ``yolo2_target`` generates for one sample.

    The label is turned into YOLOv2 targets, decoded back into a corner-format
    box, and drawn in green on top of the de-normalised image.

    Args:
        image: NDArray of shape (3, 256, 256), normalised with the
            module-level ``RGB_MEAN`` / ``RGB_STD``.
        label: NDArray of shape (1, 5).

    Returns:
        numpy array with the four box coordinates scaled by the image size.
    """
    plt.clf()
    ctx = image.context
    rgb_mean = RGB_MEAN.as_in_context(ctx)
    rgb_std = RGB_STD.as_in_context(ctx)
    assert label.shape == (1, 5), (
        "shape of label expected [1, 5], but given {}".format(label.shape))
    assert image.shape == (3, 256, 256), (
        "shape of image expected [3, 256, 256], given {}".format(image.shape))

    # Placeholder score tensor passed as the first argument of yolo2_target.
    dummy_scores = nd.zeros((1, 16, 16, 3, 1))
    batched_label = label.expand_dims(axis=0)
    tid, tscore, tbox, _ = yolo2_target(dummy_scores, batched_label,
                                        anchor_scales)

    # Decode the target box with the same helpers the forward pass uses.
    center = nd.sigmoid(tbox.slice_axis(begin=0, end=2, axis=-1))
    cx, cy = transform_center(center)
    bw, bh = transform_size(tbox.slice_axis(begin=2, end=4, axis=-1),
                            anchor_scales)

    dx = bw / 2
    dy = bh / 2
    left = nd.clip(cx - dx, 0, 1)
    top = nd.clip(cy - dy, 0, 1)
    right = nd.clip(cx + dx, 0, 1)
    bottom = nd.clip(cy + dy, 0, 1)

    detections = nd.concat(tid, tscore, left, top, right, bottom, dim=-1)
    kept = nd.contrib.box_nms(detections.reshape((0, -1, 6))).asnumpy()

    # Take the first row after NMS and scale its coordinates by image size.
    scale = np.array([image.shape[1], image.shape[2]] * 2)
    box = kept[0][0][2:6] * scale
    rect = box_to_rect(nd.array(box), 'green', 2)

    # Undo the normalisation and bring pixel values into [0, 1] for imshow.
    channels_last = image.transpose((1, 2, 0))
    pixels = (channels_last * rgb_std + rgb_mean).asnumpy()
    pixels = pixels.clip(0, 255) / 255.
    plt.imshow(pixels)
    plt.gca().add_patch(rect)
    plt.show()
    return box
Beispiel #4
0
 def action_clip(self, action):
     """Clip a batch of actions to the bounds stored in ``self.action_bound``.

     With two action dimensions each column is clipped with its own
     (low, high) pair; otherwise the whole array is clipped with the first
     pair.
     """
     def _bounds(dim):
         # (low, high) for one action dimension as Python floats.
         row = self.action_bound[dim]
         return float(row[0].asnumpy()), float(row[1].asnumpy())

     if len(action[0]) != 2:
         low, high = _bounds(0)
         return nd.clip(action, low, high)

     columns = []
     for dim in range(2):
         low, high = _bounds(dim)
         columns.append(nd.clip(action[:, dim], low, high).reshape(-1, 1))
     return nd.concat(*columns)
Beispiel #5
0
    def forward(self, adj, feat):
        r"""Compute (Dense) Graph Convolution layer.

        Parameters
        ----------
        adj : mxnet.NDArray
            The adjacency matrix of the graph to apply Graph Convolution on.
            When applied to a unidirectional bipartite graph, ``adj`` should
            be of shape :math:`(N_{out}, N_{in})`; when applied to a
            homogeneous graph, ``adj`` should be of shape :math:`(N, N)`.
            In both cases, a row represents a destination node while a column
            represents a source node.
        feat : mxnet.NDArray
            The input feature.

        Returns
        -------
        mxnet.NDArray
            The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
            is size of output feature.
        """
        # Match the adjacency matrix to the feature's dtype and device.
        adj = adj.astype(feat.dtype).as_in_context(feat.context)
        # Column sums = source degrees, row sums = destination degrees;
        # clipped to at least 1 so isolated nodes do not divide by zero.
        src_degrees = nd.clip(adj.sum(axis=0), a_min=1, a_max=float('inf'))
        dst_degrees = nd.clip(adj.sum(axis=1), a_min=1, a_max=float('inf'))
        feat_src = feat

        if self._norm == 'both':
            # Symmetric normalisation: scale source features by deg^-1/2.
            norm_src = nd.power(src_degrees, -0.5)
            # Append singleton axes so the norm broadcasts over feature dims.
            shp_src = norm_src.shape + (1, ) * (feat.ndim - 1)
            norm_src = norm_src.reshape(shp_src).as_in_context(feat.context)
            feat_src = feat_src * norm_src

        if self._in_feats > self._out_feats:
            # mult W first to reduce the feature size for aggregation.
            feat_src = nd.dot(feat_src, self.weight.data(feat_src.context))
            rst = nd.dot(adj, feat_src)
        else:
            # aggregate first then mult W
            rst = nd.dot(adj, feat_src)
            rst = nd.dot(rst, self.weight.data(feat_src.context))

        if self._norm != 'none':
            if self._norm == 'both':
                # Second half of the symmetric normalisation.
                norm_dst = nd.power(dst_degrees, -0.5)
            else:  # right
                # Plain averaging over incoming edges.
                norm_dst = 1.0 / dst_degrees
            shp_dst = norm_dst.shape + (1, ) * (feat.ndim - 1)
            norm_dst = norm_dst.reshape(shp_dst).as_in_context(feat.context)
            rst = rst * norm_dst

        if self.bias is not None:
            rst = rst + self.bias.data(feat.context)

        if self._activation is not None:
            rst = self._activation(rst)

        return rst
Beispiel #6
0
def getDefaultBoxes(fmap, s=None, r=None,
                    offset=None, norm=None, clip=False,
                    srmode='few', omode='flatten'):
    """Generate default (anchor) boxes for every cell of a feature map.

    Args:
        fmap: feature map with a 4-element shape; only the last two entries
            (height, width) are used.
        s: box scales, defaults to ``[1.]``.
        r: box aspect ratios, defaults to ``[1.]``.
        offset: at most two values added to the corner coordinates before
            normalisation, defaults to ``[0.5, 0.5]``.
        norm: at most two values the coordinates are divided by, defaults to
            ``[fw, fh]``.
        clip: when true, clip the normalised coordinates to [0, 1].
        srmode: ``'few'`` or ``'many'``; forwarded to ``getwh``.
        omode: ``'flatten'`` returns shape (1, -1, 4); ``'stack'`` returns
            shape (1, fh, fw, boxes_per_cell, 4).

    Returns:
        NDArray of corner-format boxes (left, up, right, down).
    """
    assert omode in ('flatten', 'stack')
    assert srmode in ('few', 'many')
    _, _, fh, fw = fmap.shape

    scales = nd.array([1.] if s is None else s)
    ratios = nd.array([1.] if r is None else r)
    width, height = getwh(scales, ratios, fw, fh, srmode)
    boxes_per_cell = width.size

    # Per-cell (x, y) index grids, repeated so that every box has its centre
    # available for both the upper-left and the lower-right corner.
    col_idx = nd.repeat(nd.arange(fw).reshape((1, -1)), fh, axis=0)
    row_idx = nd.repeat(nd.arange(fh).reshape((-1, 1)), fw, axis=1)
    centers = nd.tile(nd.stack(col_idx, row_idx, axis=2),
                      [1, 1, boxes_per_cell * 2])

    # Flattened per-box corner offsets: (-w/2, -h/2, +w/2, +h/2).
    corner_offsets = nd.stack(width * -0.5, height * -0.5,
                              width * 0.5, height * 0.5,
                              axis=1).reshape((-1,))
    corners = (centers + corner_offsets).reshape(
        (fh, fw, boxes_per_cell, 2, 2))

    offset = nd.array([0.5, 0.5] if offset is None else offset)
    assert offset.size <= 2

    norm = nd.array([fw, fh] if norm is None else norm)
    assert norm.size <= 2

    corners = (corners + offset) / norm

    if clip:
        # In-place clip into the same buffer.
        nd.clip(corners, a_min=0., a_max=1., out=corners)

    if omode == 'flatten':
        out_shape = (1, -1, 4)
    else:
        out_shape = (1, fh, fw, boxes_per_cell, 4)
    return corners.reshape(out_shape)
Beispiel #7
0
def box_ciou(b1, b2):
    """Complete-IoU (CIoU) between corresponding boxes.

    Args:
        b1: NDArray, shape (batch, feat_w, feat_h, anchor_num, 4), boxes in
            (x, y, w, h) centre format (typically the predictions).
        b2: NDArray of the same shape and format (typically the targets).

    Returns:
        NDArray of shape (batch, feat_w, feat_h, anchor_num) with the CIoU
        of every box pair (the trailing coordinate axis is reduced away).
    """
    # MXNet's clip needs both bounds; use +inf for a lower-bound-only clamp.
    inf = float('inf')

    # Corners of the first set of boxes.
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh / 2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half
    # Corners of the second set of boxes.
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh / 2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    # Plain IoU.  nd.maximum / nd.minimum are the element-wise binary ops;
    # nd.max / nd.min are reductions and cannot compare two arrays.
    intersect_mins = nd.maximum(b1_mins, b2_mins)
    intersect_maxes = nd.minimum(b1_maxes, b2_maxes)
    intersect_wh = nd.maximum(intersect_maxes - intersect_mins,
                              nd.zeros_like(intersect_maxes))
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    union_area = b1_area + b2_area - intersect_area
    iou = intersect_area / nd.clip(union_area, a_min=1e-6, a_max=inf)

    # Squared distance between the two centres.
    center_distance = nd.sum(nd.power((b1_xy - b2_xy), 2), axis=-1)

    # Smallest box enclosing both boxes, and its squared diagonal.
    enclose_mins = nd.minimum(b1_mins, b2_mins)
    enclose_maxes = nd.maximum(b1_maxes, b2_maxes)
    enclose_wh = nd.maximum(enclose_maxes - enclose_mins,
                            nd.zeros_like(enclose_maxes))
    enclose_diagonal = nd.sum(nd.power(enclose_wh, 2), axis=-1)
    ciou = iou - 1.0 * center_distance / nd.clip(
        enclose_diagonal, a_min=1e-6, a_max=inf)

    # Aspect-ratio consistency term v and its trade-off weight alpha.
    b1_ratio = b1_wh[..., 0] / nd.clip(b1_wh[..., 1], a_min=1e-6, a_max=inf)
    b2_ratio = b2_wh[..., 0] / nd.clip(b2_wh[..., 1], a_min=1e-6, a_max=inf)
    v = (4 / (math.pi**2)) * nd.power(
        nd.arctan(b1_ratio) - nd.arctan(b2_ratio), 2)
    # The denominator must be bounded from BELOW to avoid division by zero;
    # bounding it from above (as before) blew alpha up for every box.
    alpha = v / nd.clip((1.0 - iou + v), a_min=1e-6, a_max=inf)
    ciou = ciou - alpha * v
    return ciou
Beispiel #8
0
def gan_mse(p, g, device):
    """Element-wise binary cross-entropy of ``p`` against a constant target.

    Despite the name, this computes a cross-entropy, not a squared error.

    Args:
        p: discriminator output, interpreted as a probability.
        g: ``'real'`` selects an all-ones target; anything else all-zeros.
        device: unused; kept for interface compatibility.

    Returns:
        NDArray shaped like ``p`` with the per-element loss.
    """
    if g == 'real':
        target = mx.nd.ones_like(p)
    else:
        target = mx.nd.zeros_like(p)
    target = nd.clip(target, 0, 1)

    # Probabilities are clamped to [1e-5, 1] before the log to stay finite.
    log_p = nd.log(nd.clip(p, 1e-5, 1))
    log_not_p = nd.log(nd.clip(1 - p, 1e-5, 1))
    return -target * log_p - (1 - target) * log_not_p
Beispiel #9
0
def bbox_iou(box1, box2, transform=True):
    """Row-wise IoU between two sets of boxes.

    Args:
        box1: boxes as an NDArray or anything ``nd.array`` accepts; columns
            0..3 hold the coordinates.
        box2: second set of boxes, same layout.
        transform: when true the inputs are (cx, cy, w, h) and are converted
            to corners first; otherwise they are already (x1, y1, x2, y2).

    Returns:
        NDArray of IoU values clipped to [1e-5, 1 - 1e-5].
    """
    def _prepare(boxes):
        # Coerce to NDArray and force non-negative coordinates.
        if not isinstance(boxes, nd.NDArray):
            boxes = nd.array(boxes)
        return nd.abs(boxes)

    def _center_to_corner(boxes):
        # (cx, cy, w, h) -> (x1, y1, x2, y2); other columns are kept.
        converted = boxes.copy()
        converted[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0
        converted[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0
        converted[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0
        converted[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0
        return converted

    box1 = _prepare(box1)
    box2 = _prepare(box2)
    if transform:
        box1 = _center_to_corner(box1)
        box2 = _center_to_corner(box2)

    b1_x1, b1_y1, b1_x2, b1_y2 = (box1[:, col] for col in range(4))
    b2_x1, b2_y1, b2_x2, b2_y2 = (box2[:, col] for col in range(4))

    # Intersection rectangle: larger of the starts, smaller of the ends.
    ix1 = nd.where(b1_x1 > b2_x1, b1_x1, b2_x1)
    iy1 = nd.where(b1_y1 > b2_y1, b1_y1, b2_y1)
    ix2 = nd.where(b1_x2 < b2_x2, b1_x2, b2_x2)
    iy2 = nd.where(b1_y2 < b2_y2, b1_y2, b2_y2)

    # Extents use the inclusive "+ 1" convention and are limited to
    # [0, 10000].
    overlap_w = nd.clip(ix2 - ix1 + 1, a_min=0, a_max=10000)
    overlap_h = nd.clip(iy2 - iy1 + 1, a_min=0, a_max=10000)
    inter_area = overlap_w * overlap_h

    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
    iou = inter_area / (b1_area + b2_area - inter_area)

    return nd.clip(iou, 1e-5, 1. - 1e-5)
Beispiel #10
0
def SiameseForward(cls_pred, bbox_pred, anchor_scales, Training=True):
    """Reorganise Siamese head outputs into per-anchor predictions.

    Both inputs are moved to channels-last and split into
    ``len(anchor_scales)`` anchor groups.

    Args:
        cls_pred: classification head output, channel axis at position 1.
        bbox_pred: box regression head output, channel axis at position 1.
        anchor_scales: anchor sizes; its length is the anchor count.
        Training: selects which outputs are returned.

    Returns:
        When ``Training`` is true: ``(cls_pred, xywh)`` with the reshaped
        class scores and the sigmoid centre joined with the raw size.
        Otherwise: ``(cid, score, corners, p_w, p_h)`` with the arg-max
        class id, the max soft-max score, clipped corner boxes and their
        width and height.
    """
    num_anchor = len(anchor_scales)

    def _per_anchor(feature_map):
        # Channels last, then split channels into (num_anchor, -1).
        channels_last = mx.ndarray.transpose(feature_map, (0, 2, 3, 1))
        return mx.ndarray.reshape(channels_last, (0, 0, 0, num_anchor, -1))

    cls_pred = _per_anchor(cls_pred)
    bbox_pred = _per_anchor(bbox_pred)

    xy = mx.ndarray.sigmoid(bbox_pred.slice_axis(begin=0, end=2, axis=-1))
    center_x, center_y = transform_center(xy)

    wh = bbox_pred.slice_axis(begin=2, end=4, axis=-1)
    box_w, box_h = transform_size(wh, anchor_scales)

    # Class id is the arg-max channel.
    cid = nd.argmax(cls_pred, axis=-1, keepdims=True)

    dx = box_w / 2
    dy = box_h / 2
    left = nd.clip(center_x - dx, 0, 1)
    top = nd.clip(center_y - dy, 0, 1)
    right = nd.clip(center_x + dx, 0, 1)
    bottom = nd.clip(center_y + dy, 0, 1)

    if Training:
        return cls_pred, nd.concat(xy, wh, dim=4)

    score = nd.max(nd.softmax(cls_pred, axis=-1), axis=-1, keepdims=True)
    p_w = right - left
    p_h = bottom - top
    corners = nd.concat(left, top, right, bottom, dim=4)
    return cid, score, corners, p_w, p_h
Beispiel #11
0
def bbox_iou(box1, box2, transform=True, ctx=None):
    """Measure the overlap (IoU) between predicted and ground-truth boxes.

    Per the original note, an IoU above 0.5 is a reasonably good prediction.

    Args:
        box1: boxes as an NDArray or anything ``nd.array`` accepts.
        box2: second set of boxes, same layout.
        transform: when true the inputs are (cx, cy, w, h) and are converted
            to (x1, y1, x2, y2) first.
        ctx: context used only when an input must be converted to NDArray.

    Returns:
        NDArray of row-wise IoU values clipped to [1e-5, 1 - 1e-5].
    """
    prepared = []
    for raw in (box1, box2):
        boxes = raw if isinstance(raw, nd.NDArray) else nd.array(raw, ctx=ctx)
        prepared.append(boxes)
    # Force non-negative coordinates.
    prepared = [nd.abs(boxes) for boxes in prepared]

    if transform:
        cornered = []
        for boxes in prepared:
            half_w = boxes[:, 2] / 2.0
            half_h = boxes[:, 3] / 2.0
            # Work on a copy so columns beyond the first four are kept.
            converted = boxes.copy()
            converted[:, 0] = boxes[:, 0] - half_w
            converted[:, 1] = boxes[:, 1] - half_h
            converted[:, 2] = boxes[:, 0] + half_w
            converted[:, 3] = boxes[:, 1] + half_h
            cornered.append(converted)
        prepared = cornered

    first, second = prepared
    b1_x1, b1_y1, b1_x2, b1_y2 = first[:, 0], first[:, 1], first[:, 2], first[:, 3]
    b2_x1, b2_y1, b2_x2, b2_y2 = second[:, 0], second[:, 1], second[:, 2], second[:, 3]

    # Corners of the intersection rectangle.
    inter_x1 = nd.where(b1_x1 > b2_x1, b1_x1, b2_x1)
    inter_y1 = nd.where(b1_y1 > b2_y1, b1_y1, b2_y1)
    inter_x2 = nd.where(b1_x2 < b2_x2, b1_x2, b2_x2)
    inter_y2 = nd.where(b1_y2 < b2_y2, b1_y2, b2_y2)

    # Inclusive "+ 1" extents, limited to [0, 10000].
    inter_w = nd.clip(inter_x2 - inter_x1 + 1, a_min=0, a_max=10000)
    inter_h = nd.clip(inter_y2 - inter_y1 + 1, a_min=0, a_max=10000)
    inter_area = inter_w * inter_h

    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
    union_area = b1_area + b2_area - inter_area

    return nd.clip(inter_area / union_area, 1e-5, 1. - 1e-5)
Beispiel #12
0
    def update(self, obs, returns, masks, actions, values, logpacs, lrnow,
               cliprange_now):
        """Run one clipped-surrogate (PPO-style) gradient step on a batch.

        Args:
            obs: observation batch; axes are permuted with (0, 3, 1, 2)
                before being fed to the network.
            returns: return targets for the value head.
            masks: unused in this method.
            actions: actions taken, used to pick entries from the logits.
            values: value estimates recorded at rollout time.
            logpacs: log-probabilities of ``actions`` at rollout time.
            lrnow: unused here (the learning-rate update is commented out).
            cliprange_now: clipping range for both the value and the
                policy-ratio terms.

        Returns:
            ``(actor_loss, vf_loss)`` as Python scalars.
        """
        # Advantage estimate, normalised to zero mean / unit std over the
        # batch (1e-8 guards against a zero std).
        advantages = returns - values
        advantages = (advantages - advantages.mean()) / (advantages.std() +
                                                         1e-8)
        # NOTE(review): advantages keep their incoming shape while the other
        # tensors below are reshaped to (-1, 1) — confirm broadcasting.
        advantages = nd.array(advantages,
                              ctx=self.args.ctx)  # .reshape((-1, 1))

        obs = np.transpose(obs, (0, 3, 1, 2))
        obs = nd.array(obs, ctx=self.args.ctx)
        actions = nd.array(actions, ctx=self.args.ctx).reshape((-1, 1))
        values = nd.array(values, ctx=self.args.ctx).reshape((-1, 1))
        returns = nd.array(returns, ctx=self.args.ctx).reshape((-1, 1))
        oldpi_log_prob = nd.array(logpacs, ctx=self.args.ctx).reshape((-1, 1))

        # self.trainer.set_learning_rate(lrnow)

        # Auto grad
        with autograd.record():
            # Value loss: the new prediction may move at most cliprange_now
            # away from the rollout-time value; take the worse of the
            # clipped and unclipped squared errors.
            vpred, logits = self.net(obs)
            vpred_clipped = values + nd.clip(vpred - values, -cliprange_now,
                                             cliprange_now)
            vf_loss1 = nd.square(vpred - returns)
            vf_loss2 = nd.square(vpred_clipped - returns)
            vf_loss = nd.mean(nd.maximum(vf_loss1, vf_loss2))

            # Action loss
            # pi_log_prob = self.net.log_prob(logits, actions)
            # NOTE(review): the picked logits are used directly as
            # log-probabilities — presumably the net already outputs
            # log-probs; verify.
            pi_log_prob = nd.pick(logits, actions, 1)
            ratio = nd.exp(pi_log_prob - oldpi_log_prob)
            surr1 = ratio * advantages
            surr2 = nd.clip(ratio, 1.0 - cliprange_now,
                            1.0 + cliprange_now) * advantages
            actor_loss = -nd.mean(nd.minimum(surr1, surr2))

            # Entropy term
            # entropy = self.net.entropy(logits)

            # Total loss
            # loss = vf_loss * self.args.value_coefficient + actor_loss
            # - entropy * self.args.entropy_coefficient
            loss = vf_loss + actor_loss

        # Compute gradients and updates
        loss.backward()
        self.trainer.step(obs.shape[0])

        return actor_loss.asscalar(), vf_loss.asscalar()  #, entropy.asscalar()
Beispiel #13
0
def log_rmse(net, features, labels):
    """Root-mean-square error between log predictions and log labels.

    Uses the module-level ``loss`` function.
    """
    predictions = net(features)
    # Raise anything below 1 up to 1 so the logarithm stays numerically
    # stable; the infinite upper bound leaves larger values untouched.
    clipped_preds = nd.clip(predictions, 1, float('inf'))
    mean_loss = loss(clipped_preds.log(), labels.log()).mean()
    return nd.sqrt(2 * mean_loss).asscalar()
Beispiel #14
0
    def forward(self, rcnn_cls_pred, rcnn_bbox_pred, rcnn_cls_gt,
                rcnn_bbox_gt):
        """Compute the RCNN classification and box-regression losses.

        Args:
            rcnn_cls_pred: class probabilities, (roi_num, num_classes)
                per the inline comment below.
            rcnn_bbox_pred: box predictions, (roi_num, num_classes*4)
                per the inline comment below.
            rcnn_cls_gt: ground-truth class index per ROI; values > 0 are
                treated as foreground.
            rcnn_bbox_gt: box regression targets, same layout as
                ``rcnn_bbox_pred``.

        Returns:
            ``(cls_log_loss, bbox_smooth_l1_loss)``, each a sum divided by
            ``self._roi_batch_size``.
        """
        # Build the box-loss weights without recording them for autograd.
        with autograd.pause():
            ctx = rcnn_cls_pred.context
            roi_num = rcnn_cls_pred.shape[0]
            roi_idx = nd.arange(roi_num, ctx=ctx).reshape(-1, 1)
            # 1 for foreground ROIs, 0 for background.
            fg_bbox_mask = (rcnn_cls_gt > 0).reshape(0, 1, 1)
            bbox_weights = nd.zeros_like(rcnn_bbox_gt).reshape(0, -1, 4)
            # Only the 4 coordinates of each ROI's ground-truth class get a
            # non-zero weight, and only when the ROI is foreground.
            bbox_weights[roi_idx, rcnn_cls_gt[:], :] = \
                self._bbox_weights.data(ctx).broadcast_to((roi_num, 1, 4)) * fg_bbox_mask
            bbox_weights = bbox_weights.reshape(0, -1)

        # rcnn_cls_pred.shape (roi_num, num_classes)
        # Probabilities are clamped to [1e-14, 1] so the log stays finite.
        rcnn_cls_log = nd.log(nd.clip(rcnn_cls_pred, 1e-14, 1))
        # Negative log-likelihood of the ground-truth class of every ROI.
        cls_log_loss = -nd.sum(rcnn_cls_log[
            roi_idx, rcnn_cls_gt]) / self._roi_batch_size.data(ctx)

        # rcnn_bbox_pred.shape (roi_num, num_classes*4)
        rcnn_bbox_smooth_l1 = nd.smooth_l1(rcnn_bbox_pred - rcnn_bbox_gt,
                                           scalar=1.0)
        bbox_smooth_l1_loss = nd.sum(
            rcnn_bbox_smooth_l1 *
            bbox_weights) / self._roi_batch_size.data(ctx)

        return cls_log_loss, bbox_smooth_l1_loss
Beispiel #15
0
    def add(self, bg_batch, r_max, add_rate=1.0):
        """Paste randomly projected plates onto a batch of backgrounds.

        Args:
            bg_batch: background images; axes 2 and 3 are used as the output
                height and width.
            r_max: forwarded to ``random_projection_LP_6D``.
            add_rate: probability that a given sample receives a plate.

        Returns:
            ``(img_batch, label_batch)``: the composited images clipped to
            [0, 1], and labels of shape (batch, 1, 10) that stay -1 for
            samples that received no plate.
        """
        ctx = bg_batch.context
        batch_size = bg_batch.shape[0]
        output_size = (bg_batch.shape[2], bg_batch.shape[3])

        mask_batch = nd.zeros_like(bg_batch)
        image_batch = nd.zeros_like(bg_batch)
        # -1 marks "no plate"; the last slot holds the plate type.
        label_batch = nd.ones((batch_size, 1, 10), ctx=ctx) * (-1)

        for sample in range(batch_size):
            if np.random.rand() <= add_rate:
                LP, LP_type, _ = self.draw_LP()

                camera = self.project_rect_6d
                input_size = (camera.camera_h, camera.camera_w)

                mask, image, label = self.random_projection_LP_6D(
                    LP, input_size, output_size, r_max)

                mask_batch[sample] = mask.as_in_context(ctx)
                image_batch[sample] = image.as_in_context(ctx)
                label_batch[sample, :, :-1] = label
                label_batch[sample, :, -1] = LP_type

        # Alpha-composite: plate where the mask is set, background elsewhere.
        blended = bg_batch * (1 - mask_batch) + image_batch * mask_batch
        return nd.clip(blended, 0, 1), label_batch
Beispiel #16
0
    def old_update(self, b_s, b_a, b_r, b_logpac):
        """Run one clipped-surrogate (PPO-style) update for a continuous policy.

        Args:
            b_s: batch of states, reshaped to (-1, observation_dim).
            b_a: batch of actions, reshaped to (-1, action_dim).
            b_r: batch of return targets, reshaped to (-1, 1).
            b_logpac: log-probabilities of ``b_a`` at rollout time, reshaped
                to (-1, action_dim).

        Returns:
            None; the network parameters are updated through
            ``self.trainer``.
        """
        b_s = nd.array(b_s, ctx=self.args.ctx).reshape(
            (-1, self.observation_dim))
        b_a = nd.array(b_a, ctx=self.args.ctx).reshape((-1, self.action_dim))
        b_r = nd.array(b_r, ctx=self.args.ctx).reshape((-1, 1))
        b_oldpi_log_prob = nd.array(b_logpac, ctx=self.args.ctx).reshape(
            (-1, self.action_dim))

        with autograd.record():
            # Value loss
            v_pred, mu, sigma = self.net(b_s)
            advantage = b_r - v_pred
            vf_loss = nd.mean(nd.square(advantage))

            # Detach from the computation graph so the policy term does not
            # back-propagate into the value head through the advantage.
            advantage = advantage.detach()

            # Action loss: probability ratio between the current and the
            # rollout-time policy, clipped to [1 - clip, 1 + clip].
            pi_log_prob = self.net.log_prob(b_a, mu, sigma)
            ratio = nd.exp(pi_log_prob - b_oldpi_log_prob)
            surr1 = ratio * advantage
            surr2 = nd.clip(ratio, 1.0 - self.args.clip_param,
                            1.0 + self.args.clip_param) * advantage
            actor_loss = -nd.mean(nd.minimum(surr1, surr2))
            entropy = self.net.entropy(sigma)

            # Total (maximize entropy to encourage exploration)
            loss = vf_loss * self.args.value_coefficient + actor_loss \
                    - entropy * self.args.entropy_coefficient

        loss.backward()
        # Step size is normalised by the batch size.
        self.trainer.step(b_s.shape[0])
def log_rmse(net,features,labels):
    """Root-mean-square error measured in log space.

    Uses the module-level ``loss`` function.
    """
    # nd.clip(x, min, max) limits values to [min, max]; values below 1 are
    # raised to 1 so the logarithm is more stable.
    log_pred = nd.clip(net(features), 1, float('inf')).log()
    log_true = labels.log()
    # Per the original note, the factor 2 cancels the 1/2 multiplier built
    # into the L2 loss, leaving the plain squared error.
    doubled_mean = 2 * loss(log_pred, log_true).mean()
    return nd.sqrt(doubled_mean).asscalar()
Beispiel #18
0
def get_rmse_log(net, X_train, y_train):
    """Return the root MSE between log predictions and log ground truth.

    Predictions are clipped to at least 1 before the logarithm. Uses the
    module-level ``square_loss`` function.
    """
    sample_count = X_train.shape[0]
    log_pred = nd.log(nd.clip(net(X_train), 1, float('inf')))
    log_true = nd.log(y_train)
    total_loss = nd.sum(square_loss(log_pred, log_true)).asscalar()
    return np.sqrt(2 * total_loss / sample_count)
Beispiel #19
0
    def _compute_yolo_iou(self, F, boxes1, boxes2):
        '''
        IoU of corresponding anchors.

        Args:
            F: unused; the NDArray API (``nd``) is called directly.
            boxes1: 5-D NDArray whose last axis holds (cx, cy, w, h).
            boxes2: NDArray with the same layout as ``boxes1``.

        Returns:
            NDArray with the last axis removed, holding the IoU of every
            pair of corresponding boxes, clipped to [0, 1].
        '''
        def _to_corners(boxes):
            # (cx, cy, w, h) -> (x1, y1, x2, y2) along a new last axis.
            half_w = boxes[:, :, :, :, 2] / 2.0
            half_h = boxes[:, :, :, :, 3] / 2.0
            x1 = boxes[:, :, :, :, 0] - half_w
            y1 = boxes[:, :, :, :, 1] - half_h
            x2 = boxes[:, :, :, :, 0] + half_w
            y2 = boxes[:, :, :, :, 1] + half_h
            # nd.stack takes the arrays as positional arguments, not a list.
            return nd.stack(x1, y1, x2, y2, axis=-1)

        boxes1_new = _to_corners(boxes1)
        boxes2_new = _to_corners(boxes2)

        # Corners of the intersection rectangle.
        upperleft = nd.maximum(boxes1_new[:, :, :, :, :2],
                               boxes2_new[:, :, :, :, :2])
        lowerright = nd.minimum(boxes1_new[:, :, :, :, 2:],
                                boxes2_new[:, :, :, :, 2:])

        # Negative extents mean no overlap.
        intersection_dims = nd.maximum(0.0, lowerright - upperleft)
        intersection_area = (intersection_dims[:, :, :, :, 0] *
                             intersection_dims[:, :, :, :, 1])

        # Box area is width * height from the ORIGINAL centre-format boxes.
        # Using the corner tensor here (x2 * y2) would not be an area.
        area1 = boxes1[:, :, :, :, 2] * boxes1[:, :, :, :, 3]
        area2 = boxes2[:, :, :, :, 2] * boxes2[:, :, :, :, 3]

        # Lower bound avoids division by zero for degenerate boxes.
        union_area = nd.maximum(1e-8, area1 + area2 - intersection_area)

        return nd.clip(intersection_area / union_area, a_min=0.0, a_max=1.0)
 def implement_1(self, x, label):
     '''
     Angular-margin (A-Softmax) logits, implemented following the paper.

     Args:
         x: input features, one row per sample.
         label: ground-truth class index per sample.

     Returns:
         Logits where the target-class entry moves from |x| * cos(theta)
         towards |x| * phi(theta), blended by 1 / (1 + lambda).
     '''
     #  weight normalize
     with x.context:
         w = self.weight.data()
     w_norm = w / nd.sqrt(nd.sum(nd.power(w, 2), axis=1)).reshape((-1, 1))
     #  cos_theta = x'w/|x|. note: |w| = 1
     x_norm = nd.power(x, 2)
     x_norm = nd.sum(x_norm, axis=1)
     x_norm = nd.sqrt(x_norm)
     cos_theta = nd.dot(x, w_norm, transpose_b=True)
     cos_theta = cos_theta / x_norm.reshape((-1, 1))
     #  guard arccos against rounding error
     cos_theta = nd.clip(cos_theta, -1, 1)
     #  cos_m_theta = cos(m * theta)
     cos_m_theta = self.margin_cos[self.margin](cos_theta)
     #  k is the index of the interval [k*pi/m, (k+1)*pi/m] containing theta,
     #  i.e. floor(m * theta / pi). nd.sign would give 1 for every positive
     #  angle and collapse phi(theta) to -cos(m*theta) - 2.
     with mx.autograd.pause():
         theta = nd.arccos(cos_theta)
         k = nd.floor(self.margin * theta / math.pi)
     #  i=j is phi_theta and i!=j is cos_theta
     phi_theta = ((-1)**k) * cos_m_theta - 2 * k
     x_norm_phi_theta = x_norm.reshape((-1, 1)) * phi_theta
     x_norm_cos_theta = x_norm.reshape((-1, 1)) * cos_theta
     #  i=j index
     with mx.autograd.pause():
         index = nd.one_hot(label, x_norm_phi_theta.shape[1])
     #  output: blend the target-class logit between the plain and the
     #  margin version with weight 1 / (1 + lambda)
     with mx.autograd.pause():
         lamb = self.__get_lambda()
     output = x_norm_cos_theta * 1.0
     output = output - x_norm_cos_theta * index / (1 + lamb)
     output = output + x_norm_phi_theta * index / (1 + lamb)
     return output
def log_rmse(features, labels, net, loss):
    """Log-space RMSE of ``net`` on ``features``, with debug printing.

    Prints the network parameters, the input features and the raw
    predictions before computing the metric; the network is therefore
    evaluated twice.
    """
    print('1   ', net.collect_params())
    print('2   ', features)
    print('3   ', net(features))
    # Clip predictions to at least 1 so the log stays well-behaved.
    log_pred = nd.clip(net(features), 1, float('inf')).log()
    log_true = labels.log()
    doubled_loss = 2 * loss(log_pred, log_true)
    return nd.sqrt(doubled_loss.mean()).asscalar()
Beispiel #22
0
def dynamic_range_compression(x, c=1, clip_val=1e-5):
    """Log-compress ``x`` after flooring it at ``clip_val``.

    params
    ------
    x: NDArray to compress
    c: compression factor applied to the logarithm
    clip_val: lower bound applied before the log so it stays finite

    returns
    -------
    NDArray ``log(max(x, clip_val)) * c``
    """
    # Only the lower bound matters.  Using x.max() as the upper bound clipped
    # nothing but cost a reduction plus a blocking asscalar() sync, so an
    # infinite upper bound gives the same result without the stall.
    return nd.log(nd.clip(x, a_min=clip_val, a_max=float('inf'))) * c
Beispiel #23
0
def embedding(data_iterator, net, ctx=mx.cpu()):
    """Dump per-sample network codes, images and labels to disk.

    For each batch the second network output is indexed by the sample's
    label, and the images are shifted and scaled back to uint8. Results are
    written to 'convet.ndarray', 'resize_image.ndarray' and 'label.ndarray'.

    Args:
        data_iterator: iterable of batches accepted by ``_get_batch``.
        net: network returning a pair whose second element is used.
        ctx: context the batches are loaded to.
    """
    def _extend(stacked, chunk):
        # Append `chunk` along axis 0, starting the stack on first use.
        if stacked is None:
            return chunk
        return nd.concat(stacked, chunk, dim=0)

    convnet_codes = None
    resize_images = None
    labels = None
    for batch in data_iterator:
        data, label = _get_batch(batch, ctx)
        row_ids = nd.arange(data.shape[0]).as_in_context(ctx)
        _, output = net(data)
        # Keep, for every sample, the slice that belongs to its own label.
        output = output[row_ids, :, label]
        output.wait_to_read()
        convnet_codes = _extend(convnet_codes, output)
        labels = _extend(labels, label)

        images = data.copyto(mx.cpu())
        if images.shape[1] != 1:
            # Add the per-channel offsets back on multi-channel images.
            for channel, shift in enumerate((0.4914, 0.4822, 0.4465)):
                images[:, channel, :, :] += shift
        images = nd.clip(images * 255, 0, 255).astype('uint8')
        resize_images = _extend(resize_images, images)

    nd.save('convet.ndarray', convnet_codes.as_in_context(mx.cpu()))
    nd.save('resize_image.ndarray', resize_images)
    nd.save('label.ndarray', labels.astype('int32').as_in_context(mx.cpu()))
Beispiel #24
0
def mmd_loss(x, y, ctx_model, t=0.1, kernel='diffusion'):
    '''
    Computes the MMD loss between two batches.

    :param x: batch_size x latent dimension
    :param y: second batch with the same latent dimension; the diffusion
        branch additionally assumes it has as many rows as ``x``
        (NOTE(review): the n x n off-diagonal mask is applied to the y-y
        kernel too — confirm callers always pass equal batch sizes)
    :param ctx_model: context on which helper tensors are created
    :param t: temperature of the information diffusion kernel
    :param kernel: ``'tv'`` uses pairwise L1 distances; any other value uses
        the information diffusion kernel
    :return: ``sum_xx + sum_yy - sum_xy``
    '''
    eps = 1e-6
    n,d = x.shape
    if kernel == 'tv':
        m = y.shape[0]

        sum_xx = nd.zeros(1, ctx=ctx_model)
        for i in range(n):
            for j in range(i+1, n):
                sum_xx = sum_xx + nd.norm(x[i] - x[j], ord=1)
        sum_xx = sum_xx / (n * (n-1))

        sum_yy = nd.zeros(1, ctx=ctx_model)
        for i in range(m):
            for j in range(i+1, m):
                sum_yy = sum_yy + nd.norm(y[i] - y[j], ord=1)
        sum_yy = sum_yy / (m * (m-1))

        sum_xy = nd.zeros(1, ctx=ctx_model)
        for i in range(n):
            for j in range(m):
                sum_xy = sum_xy + nd.norm(x[i] - y[j], ord=1)
        # Normalise the CROSS term by its number of pairs.  The previous code
        # divided sum_yy a second time and left sum_xy as a raw sum.
        sum_xy = sum_xy / (n * m)
    else:
        # Element-wise square roots of the (clipped) inputs; the dot products
        # below are the arguments of arccos in the diffusion kernel.
        qx = nd.sqrt(nd.clip(x, eps, 1))
        qy = nd.sqrt(nd.clip(y, eps, 1))
        xx = nd.dot(qx, qx, transpose_b=True)
        yy = nd.dot(qy, qy, transpose_b=True)
        xy = nd.dot(qx, qy, transpose_b=True)

        def diffusion_kernel(a, tmpt, dim):
            # The normalising constant (4*pi*t)^(-dim/2) is omitted, so
            # `dim` is currently unused.
            return nd.exp(- nd.square(nd.arccos(a)) / tmpt)

        # Mask that removes self-similarities from the within-batch sums.
        off_diag = 1 - nd.eye(n, ctx=ctx_model)
        # Inputs to arccos are kept inside [0, 1 - eps].
        k_xx = diffusion_kernel(nd.clip(xx, 0, 1-eps), t, d-1)
        k_yy = diffusion_kernel(nd.clip(yy, 0, 1-eps), t, d-1)
        k_xy = diffusion_kernel(nd.clip(xy, 0, 1-eps), t, d-1)
        sum_xx = (k_xx * off_diag).sum() / (n * (n-1))
        sum_yy = (k_yy * off_diag).sum() / (n * (n-1))
        sum_xy = 2 * k_xy.sum() / (n * n)
    return sum_xx + sum_yy - sum_xy
Beispiel #25
0
def augment(points, xforms, r=None):
    """Apply per-sample transforms to points, optionally adding jitter.

    Args:
        points: batch of point sets.
        xforms: batch of transform matrices, combined via ``batch_dot``.
        r: jitter scale; when given, normal noise scaled by ``r`` and
            clipped to ``[-5 * r, 5 * r]`` is added.

    Returns:
        The transformed (and possibly jittered) points.
    """
    transformed = nd.batch_dot(points, xforms, name='points_xformed')
    if r is not None:
        limit = 5 * r
        noise = r * mx.random.normal(shape=transformed.shape)
        bounded_noise = nd.clip(noise, -limit, limit, name='jitter_clipped')
        transformed = transformed + bounded_noise
    return transformed
Beispiel #26
0
def calMAE(net, features, labels):
    """Mean absolute error between log labels and log predictions.

    Predictions are clipped to at least 1 before the logarithm. The error is
    accumulated by iterating over the difference array.
    """
    clipped_preds = nd.clip(net(features), 1, float('inf'))
    log_gap = labels.log() - clipped_preds.log()

    abs_total = 0
    count = 0
    for count, entry in enumerate(log_gap, start=1):
        abs_total += entry.abs()
    return (abs_total / count).asscalar()
Beispiel #27
0
def bbox_iou(box1, box2, transform=True):
    """
    Returns the IoU of two bounding boxes

    Both inputs are converted with ``nd.array`` and passed through ``nd.abs``.
    Columns 0-3 of each input are used; the boxes are combined element-wise,
    column against column.

    Args:
        box1, box2: array-likes indexed as ``[:, 0..3]``.
        transform: when True, the four columns are treated as
            (center_x, center_y, width, height) and converted to
            (x1, y1, x2, y2) corners first; when False they are used as
            corners directly.

    Returns:
        IoU values clipped to ``[1e-5, 1 - 1e-5]``.

    Raises:
        ValueError: if either input is empty.
    """
    def center_to_corner(boxes):
        # (cx, cy, w, h) -> (x1, y1, x2, y2); all other columns are copied.
        corners = boxes.copy()
        half_w = boxes[:, 2] / 2.0
        half_h = boxes[:, 3] / 2.0
        corners[:, 0] = boxes[:, 0] - half_w
        corners[:, 1] = boxes[:, 1] - half_h
        corners[:, 2] = boxes[:, 0] + half_w
        corners[:, 3] = boxes[:, 1] + half_h
        return corners

    box1, box2 = nd.array(box1), nd.array(box2)
    if 0 in (box1.size, box2.size):
        raise ValueError
    box1, box2 = nd.abs(box1), nd.abs(box2)
    if transform:
        box1 = center_to_corner(box1)
        box2 = center_to_corner(box2)

    # Corner coordinates of each box.
    b1_x1, b1_y1, b1_x2, b1_y2 = [box1[:, col] for col in range(4)]
    b2_x1, b2_y1, b2_x2, b2_y2 = [box2[:, col] for col in range(4)]

    # Intersection rectangle: larger of the top-left corners, smaller of the
    # bottom-right corners.
    left = nd.where(b1_x1 > b2_x1, b1_x1, b2_x1)
    top = nd.where(b1_y1 > b2_y1, b1_y1, b2_y1)
    right = nd.where(b1_x2 < b2_x2, b1_x2, b2_x2)
    bottom = nd.where(b1_y2 < b2_y2, b1_y2, b2_y2)

    # Extents use the "+ 1" convention and are clamped to [0, 10000] so that
    # disjoint boxes contribute zero area.
    inter_w = nd.clip(right - left + 1, a_min=0, a_max=10000)
    inter_h = nd.clip(bottom - top + 1, a_min=0, a_max=10000)
    inter_area = inter_w * inter_h

    # Union = area1 + area2 - intersection.
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
    union_area = b1_area + b2_area - inter_area

    return nd.clip(inter_area / union_area, 1e-5, 1. - 1e-5)
Beispiel #28
0
    def update(self, obs, returns, masks, actions, values, logpacs):
        """Run one gradient update with a clipped policy/value objective.

        Args:
            obs: batch of observations; reshaped to (-1, observation_dim).
            returns: return targets; reshaped to (-1, 1).
            masks: accepted for interface compatibility; not used here.
            actions: actions taken; reshaped to (-1, action_dim).
            values: value estimates recorded at collection time; (-1, 1).
            logpacs: log-probabilities of ``actions`` under the policy that
                collected the data; reshaped to (-1, action_dim).

        The advantage is ``returns - values`` (no normalisation is applied).
        Gradients are computed from the combined loss and the trainer is
        stepped with the number of observation rows as batch size.
        """
        ctx = self.args.ctx
        clip_eps = self.args.clip_param

        def to_column(data, width):
            # Copy `data` to the training context as a (-1, width) NDArray.
            return nd.array(data, ctx=ctx).reshape((-1, width))

        adv = to_column(returns - values, 1)
        obs_batch = to_column(obs, self.observation_dim)
        act_batch = to_column(actions, self.action_dim)
        old_values = to_column(values, 1)
        target_returns = to_column(returns, 1)
        old_log_prob = to_column(logpacs, self.action_dim)

        with autograd.record():
            # Value loss: the worse (larger) of the plain squared error and
            # the squared error of a prediction kept within clip_eps of the
            # old value estimate.
            value_pred, mu, sigma = self.net(obs_batch)
            value_delta = nd.clip(value_pred - old_values, -clip_eps, clip_eps)
            value_pred_clipped = old_values + value_delta
            vf_loss = nd.mean(nd.maximum(
                nd.square(value_pred - target_returns),
                nd.square(value_pred_clipped - target_returns)))

            # Policy loss: pessimistic (minimum) of the unclipped and clipped
            # probability-ratio surrogates.
            new_log_prob = self.net.log_prob(act_batch, mu, sigma)
            ratio = nd.exp(new_log_prob - old_log_prob)
            clipped_ratio = nd.clip(ratio, 1.0 - clip_eps, 1.0 + clip_eps)
            actor_loss = -nd.mean(
                nd.minimum(ratio * adv, clipped_ratio * adv))

            # Entropy bonus, subtracted from the loss.
            entropy = self.net.entropy(sigma)

            loss = (vf_loss * self.args.value_coefficient + actor_loss
                    - entropy * self.args.entropy_coefficient)

        # Back-propagate and apply the parameter update.
        loss.backward()
        self.trainer.step(obs_batch.shape[0])
Beispiel #29
0
def yolo2_feature_spliter(feature, num_classes, anchor_scales):
    """Transpose, reshape and split raw convolution outputs.

    The channel axis is moved last and split into groups of
    ``num_classes + 5`` values laid out as
    ``[class scores | objectness | x, y | w, h]``.

    Returns:
        A 4-tuple ``(output_to_draw, cls_pred, scores, xywh)`` where
        ``output_to_draw`` concatenates (category, score, left, top, right,
        bottom) on the last axis with the corners clipped to [0, 1],
        ``cls_pred`` holds the raw class scores, ``scores`` the sigmoid
        objectness, and ``xywh`` the sigmoid centre offsets concatenated
        with the raw width/height outputs.
    """
    stride = num_classes + 5
    # Channels last, e.g. (32, 16, 16, 14) for the original author's setup.
    feature = nd.transpose(feature, [0, 2, 3, 1])
    # Split the channel axis into (anchors, stride), e.g. (32, 16, 16, 2, 7).
    feature = feature.reshape((0, 0, 0, -1, stride))

    def channels(begin, end):
        # Slice [begin, end) along the last (per-anchor) axis.
        return feature.slice_axis(begin=begin, end=end, axis=-1)

    # Class scores.
    cls_pred = channels(0, num_classes)
    # Objectness score.
    scores = nd.sigmoid(channels(num_classes, num_classes + 1))
    # Centre prediction, in range (0, 1) within each grid cell.
    xy = nd.sigmoid(channels(num_classes + 1, num_classes + 3))
    # Raw width/height prediction.
    wh = channels(num_classes + 3, num_classes + 5)

    # NOTE: xy above is relative to the current grid cell; per the original
    # author's notes the prediction stage needs whole-image coordinates and
    # sizes, which these helpers presumably provide (defined elsewhere).
    x, y = transform_center(xy)
    w, h = transform_size(wh, anchor_scales)

    # Final class prediction: index of the highest class score.
    category = nd.argmax(cls_pred, axis=-1, keepdims=True)

    # Training works with centre + size boxes, drawing/prediction with
    # corner boxes, so the corner form is prepared here. A box centred near
    # the image border can extend past it, hence the clip to [0, 1].
    half_w = w / 2
    half_h = h / 2
    left = nd.clip(x - half_w, 0, 1)
    top = nd.clip(y - half_h, 0, 1)
    right = nd.clip(x + half_w, 0, 1)
    bottom = nd.clip(y + half_h, 0, 1)

    # The list must be unpacked with '*': nd.concat takes NDArrays as
    # positional arguments and rejects a list.
    corner_fields = [category, scores, left, top, right, bottom]
    output_to_draw = nd.concat(*corner_fields, dim=-1)
    xywh = nd.concat(*[xy, wh], dim=-1)
    return output_to_draw, cls_pred, scores, xywh
Beispiel #30
0
    def step(self, indices, weights, grads, states):
        """Apply one optimizer update to each (weight, grad, state) triple.

        For every parameter this: bumps the update count, preprocesses the
        gradient (rescale, optional clip, weight decay, optional gradient
        centralization), updates the first/second moment running averages,
        computes or re-uses a step size, applies the weight update, and every
        ``self.k`` steps blends the weight into its slow buffer.

        Args:
            indices: parameter indices, passed to ``_update_count``,
                ``_get_lr`` and ``_get_wd``.
            weights: parameter arrays, updated in place.
            grads: gradient arrays matching ``weights``.
            states: per-parameter state, each unpacked as
                ``(step, exp_avg, exp_avg_sq, slow_buffer)``.
        """
        for index, weight, grad, state in zip(indices, weights, grads, states):
            self._update_count(index)
            lr = self._get_lr(index)
            wd = self._get_wd(index)
            step, exp_avg, exp_avg_sq, slow_buffer = state
            # Per-parameter step counter, stored in element 0 of `step`.
            step[0] += 1

            # preprocess grad
            # NOTE(review): `*=` and `+=` operate in place, so the caller's
            # gradient array is modified (assuming grad is an NDArray).
            grad *= self.rescale_grad
            if self.clip_gradient is not None:
                grad = nd.clip(grad, -self.clip_gradient, self.clip_gradient)
            # Weight decay is folded into the gradient.
            grad += wd * weight

            # Gradient Centralization operation for Conv layers and FC layers
            # Only for gradients with more than gc_gradient_threshold dims:
            # subtract the mean taken over every axis except the first.
            if self.use_gc and len(grad.shape) > self.gc_gradient_threshold:
                grad = grad - grad.mean(axis=tuple(range(1, len(grad.shape))),
                                        keepdims=True)

            # compute mean moving avg and variance moving avg
            exp_avg[:] = (exp_avg * self.beta1) + ((1 - self.beta1) * grad)
            exp_avg_sq[:] = (exp_avg_sq * self.beta2) + (
                (1 - self.beta2) * grad * grad)

            # radam_buffer caches [step, N_sma, step_size] in a slot chosen by
            # step % 10, so the values are re-used when the cached step
            # matches the current one instead of being recomputed.
            buffered = self.radam_buffer[int(step[0] % 10)]

            if step[0] == buffered[0]:
                N_sma, step_size = buffered[1], buffered[2]
            else:
                buffered[0] = step[0]
                beta2_t = self.beta2**step[0]
                N_sma_max = 2 / (1 - self.beta2) - 1
                N_sma = N_sma_max - 2 * step[0] * beta2_t / (1 - beta2_t)
                buffered[1] = N_sma
                if N_sma > self.n_sma_threshhold:
                    # Above the threshold: scaling term built from N_sma and
                    # N_sma_max, divided by the beta1 bias correction.
                    step_size = math.sqrt(
                        (1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) *
                        (N_sma - 2) / N_sma * N_sma_max /
                        (N_sma_max - 2)) / (1 - self.beta1**step[0])
                else:
                    # At or below the threshold: only the beta1 bias
                    # correction is applied.
                    step_size = 1.0 / (1 - self.beta1**step[0])
                buffered[2] = step_size
                self.radam_buffer[int(step[0] % 10)] = buffered

            # apply lr
            new_lr = -step_size * lr
            if N_sma > self.n_sma_threshhold:
                # Adaptive update: first moment scaled by sqrt(second moment).
                denom = exp_avg_sq.sqrt() + self.epsilon
                weight[:] += new_lr * (exp_avg / denom)
            else:
                # Non-adaptive update using the first moment only.
                weight[:] += new_lr * exp_avg

            # integrated look ahead
            # Every k steps: move the slow buffer towards the current weight
            # by a factor alpha, then reset the weight to the slow buffer.
            if step[0] % self.k == 0:
                slow_buffer[:] += (weight - slow_buffer) * self.alpha
                weight[:] = slow_buffer
Beispiel #31
0
def test_clip():
    """Check that nd.clip caps the last row of a large array at a_max.

    Builds a (LARGE_X, SMALL_Y) array whose row ``i`` is filled with ``i``,
    clips it to [100, 1000] and asserts that every entry of the last row
    equals 1000.
    """
    column = nd.arange(0, LARGE_X).reshape(LARGE_X, 1)
    tiled = nd.broadcast_to(column, shape=(column.shape[0], SMALL_Y))
    clipped = nd.clip(tiled, a_min=100, a_max=1000)
    last_row = clipped[-1].asnumpy()
    assert np.sum(last_row == 1000) == tiled.shape[1]
Beispiel #32
0
 def forward(self, x):
     """Clamp ``x`` element-wise to ``[self._low, self._high]``."""
     lower, upper = self._low, self._high
     return nd.clip(x, a_min=lower, a_max=upper)