def topk(hm, k=100):
    """Pick the ``k`` strongest heatmap peaks of every image.

    The heatmap is filtered with ``nms``, flattened over its spatial axes,
    reduced to the top ``k`` responses per category and then to the top
    ``k`` responses over all categories of an image.

    Parameters
    ----------
    hm : NDArray
        Heatmap of shape ``(batch_size, cat, height, width)``.
    k : int
        Number of peaks kept per category and, afterwards, per image.

    Returns
    -------
    topk_cat_x_idx : NDArray, shape ``(batch_size, k)``
        ``floor(flat_index / width)`` of every kept peak, i.e. its row in
        the heatmap (the ``x`` naming is historical and kept for callers).
    topk_cat_y_idx : NDArray, shape ``(batch_size, k)``
        ``flat_index % width`` of every kept peak, i.e. its column.
    cls_id : NDArray, shape ``(batch_size, k)``
        Category index of every kept peak.
    """
    ctx = hm.context
    batch_size, _, _, width = hm.shape

    hm = nms(hm)
    # Merge the two spatial axes: (batch, cat, height * width).
    hm = nd.reshape(hm, (0, 0, -1))
    topk_scores, topk_idx = nd.topk(hm, k=k, ret_typ='both')

    # A flat index into a row-major (height, width) grid decomposes as
    # index = row * width + col, so quotient AND remainder must both use
    # `width`.  The previous `% height` was only right for square maps.
    topk_x_idx = nd.reshape(nd.floor(topk_idx / width), (0, -1))
    topk_y_idx = nd.reshape(topk_idx % width, (0, -1))
    topk_scores = nd.reshape(topk_scores, (0, -1))

    # Second pass: best k candidates among all cat * k per-category winners.
    _, topk_cat_idx = nd.topk(topk_scores, k=k, ret_typ='both')
    # Candidates are laid out category-major in blocks of k entries.
    cls_id = nd.floor(topk_cat_idx / k)

    # Build (2, batch * k) indices of the form [batch_row; candidate_col]
    # so gather_nd can pick the winners out of the (batch, cat * k) tables.
    batch_idx = nd.repeat(nd.arange(batch_size), repeats=k).reshape((1, -1))
    batch_idx = batch_idx.as_in_context(ctx)
    topk_cat_idx = nd.reshape(topk_cat_idx, (1, -1))
    gather_indices = nd.concat(batch_idx, topk_cat_idx, dim=0)

    topk_cat_x_idx = nd.gather_nd(topk_x_idx, gather_indices)
    topk_cat_x_idx = nd.reshape(topk_cat_x_idx, (batch_size, k))

    topk_cat_y_idx = nd.gather_nd(topk_y_idx, gather_indices)
    topk_cat_y_idx = nd.reshape(topk_cat_y_idx, (batch_size, k))

    return topk_cat_x_idx, topk_cat_y_idx, cls_id
def get_final_preds(batch_heatmaps, center, scale):
    """Turn joint heatmaps into coordinates in the original image frame.

    Parameters
    ----------
    batch_heatmaps : NDArray
        Heatmaps indexed as ``[sample][joint][row][col]``.
    center, scale : indexable per sample
        ``center[i]`` and ``scale[i]`` each provide an x entry at ``[0]``
        and a y entry at ``[1]``.  A heatmap position is mapped linearly
        onto ``[center - scale, center + scale]`` along each axis.

    Returns
    -------
    preds : NDArray
        Same shape as the heatmap-space coordinates, in image space.
    maxvals : NDArray
        Peak values as returned by ``get_max_pred``.
    """
    from gluoncv.data.transforms.pose import get_max_pred

    coords, maxvals = get_max_pred(batch_heatmaps)
    num_samples = coords.shape[0]
    num_joints = coords.shape[1]
    rows = batch_heatmaps.shape[2]
    cols = batch_heatmaps.shape[3]

    # Sub-pixel refinement: nudge each peak a quarter pixel towards the
    # neighbour with the larger response, separately along x and y.
    for sample in range(num_samples):
        for joint in range(num_joints):
            joint_map = batch_heatmaps[sample][joint]
            col = int(nd.floor(coords[sample][joint][0] + 0.5).asscalar())
            row = int(nd.floor(coords[sample][joint][1] + 0.5).asscalar())
            # Peaks on or next to the border have no usable neighbours.
            if not (1 < col < cols - 1 and 1 < row < rows - 1):
                continue
            grad_x = joint_map[row][col + 1] - joint_map[row][col - 1]
            grad_y = joint_map[row + 1][col] - joint_map[row - 1][col]
            coords[sample][joint] += nd.sign(nd.concat(grad_x, grad_y, dim=0)) * .25

    # Map heatmap coordinates back into the original image frame.
    preds = nd.zeros_like(coords)
    for sample in range(num_samples):
        half_w = scale[sample][0]
        half_h = scale[sample][1]
        x_frac = coords[sample][:, 0] / cols
        y_frac = coords[sample][:, 1] / rows
        preds[sample][:, 0] = half_w * 2 * x_frac + center[sample][0] - half_w
        preds[sample][:, 1] = half_h * 2 * y_frac + center[sample][1] - half_h

    return preds, maxvals
def get_final_preds(batch_heatmaps, center, scale):
    """Decode joint heatmaps and project the peaks with ``transform_preds``.

    Parameters
    ----------
    batch_heatmaps : NDArray
        Heatmaps indexed as ``[sample][joint][row][col]``.
    center, scale : indexable per sample
        Passed unchanged, one entry per sample, to ``transform_preds``.

    Returns
    -------
    preds : NDArray
        Per-sample output of ``transform_preds``, same shape as the
        heatmap-space coordinates.
    maxvals : NDArray
        Peak values as returned by ``get_max_pred``.
    """
    coords, maxvals = get_max_pred(batch_heatmaps)
    n_rows, n_cols = batch_heatmaps.shape[2], batch_heatmaps.shape[3]
    n_samples, n_joints = coords.shape[0], coords.shape[1]

    # Quarter-pixel refinement towards the stronger neighbouring response.
    for s in range(n_samples):
        for j in range(n_joints):
            heat = batch_heatmaps[s][j]
            cx = int(nd.floor(coords[s][j][0] + 0.5).asscalar())
            cy = int(nd.floor(coords[s][j][1] + 0.5).asscalar())
            inside = 1 < cx < n_cols - 1 and 1 < cy < n_rows - 1
            if inside:
                horizontal = heat[cy][cx + 1] - heat[cy][cx - 1]
                vertical = heat[cy + 1][cx] - heat[cy - 1][cx]
                step = nd.concat(horizontal, vertical, dim=0)
                coords[s][j] += nd.sign(step) * .25

    # Project every sample back with its own center/scale.
    preds = nd.zeros_like(coords)
    output_size = [n_cols, n_rows]
    for s in range(n_samples):
        preds[s] = transform_preds(coords[s], center[s], scale[s],
                                   [output_size[0], output_size[1]])

    return preds, maxvals
def heatmap_to_coord_alpha_pose(hms, boxes):
    """Decode joint heatmaps and map the peaks back into their boxes.

    Parameters
    ----------
    hms : NDArray
        Heatmaps indexed as ``[sample][joint][row][col]``.
    boxes : array-like
        Either ``(N, 1, 4)`` or ``(N, 4)``.  Columns ``(0, 1)`` and
        ``(2, 3)`` are handed to ``transformBoxInvert`` as the two box
        points (presumably x1, y1 and x2, y2 -- confirm with the caller).

    Returns
    -------
    preds : NDArray
        Per-joint output of ``transformBoxInvert``.
    maxvals : NDArray
        Peak values as returned by ``get_max_pred``.
    """
    n_rows = hms.shape[2]
    n_cols = hms.shape[3]
    coords, maxvals = get_max_pred(hms)

    if boxes.shape[1] == 1:
        # Layout (N, 1, 4): drop the singleton axis while slicing.
        pt_first = mx.nd.array(boxes[:, 0, (0, 1)], dtype=hms.dtype)
        pt_second = mx.nd.array(boxes[:, 0, (2, 3)], dtype=hms.dtype)
    else:
        assert boxes.shape[1] == 4
        pt_first = mx.nd.array(boxes[:, (0, 1)], dtype=hms.dtype)
        pt_second = mx.nd.array(boxes[:, (2, 3)], dtype=hms.dtype)

    # Quarter-pixel refinement towards the stronger neighbouring response.
    for sample in range(coords.shape[0]):
        for joint in range(coords.shape[1]):
            heat = hms[sample][joint]
            col = int(nd.floor(coords[sample][joint][0] + 0.5).asscalar())
            row = int(nd.floor(coords[sample][joint][1] + 0.5).asscalar())
            if not (1 < col < n_cols - 1 and 1 < row < n_rows - 1):
                continue
            d_col = heat[row][col + 1] - heat[row][col - 1]
            d_row = heat[row + 1][col] - heat[row - 1][col]
            coords[sample][joint] += nd.sign(nd.concat(d_col, d_row, dim=0)) * .25

    preds = nd.zeros_like(coords)
    for sample in range(hms.shape[0]):
        for joint in range(hms.shape[1]):
            preds[sample][joint] = transformBoxInvert(
                coords[sample][joint], pt_first[sample], pt_second[sample],
                n_rows, n_cols)

    return preds, maxvals
def train(self,epochs):
    """Train ``self.net`` to recover messages from noisy codewords.

    Every iteration draws a random binary message batch ``z``, encodes it
    with the generator matrix ``self.code.G`` (mod 2), adds a noise
    pattern that is 1 only where a row of uniform samples attains its
    maximum, and takes one optimiser step on ``self.SE(net(y), z)``.
    After each epoch the bit error rate and the normalised loss are
    printed.

    Parameters
    ----------
    epochs : int
        Number of epochs; each runs ``self.nbIter`` iterations.
    """
    for epoch in range(epochs):
        correct_bits = 0
        loss_total = 0
        for _ in range(self.nbIter):
            # Random binary messages and their codewords.
            z = nd.round(nd.random.uniform(
                0, 1, (self.batchSize, self.code.k), ctx=self.ctx))
            x = nd.dot(z, self.code.G) % 2

            # floor(sample / row_max) is 1 at the row maximum, 0 elsewhere.
            noiseBSC = nd.random.uniform(
                0.01, 0.99, (self.batchSize, self.code.n), ctx=self.ctx)
            row_max = nd.max(noiseBSC, axis=(1,)).reshape((self.batchSize, 1))
            noiseBSC = nd.floor(noiseBSC / row_max)
            y = (x + noiseBSC) % 2

            with autograd.record():
                zHat = self.net(y)
                loss = self.SE(zHat, z)
            loss.backward()

            self.adam(self.params, self.vs, self.sqrs, self.lr,
                      self.batchSize, self.t)
            self.t += 1

            loss_total += loss.asscalar()
            decoded = nd.round(zHat)
            correct_bits += nd.sum(nd.equal(decoded, z)).asscalar()

        Pc = correct_bits / (self.batchSize * self.nbIter * self.code.k)
        Pe = 1 - Pc
        normCumuLoss = loss_total / (self.batchSize * self.nbIter * self.code.k)
        print("Epochs %d: Pe = %lf , loss = %lf" % (epoch, Pe, normCumuLoss))
def quantize_to(x, bits=8):
    """Quantize ``x`` to a fixed-point grid of ``bits`` bits.

    Parameters
    ----------
    x : NDArray
        Values to quantize.
    bits : int
        Width of the target representation.

    Returns
    -------
    y : NDArray
        ``floor(x * 2**sb)`` clipped to
        ``[-2**(bits - 1), 2**(bits - 1) - 1]``.
    sb : number
        Scale exponent, so that ``x`` is approximately ``y / 2**sb``.
    """
    peak = nd.max(nd.abs(x))
    if peak == 0:
        # NOTE(review): the exponent returned here is the literal 8, not
        # `bits`; kept as is because callers may rely on it.
        return x.astype(np.int8), 8

    # Bits needed for the integer part of the largest magnitude.
    integer_bits = nd.ceil(nd.log2(peak)).asscalar()
    shift = bits - integer_bits
    factor = 2 ** shift

    lower = -2 ** (bits - 1)
    upper = 2 ** (bits - 1) - 1
    quantized = nd.clip(nd.floor(x * factor), a_min=lower, a_max=upper)
    return quantized, shift
def prep_final_label(labels, num_classes, input_dim=416):
    """Scatter a padded list of boxes into three grid-shaped target tensors.

    Parameters
    ----------
    labels : NDArray, shape ``(num_boxes, 5 + num_classes)``
        Each row is read as ``[x, y, w, h, objectness, classes...]`` with
        x/y/w/h given as fractions of the image (they are multiplied by the
        grid size and by ``input_dim`` below).  Processing stops at the
        first row whose objectness is 0.
    num_classes : int
        Number of class columns.
    input_dim : int
        Image size the anchor sizes are expressed in.

    Returns
    -------
    t_y : NDArray, shape ``(10647, num_classes + 5)``
        Network targets for the 13x13, 26x26 and 52x52 grids (3 anchors
        each), flattened and concatenated.
    t_xywhs : NDArray, shape ``(10647, 5)``
        Boxes scaled by ``input_dim`` with column 4 set to 1, in the same
        cell/anchor layout as ``t_y``.
    """
    ctx = labels.context
    anchors = nd.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                        (59, 119), (116, 90), (156, 198), (373, 326)],
                       ctx=ctx)
    # Largest anchors go to the coarsest grid.
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    grid_sizes = [2 ** level * 13 for level in range(3)]

    label_list = [
        nd.zeros(shape=(g, g, 3, num_classes + 5), dtype="float32", ctx=ctx)
        for g in grid_sizes
    ]
    true_label_list = [
        nd.zeros(shape=(g, g, 3, 5), dtype="float32", ctx=ctx)
        for g in grid_sizes
    ]

    for box in range(labels.shape[0]):
        # Rows after the first empty one are treated as padding.
        if labels[box, 4] == 0.0:
            break

        for level, grid in enumerate(grid_sizes):
            level_anchors = anchors[anchors_mask[level]]

            # One copy of the box per candidate anchor, in grid units.
            box_xywh = nd.repeat(
                nd.expand_dims(labels[box, :4] * grid, axis=0),
                repeats=level_anchors.shape[0], axis=0)
            # Same centres, but with the anchors' width/height.
            anchor_xywh = box_xywh.copy()
            anchor_xywh[:, 2:4] = level_anchors / input_dim * grid
            best_anchor = nd.argmax(bbox_iou(box_xywh, anchor_xywh), axis=0)

            target = labels[box].copy()
            cell = nd.floor(target[:2] * grid)
            # Keep only the offset inside the cell.
            target[:2] = target[:2] * grid - cell
            cell = cell.astype("int")
            # Width/height as log-ratio to the chosen anchor.
            target[2:4] = nd.log(
                target[2:4] * input_dim
                / level_anchors[best_anchor].reshape(-1) + 1e-12)
            label_list[level][cell[1], cell[0], best_anchor] = target

            true_xywhs = labels[box, :5] * input_dim
            true_xywhs[4] = 1.0
            true_label_list[level][cell[1], cell[0], best_anchor] = true_xywhs

    t_y = nd.concat(
        *[t.reshape((-1, num_classes + 5)) for t in label_list], dim=0)
    t_xywhs = nd.concat(
        *[t.reshape((-1, 5)) for t in true_label_list], dim=0)
    return t_y, t_xywhs
def get_max_pred(batch_heatmaps):
    """Locate the peak of every joint heatmap.

    Parameters
    ----------
    batch_heatmaps : NDArray, shape ``(batch, joints, height, width)``

    Returns
    -------
    preds : NDArray, shape ``(batch, joints, 2)``, float32
        ``(x, y)`` of each peak; both entries are zeroed when the peak
        value is not greater than 0.
    maxvals : NDArray, shape ``(batch, joints, 1)``
        The peak values.
    """
    n_batch, n_joints = batch_heatmaps.shape[0], batch_heatmaps.shape[1]
    hm_width = batch_heatmaps.shape[3]
    peak_shape = (n_batch, n_joints, 1)

    flat = batch_heatmaps.reshape((n_batch, n_joints, -1))
    flat_idx = nd.argmax(flat, 2)
    maxvals = nd.max(flat, 2).reshape(peak_shape)
    flat_idx = flat_idx.reshape(peak_shape)

    # Duplicate the flat index, then split it into column and row.
    preds = nd.tile(flat_idx, (1, 1, 2)).astype(np.float32)
    preds[:, :, 0] = preds[:, :, 0] % hm_width
    preds[:, :, 1] = nd.floor(preds[:, :, 1] / hm_width)

    # Suppress coordinates whose peak response is not positive.
    positive = nd.greater(maxvals, 0.0)
    preds *= nd.tile(positive, (1, 1, 2)).astype(np.float32)
    return preds, maxvals
def getUniqueMatch(iou, min_threshold=1e-12):
    """Greedily pair rows and columns of an IoU matrix, best score first.

    Entries are visited in descending IoU order.  A pair is accepted when
    neither its row nor its column has been used yet; it is recorded in
    the result only if its IoU exceeds ``min_threshold``.

    Parameters
    ----------
    iou : NDArray, shape ``(N, M)``
    min_threshold : float
        Accepted pairs at or below this IoU stay unmatched.

    Returns
    -------
    NDArray, shape ``(1, N)``
        Matched column index per row, ``-1`` where a row has no match.
    """
    N, M = iou.shape
    ranking = nd.argsort(iou.reshape((-1,)), is_ascend=False)
    rows = nd.floor(nd.divide(ranking, M))
    cols = nd.modulo(ranking, M)

    taken_cols = set()
    taken_rows = set()
    match = nd.ones((N,)) * -1

    pos = 0
    while True:
        col_value = cols[pos].asscalar()
        row_value = rows[pos].asscalar()
        if col_value not in taken_cols and row_value not in taken_rows:
            taken_cols.add(col_value)
            taken_rows.add(row_value)
            if iou[rows[pos], cols[pos]] > min_threshold:
                match[rows[pos]] = cols[pos]
        # Done as soon as one side is fully consumed.
        if len(taken_cols) == M or len(taken_rows) == N:
            break
        pos += 1

    return match.reshape((1,-1))
def quantize_vector(x, bits=8):
    """Quantize a vector to ``bits`` bits of precision.

    Parameters
    ----------
    x : NDArray
        Input vector (the original author documents the shape as (1, n)).
    bits : int
        Number of bits the quantized values must fit into.

    Returns
    -------
    y : NDArray
        ``floor(x * 2**sb)`` clipped to the signed ``bits``-bit range
        ``[-2**(bits - 1), 2**(bits - 1) - 1]``.
    sb : number
        Scale exponent; the original values are approximately
        ``y / 2**sb``.  For an all-zero input, ``x`` cast to int8 and the
        literal 8 are returned.
    """
    largest = nd.max(nd.abs(x))
    if largest == 0:
        return x.astype(np.int8), 8

    # Bits taken by the integer part of the largest magnitude.
    int_bits = nd.ceil(nd.log2(largest)).asscalar()
    sb = bits - int_bits

    half_range = 2 ** (bits - 1)
    scaled = nd.floor(x * 2 ** sb)
    return nd.clip(scaled, a_min=-half_range, a_max=half_range - 1), sb
def triangle(x, y):
    """Test whether ``y`` lies above a unit triangle wave evaluated at ``x``.

    The wave is even in ``x`` and has period 2: it rises from 0 to 1 while
    ``floor(|x|)`` is even and falls from 1 to 0 while it is odd.

    Returns the element-wise result of ``y - wave(x) > 0``.
    """
    magnitude = nd.abs(x)
    whole = nd.floor(magnitude)
    on_odd_segment = nd.modulo(whole, 2)
    falling = 1 - magnitude + whole
    rising = magnitude - whole
    wave = nd.where(on_odd_segment, falling, rising)
    return y - wave > 0
def check_floor():
    """Check ``nd.floor`` on the enclosing scope's ``x``."""
    # Expected output for the middle 5 values after applying floor().
    middle_expected = [-1, -1, 0, 0, 1]
    floored = nd.floor(x)
    assert_correctness_of_rounding_ops(floored, LARGE_X // 2, middle_expected)
def transform(data, label):
    """Return ``floor(data / 128)`` and ``label``, both cast to float32."""
    quantized = nd.floor(data/128)
    return quantized.astype(np.float32), label.astype(np.float32)
def transform(data, label):
    """Return ``floor(data / 128)`` as float32 without its last axis.

    The last axis of ``data`` is squeezed away; ``label`` is passed through
    untouched.
    """
    quantized = nd.floor(data / 128)
    quantized = quantized.astype('float32')
    return quantized.squeeze(axis=-1), label
def prep_final_label(labels, num_classes, input_dim=416):
    """Convert a padded label table into per-grid training targets.

    Expected ``labels`` layout, one box per row::

        [x, y, w, h, objectness, class0, class1, ...]

    The original author notes the table is padded to 30 rows (believed to
    be the maximum number of objects per image -- unverified).  Rows are
    consumed until the first one whose objectness is 0.

    Parameters
    ----------
    labels : NDArray, shape ``(num_rows, 5 + num_classes)``
    num_classes : int
    input_dim : int
        Image size the anchor dimensions refer to.

    Returns
    -------
    t_y : NDArray, shape ``(num_cells_x_anchors, 5 + num_classes)``
        Targets relative to grid cell and anchor, for the 13, 26 and 52
        grids concatenated.
    t_xywhs : NDArray, shape ``(num_cells_x_anchors, 5)``
        The same boxes scaled by ``input_dim``, with objectness 1.
    """
    ctx = labels.context

    # Anchor sizes taken from the COCO clustering in the YOLOv3 paper
    # (per the original author's note).
    anchors = nd.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                        (59, 119), (116, 90), (156, 198), (373, 326)],
                       ctx=ctx)
    # Anchor triple per pyramid level: big anchors on the small feature map.
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    # Feature-map side length per pyramid level.
    grids = (13, 26, 52)
    wide = num_classes + 5

    # Targets expressed relative to grid cell and anchor box.
    label_1 = nd.zeros(shape=(13, 13, 3, wide), dtype="float32", ctx=ctx)
    label_2 = nd.zeros(shape=(26, 26, 3, wide), dtype="float32", ctx=ctx)
    label_3 = nd.zeros(shape=(52, 52, 3, wide), dtype="float32", ctx=ctx)
    # Targets expressed in source-image units.
    true_label_1 = nd.zeros(shape=(13, 13, 3, 5), dtype="float32", ctx=ctx)
    true_label_2 = nd.zeros(shape=(26, 26, 3, 5), dtype="float32", ctx=ctx)
    true_label_3 = nd.zeros(shape=(52, 52, 3, 5), dtype="float32", ctx=ctx)

    levels = list(zip(grids, anchors_mask,
                      (label_1, label_2, label_3),
                      (true_label_1, true_label_2, true_label_3)))

    for row in range(labels.shape[0]):
        # An objectness of 0 marks the start of the padding.
        if labels[row, 4] == 0.0:
            break

        for stride, mask, net_target, img_target in levels:
            tmp_anchors = anchors[mask]

            # The box in feature-map units, repeated once per anchor so the
            # IoU against every anchor can be computed in one call.
            tmp_xywh = nd.repeat(
                nd.expand_dims(labels[row, :4] * stride, axis=0),
                repeats=tmp_anchors.shape[0], axis=0)

            # Same centres, anchor width/height (also in feature-map units).
            anchor_xywh = tmp_xywh.copy()
            anchor_xywh[:, 2:4] = tmp_anchors / input_dim * stride

            # The anchor overlapping the box best is responsible for it.
            best_anchor = nd.argmax(bbox_iou(tmp_xywh, anchor_xywh), axis=0)

            label = labels[row].copy()
            scaled_xy = label[:2] * stride
            tmp_idx = nd.floor(scaled_xy)
            # Offset of the centre inside its cell, in [0, 1).
            label[:2] = scaled_xy - tmp_idx
            tmp_idx = tmp_idx.astype("int")

            # Inverse of `anchor * exp(t)`: log-ratio of box to anchor size.
            label[2:4] = nd.log(
                label[2:4] * input_dim
                / tmp_anchors[best_anchor].reshape(-1) + 1e-12)

            # Grid tensors are indexed [y, x, anchor].  A later box landing
            # on the same cell and anchor overwrites the earlier one.
            net_target[tmp_idx[1], tmp_idx[0], best_anchor] = label

            # Image-space copy of the box with objectness forced to 1.
            true_xywhs = labels[row, :5] * input_dim
            true_xywhs[4] = 1.0
            img_target[tmp_idx[1], tmp_idx[0], best_anchor] = true_xywhs

    # Flatten every grid to (cells * anchors, columns) and stack the levels.
    t_y = nd.concat(label_1.reshape((-1, wide)),
                    label_2.reshape((-1, wide)),
                    label_3.reshape((-1, wide)),
                    dim=0)
    t_xywhs = nd.concat(true_label_1.reshape((-1, 5)),
                        true_label_2.reshape((-1, 5)),
                        true_label_3.reshape((-1, 5)),
                        dim=0)
    return t_y, t_xywhs
def triangle(input):
    """Test whether ``input[1]`` lies above a triangle wave at ``input[0]``.

    The wave is even in its argument and has period 2: it rises from 0 to
    1 while ``floor(|input[0]|)`` is even and falls back while it is odd.

    Returns the element-wise result of ``input[1] - wave > 0``.
    """
    position = nd.abs(input[0])
    base = nd.floor(position)
    descending = 1 - position + base
    ascending = position - base
    wave = nd.where(nd.modulo(base, 2), descending, ascending)
    return input[1] - wave > 0
def prep_final_label(labels, num_classes, input_dim=416):
    """Build grid-shaped training targets for one image.

    Parameters
    ----------
    labels : NDArray, 2-D
        One box per row, ``[x, y, w, h, pc, c0, c1, ...]`` (the original
        author works with 30 rows and 24 classes).  Rows are read until
        the first one whose ``pc`` is 0.
    num_classes : int
        Number of class columns.
    input_dim : int
        Common size the input images are resized to.

    Returns
    -------
    t_y : NDArray, shape ``(13*13*3 + 26*26*3 + 52*52*3, num_classes + 5)``
        Rows are ``[tx, ty, tw, th, pc, classes...]``.
    t_xywhs : NDArray, shape ``(13*13*3 + 26*26*3 + 52*52*3, 5)``
        Rows are ``[x, y, w, h, pc]`` scaled by ``input_dim`` with ``pc``
        set to 1.
    """
    ctx = labels.context

    # The nine anchor boxes, split into three groups of three.
    anchors = nd.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                        (59, 119), (116, 90), (156, 198), (373, 326)],
                       ctx=ctx)
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    # One (size, size, 3 anchors, columns) tensor per grid size.
    label_list = []
    for size in (13, 26, 52):
        label_list.append(nd.zeros(shape=(size, size, 3, num_classes + 5),
                                   dtype="float32", ctx=ctx))
    true_label_list = []
    for size in (13, 26, 52):
        true_label_list.append(nd.zeros(shape=(size, size, 3, 5),
                                        dtype="float32", ctx=ctx))

    # Process the rows [x, y, w, h, pc, c...] one by one.
    num_rows, _ = labels.shape
    for x_box in range(num_rows):
        if labels[x_box, 4].asscalar() == 0.0:
            # First empty row: the rest of the table is padding.
            break

        # Grid sizes 13, 26 and 52 in turn.
        for i in range(3):
            stride = 2**i * 13
            group = anchors[anchors_mask[i]]  # three anchor sizes, (3, 2)

            # The box in grid units, repeated once per anchor: (3, 4).
            box_rows = nd.repeat(
                nd.expand_dims(labels[x_box, :4] * stride, axis=0),
                repeats=group.shape[0],
                axis=0)

            # Same centres with each anchor's w/h in grid units: (3, 4).
            anchor_rows = box_rows.copy()
            anchor_rows[:, 2:4] = group / input_dim * stride

            # Index of the anchor closest to the box.
            best_anchor = nd.argmax(bbox_iou(box_rows, anchor_rows), axis=0)

            # Cell index = integer part of the scaled centre; the target
            # keeps the fractional part.
            label = labels[x_box].copy()
            cell = nd.floor(label[:2] * stride)
            label[:2] = label[:2] * stride - cell
            tmp_idx = cell.astype("int")

            label[2:4] = nd.log(
                label[2:4] * input_dim / group[best_anchor].reshape(-1)
                + 1e-12)

            true_xywhs = labels[x_box, :5] * input_dim
            true_xywhs[4] = 1.0

            label_list[i][tmp_idx[1], tmp_idx[0], best_anchor] = label
            true_label_list[i][tmp_idx[1], tmp_idx[0],
                               best_anchor] = true_xywhs

    t_y = nd.concat(label_list[0].reshape((-1, num_classes + 5)),
                    label_list[1].reshape((-1, num_classes + 5)),
                    label_list[2].reshape((-1, num_classes + 5)),
                    dim=0)
    t_xywhs = nd.concat(true_label_list[0].reshape((-1, 5)),
                        true_label_list[1].reshape((-1, 5)),
                        true_label_list[2].reshape((-1, 5)),
                        dim=0)
    return t_y, t_xywhs
def forward(self, features, proposals):
    """Pool a 7x7 feature patch for every proposal with ROIAlign.

    :param features: OrderedDict of feature maps, each of shape (B, C, H, W)
    :param proposals: sequence with one NDArray of boxes per image; columns
        0..3 are read as x1, y1, x2, y2
    :return: list with one NDArray of pooled features per image.  Row ``j``
        of entry ``i`` belongs to ``proposals[i][j]`` in both the FPN and
        the single-map branch.
    """
    first_map = features[self.feature_map_names[0]]
    device = first_map.context
    counts = [len(ps) for ps in proposals]

    batch_ids = nd.concat(
        *[nd.full(n, i, ctx=device) for i, n in enumerate(counts)], dim=0)
    batch_proposals = nd.concat(*proposals, dim=0)
    # ROIAlign takes rows of [batch_index, x1, y1, x2, y2].
    rois = nd.concat(batch_ids.reshape(-1, 1), batch_proposals, dim=1)

    if not self.use_fpn:
        # Single feature map: pooled rows already follow the proposal
        # order, so they only need cutting into per-image chunks.
        pooled = contrib.ndarray.ROIAlign(first_map, rois, (7, 7), 0.5**4)
        features_split = []
        start = 0
        for num_proposals in counts:
            features_split.append(pooled[start:start + num_proposals])
            start += num_proposals
        return features_split

    # Pyramid level per ROI: k = floor(4 + log2(sqrt(w * h) / 224)),
    # clamped to the levels that exist.
    ws = batch_proposals[:, 2] - batch_proposals[:, 0]
    hs = batch_proposals[:, 3] - batch_proposals[:, 1]
    areas = ws * hs
    ks = nd.floor(4 + nd.log2(nd.sqrt(areas) / 224))
    ks = nd.clip(ks, self.levels_min, self.levels_max).asnumpy()

    roi_rows = np.arange(len(ks))
    pooled_parts = []
    source_rows = []
    for level, name in self.levels_map.items():
        level_rows = roi_rows[ks == level]
        if len(level_rows) == 0:
            continue
        pooled_parts.append(
            contrib.ndarray.ROIAlign(features[name], rois[level_rows],
                                     (7, 7), 0.5**level))
        source_rows.append(level_rows)

    pooled = nd.concat(*pooled_parts, dim=0)
    # source_rows[r] is the index, in the concatenated proposal list, of
    # the ROI that produced pooled row r.
    source_rows = np.concatenate(source_rows)

    # `pooled` is grouped by pyramid level.  Sorting by source row restores
    # the proposal order, so each image's features line up with its
    # proposals instead of being shuffled by level.
    restore = np.argsort(source_rows)
    image_of_row = batch_ids.asnumpy()[source_rows[restore]]

    features_split = []
    for i in range(len(counts)):
        features_split.append(pooled[restore[image_of_row == i]])
    return features_split