from mxnet import nd
from mxnet.contrib.ndarray import MultiBoxPrior


def test_anchor(h=40, w=40):
    x = nd.random_uniform(shape=(1, 3, h, w))
    y = MultiBoxPrior(x, sizes=[0.5, 0.25, 0.1], ratios=[1, 2, 0.5])
    # anchors come back as (x_min, y_min, x_max, y_max) in relative coordinates
    boxes = y.reshape((h, w, -1, 4))
    print('The first anchor box at row 21, column 21:', boxes[20, 20, 0, :])
    return boxes
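# A minimal usage sketch for test_anchor, assuming matplotlib is available.
# MultiBoxPrior returns normalized coordinates, so the box is scaled back to
# the input size before drawing; figure size and color are illustrative only.
import matplotlib.pyplot as plt

boxes = test_anchor(40, 40)
box = (boxes[20, 20, 0, :] * 40).asnumpy()  # (x_min, y_min, x_max, y_max) in pixels
plt.imshow(nd.ones((40, 40, 3)).asnumpy())
plt.gca().add_patch(plt.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1],
                                  fill=False, edgecolor='blue', linewidth=2))
plt.show()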
def hybrid_forward(self, F, x):
    anchors, class_preds, box_preds = [], [], []
    # scale 1: first half of the backbone
    scale_1 = self.backbone_fisrthalf(x)
    anchors.append(
        MultiBoxPrior(scale_1, sizes=self.sizes[0], ratios=self.ratios[0]))
    class_preds.append(
        self.flatten_prediction(self.class_predictors[0](scale_1)))
    box_preds.append(
        self.flatten_prediction(self.box_predictors[0](scale_1)))
    # scale 2: second half of the backbone concatenated with a parallel PC branch
    out = self.backbone_secondehalf(scale_1)
    PC_1 = self.PC_layer[0](scale_1)
    scale_2 = F.concat(out, PC_1, dim=1)
    anchors.append(
        MultiBoxPrior(scale_2, sizes=self.sizes[1], ratios=self.ratios[1]))
    class_preds.append(
        self.flatten_prediction(self.class_predictors[1](scale_2)))
    box_preds.append(
        self.flatten_prediction(self.box_predictors[1](scale_2)))
    # scales 3-6: concatenate PC and CC branches applied to the previous scale
    scale_predict = scale_2
    for i in range(1, 5):
        PC_Predict = self.PC_layer[i](scale_predict)
        CC_Predict = self.CC_layer[i - 1](scale_predict)
        scale_predict = F.concat(PC_Predict, CC_Predict, dim=1)
        anchors.append(
            MultiBoxPrior(scale_predict, sizes=self.sizes[i + 1],
                          ratios=self.ratios[i + 1]))
        class_preds.append(
            self.flatten_prediction(
                self.class_predictors[i + 1](scale_predict)))
        box_preds.append(
            self.flatten_prediction(self.box_predictors[i + 1](scale_predict)))
    anchors = self.concat_predictions(anchors)
    class_preds = self.concat_predictions(class_preds)
    box_preds = self.concat_predictions(box_preds)
    class_preds = class_preds.reshape(shape=(0, -1, self.num_cls + 1))
    return anchors, class_preds, box_preds
def toy_ssd_forward(x, model, sizes, ratios, verbose=False):
    body, downsamplers, class_predictors, box_predictors = model
    anchors, class_preds, box_preds = [], [], []
    # feature extraction
    x = body(x)
    for i in range(5):
        # predict
        anchors.append(MultiBoxPrior(x, sizes=sizes[i], ratios=ratios[i]))
        class_preds.append(flatten_prediction(class_predictors[i](x)))
        box_preds.append(flatten_prediction(box_predictors[i](x)))
        if verbose:
            print('Predict scale', i, x.shape, 'with', anchors[-1].shape[1], 'anchors')
        # down sample
        if i < 3:
            x = downsamplers[i](x)
        elif i == 3:
            x = nd.Pooling(x, global_pool=True, pool_type='max',
                           kernel=(x.shape[2], x.shape[3]))
    return (concat_predictions(anchors),
            concat_predictions(class_preds),
            concat_predictions(box_preds))
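# Several of the forward passes here call flatten_prediction and concat_predictions
# without defining them. A minimal sketch of what they are assumed to do, following
# the usual Gluon SSD tutorial convention (NCHW -> NHWC, flatten per example, then
# concatenate the per-scale predictions along axis 1):
from mxnet import nd

def flatten_prediction(pred):
    # (batch, channels, h, w) -> (batch, h * w * channels)
    return nd.flatten(nd.transpose(pred, axes=(0, 2, 3, 1)))

def concat_predictions(preds):
    # list of (batch, n_i) -> (batch, sum(n_i))
    return nd.concat(*preds, dim=1)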
def ssd_forward(self, x):
    '''Helper function for the forward pass of the SSD.'''
    x = self.body(x)
    default_anchors = []
    predicted_boxes = []
    predicted_classes = []
    for i in range(self.num_anchors):
        default_anchors.append(
            MultiBoxPrior(x, sizes=self.anchor_sizes[i],
                          ratios=self.anchor_ratios[i]))
        predicted_boxes.append(
            self._flatten_prediction(self.box_preds[i](x)))
        predicted_classes.append(
            self._flatten_prediction(self.class_preds[i](x)))
        if i < len(self.downsamples):
            x = self.downsamples[i](x)
        elif i == 3:
            x = nd.Pooling(x, global_pool=True, pool_type='max', kernel=(4, 4))
    return default_anchors, predicted_classes, predicted_boxes
def model_forward(x, net, down_samples, class_preds, box_preds,
                  cap_transforms, sizes, ratios):
    # extract features with the body network
    x = net(x)
    # for each scale, add anchors, box and class predictions,
    # then compute the input to the next scale
    default_anchors = []
    predicted_boxes = []
    predicted_classes = []
    for i in range(5):
        default_anchors.append(
            MultiBoxPrior(x, sizes=sizes[i], ratios=ratios[i]))
        # class predictions go through a capsule transform followed by
        # two prediction layers per scale
        prime_out = cap_transforms[i](x)
        class_capout = class_preds[i * 2](prime_out)
        class_pred = class_preds[i * 2 + 1](class_capout)
        class_pred = nd.flatten(nd.transpose(class_pred, (0, 2, 3, 1)))
        box_pred = nd.flatten(nd.transpose(box_preds[i](x), (0, 2, 3, 1)))
        print(class_pred.shape)
        predicted_boxes.append(box_pred)
        predicted_classes.append(class_pred)
        if i < 3:
            x = down_samples[i](x)
        elif i == 3:
            # simply use the pooling layer
            x = nd.Pooling(x, global_pool=True, pool_type='max', kernel=(4, 4))
    return default_anchors, predicted_classes, predicted_boxes
def hybrid_forward(self, F, x, *args, **kwargs):
    x = self.feature_extractor(x)
    anchors = MultiBoxPrior(x, sizes=self.anchor_sizes,
                            ratios=self.anchor_ratios)
    class_pred = self.class_predictor(x)
    return anchors, class_pred
def forward(self, x):
    feat = self.features(x)
    default_anchors = []
    predicted_boxes = []
    predicted_classes = []
    for i in range(self.n_scales):
        feat = self.downsamples[i](feat)
        default_anchors.append(
            MultiBoxPrior(feat, clip=True, sizes=self.sizes[i],
                          ratios=self.ratios[i]))
        bp = self.box_preds[i](feat)
        cp = self.class_preds[i](feat)
        predicted_boxes.append(self.flatten_prediction(bp))
        predicted_classes.append(self.flatten_prediction(cp))
    anchors = self.concat_predictions(default_anchors)
    box_preds = self.concat_predictions(predicted_boxes)
    class_preds = self.concat_predictions(predicted_classes)
    class_preds = nd.reshape(class_preds, shape=(0, -1, self.num_classes + 1))
    return anchors, box_preds, class_preds
def hybrid_forward(self, F, x, *args, **kwargs):
    multifeatures, class_predictors, box_predictors = self.model
    anchors, class_preds, box_preds = [], [], []
    for i in range(len(multifeatures)):
        x = multifeatures[i](x)
        # predict; channel-wise L2 normalization (e.g. for relu4_3) is applied
        # through self.normscale below when self.normalizations[i] > 0
        if self.steps:
            step = (self.steps[i], self.steps[i])
        else:
            step = (-1.0, -1.0)  # let MultiBoxPrior infer the step from the feature map
        anchors.append(MultiBoxPrior(
            x, sizes=self.sizes[i], ratios=self.ratios[i], clip=False, steps=step))
        if self.normalizations[i] > 0:
            class_preds.append(
                flatten_prediction(class_predictors[i](self.normscale(x))))
            box_preds.append(
                flatten_prediction(box_predictors[i](self.normscale(x))))
        else:
            class_preds.append(
                flatten_prediction(class_predictors[i](x)))
            box_preds.append(
                flatten_prediction(box_predictors[i](x)))
        if self.verbose:
            print('Predict scale', i, x.shape, 'with',
                  anchors[-1].shape[1], 'anchors')
    # concat data
    anchors = F.concat(*anchors, dim=1)
    class_preds = F.concat(*class_preds, dim=1)
    box_preds = F.concat(*box_preds, dim=1)
    # it is better to have class predictions reshaped for softmax computation
    class_preds = class_preds.reshape(shape=(0, -1, self.num_classes + 1))
    return anchors, class_preds, box_preds
def toy_ssd_forward(self, x, body, downsamples, class_preds, box_preds,
                    sizes, ratios):
    # extracted features
    x = body(x)
    default_anchors = []
    predicted_boxes = []
    predicted_classes = []
    for i in range(5):
        default_anchors.append(MultiBoxPrior(x, sizes=sizes[i], ratios=ratios[i]))
        predicted_boxes.append(self.flatten_prediction(box_preds[i](x)))
        predicted_classes.append(self.flatten_prediction(class_preds[i](x)))
        if i < 3:
            x = downsamples[i](x)
        elif i == 3:
            x = nd.Pooling(x, global_pool=True, pool_type='max', kernel=(4, 4))
    return default_anchors, predicted_classes, predicted_boxes
def mobile_net_forward(x, body, downsamples, class_preds, box_preds, sizes, ratios):
    x = body(x)
    default_anchors = []
    predicted_boxes = []
    predicted_classes = []
    for i in range(5):
        default_anchors.append(MultiBoxPrior(x, sizes[i], ratios=ratios[i]))
        predicted_boxes.append(flatten_prediction(box_preds[i](x)))
        predicted_classes.append(flatten_prediction(class_preds[i](x)))
        if i < 3:
            x = downsamples[i](x)
        elif i == 3:
            x = nd.Pooling(x, global_pool=True, pool_type='max', kernel=(4, 4))
    return default_anchors, predicted_boxes, predicted_classes
def toy_ssd_forward(x, body, downsamples, class_preds, box_preds, sizes, ratios):
    # extract feature with the body network
    x = body(x)
    # for each scale, add anchors, box and class predictions,
    # then compute the input to next scale
    default_anchors = []
    predicted_boxes = []
    predicted_classes = []
    for i in range(5):
        default_anchors.append(
            MultiBoxPrior(x, sizes=sizes[i], ratios=ratios[i]))
        predicted_boxes.append(flatten_prediction(box_preds[i](x)))
        predicted_classes.append(flatten_prediction(class_preds[i](x)))
        if i < 3:
            x = downsamples[i](x)
        elif i == 3:
            # simply use the pooling layer
            x = nd.Pooling(x, global_pool=True, pool_type='max', kernel=(4, 4))
    return default_anchors, predicted_classes, predicted_boxes
def hybrid_forward(self, F, x):
    x = self.body(x)
    cls_preds = []
    box_preds = []
    anchors = []
    for i in range(len(self.sizes_list)):
        cls_preds.append(
            (self.class_predictors[i](x)).transpose((0, 2, 3, 1)).flatten())
        box_preds.append(
            (self.box_predictors[i](x)).transpose((0, 2, 3, 1)).flatten())
        anchors.append(
            MultiBoxPrior(x, sizes=self.sizes_list[i], ratios=self.ratios_list[i]))
        if self.verbose:
            print("predict scale", i, x.shape, 'with', anchors[-1].shape, 'anchors')
        if i < len(self.sizes_list) - 2:
            x = self.downsamples[i](x)
        elif i == len(self.sizes_list) - 2:
            x = F.Pooling(x, global_pool=True, pool_type='max',
                          kernel=(x.shape[2], x.shape[3]))
    cls_preds = nd.concat(*cls_preds, dim=1).reshape((0, -1, num_class + 1))
    box_preds = nd.concat(*box_preds, dim=1)
    anchors = nd.concat(*anchors, dim=1)
    return anchors, box_preds, cls_preds
def forward(self, x):
    # F is assumed to be mxnet.ndarray, imported at module level
    sources = list()
    loc = list()
    conf = list()
    priors = list()
    # apply vgg up to conv4_3 relu
    for k in range(23):
        x = self.vgg[k](x)
    s = self.L2Norm(x)
    sources.append(s)
    # apply vgg up to fc7
    for k in range(23, len(self.vgg)):
        x = self.vgg[k](x)
    sources.append(x)
    # apply extra layers and cache source layer outputs
    for k, v in enumerate(self.extras):
        x = F.relu(v(x))
        if k % 2 == 1:
            sources.append(x)
    for i, (x, l, c) in enumerate(zip(sources, self.loc, self.conf)):
        boxes = MultiBoxPrior(x, sizes=self.cfg['sizes'][i],
                              ratios=self.cfg['aspect_ratios'][i], clip=True)
        priors.append(boxes)
        l_res = l(x)
        c_res = c(x)
        loc.append(flatten_preds(l_res))
        conf.append(flatten_preds(c_res))
    priors = F.concat(*priors, dim=1)
    loc = F.concat(*loc, dim=1)
    conf = F.concat(*conf, dim=1)
    conf = F.reshape(conf, shape=(0, -1, self.num_classes))
    output = (priors, conf, loc)
    return output
import mxnet as mx
from mxnet import nd
from mxnet.contrib.ndarray import MultiBoxPrior
import matplotlib.pyplot as plt

n = 40
# shape: batch x channel x height x width
x = nd.random_uniform(shape=(1, 3, n, n))
y = MultiBoxPrior(x, sizes=[.5, .25, .1], ratios=[1, 2, .5])

# the first anchor box generated for the pixel at (20, 20)
# its format is (x_min, y_min, x_max, y_max)
boxes = y.reshape((n, n, -1, 4))
print('The first anchor box at row 21, column 21:', boxes[20, 20, 0, :])

from mxnet.gluon import nn

def class_predictor(num_anchors, num_classes):
    """return a layer to predict classes"""
    return nn.Conv2D(num_anchors * (num_classes + 1), 3, padding=1)

cls_pred = class_predictor(5, 10)
cls_pred.initialize()
x = nd.zeros((2, 3, 20, 20))
print('Class prediction', cls_pred(x).shape)

def box_predictor(num_anchors):
    """return a layer to predict delta locations"""
    return nn.Conv2D(num_anchors * 4, 3, padding=1)

box_pred = box_predictor(10)
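# A minimal usage sketch mirroring the class-predictor check above (the input
# shape is illustrative only): box_predictor(10) predicts 4 offsets for each of
# 10 anchors per spatial position.
box_pred.initialize()
x = nd.zeros((2, 3, 20, 20))
print('Box prediction', box_pred(x).shape)  # expected: (2, 40, 20, 20)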
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import mxnet as mx
from mxnet import nd
from mxnet.contrib.ndarray import MultiBoxPrior  # MultiBoxPrior generates the default anchor boxes

n = 40
# input shape: batch x channel x height x width
x = nd.random_uniform(shape=(1, 3, n, n))
# with n preset sizes and m preset aspect ratios, n + m - 1 boxes are generated per pixel
y = MultiBoxPrior(x, sizes=[.5, .25, .1], ratios=[1, 2, .5])

# take the first anchor box at pixel (20, 20)
# box format is (x_min, y_min, x_max, y_max), given as fractions of the input size
boxes = y.reshape((n, n, -1, 4))
print('The first anchor box at row 21, column 21:', boxes[20, 20, 0, :])

import matplotlib.pyplot as plt

def box_to_rect(box, color, linewidth=3):
    """convert an anchor box to a matplotlib rectangle"""
    box = box.asnumpy()
    return plt.Rectangle(
        (box[0], box[1]), (box[2] - box[0]), (box[3] - box[1]),
        fill=False, edgecolor=color, linewidth=linewidth)

colors = ['blue', 'green', 'red', 'black', 'magenta']  # 3 + 3 - 1 = 5 boxes
plt.imshow(nd.ones((n, n, 3)).asnumpy())
anchors = boxes[20, 20, :, :]
for i in range(anchors.shape[0]):
    plt.gca().add_patch(box_to_rect(anchors[i, :] * n, colors[i]))
plt.show()
import mxnet as mx
from mxnet.contrib.ndarray import MultiBoxPrior
from config.config import config
from collections import namedtuple
from cython.heatmap import putGaussianMaps
from cython.pafmap import putVecMaps
import numpy as np
from bbox_transform import *
from mxnet import autograd as ag

## define anchor
n = 46
# shape: batch x channel x height x width
x = mx.nd.random_uniform(shape=(1, 3, n, n))
y = MultiBoxPrior(x, sizes=[.5], ratios=[1])

# the first anchor box generated for the pixel at (20, 20)
# its format is (x_min, y_min, x_max, y_max)
boxes = y.reshape((n, n, -1, 4))
print('The first anchor box at row 21, column 21:', boxes[20, 20, 0, :])

## author: Liang Dong
## Generate heat map and part affinity map
Point = namedtuple('Point', 'x y')

crop_size_x = 368
crop_size_y = 368
center_perterb_max = 40
scale_prob = 1