def read_img(img_read):
    # Read an OpenCV image (HWC) and prepare a CHW float array for the network.
    if IM_RESIZE:
        img_read = cv2.resize(img_read, (640, 480), interpolation=cv2.INTER_CUBIC)
    img_raw = np.asarray(img_read, dtype=np.uint8)
    img_raw_final = img_raw.copy()   # untouched copy for visualization
    img = np.asarray(img_read, dtype=np.float32)
    H, W, _ = img.shape              # OpenCV arrays are HWC, not DHW
    img = img.transpose((2, 0, 1))   # HWC -> CHW
    img = preprocess(img)
    _, o_H, o_W = img.shape
    scale = o_H / H                  # original -> preprocessed scale
    scale_ = H / o_H                 # inverse scale, preprocessed -> original
    return img, img_raw_final, scale, scale_
def read_img(path):
    # Read an image from disk with PIL and prepare a CHW float array for the network.
    f = Image.open(path)
    if IM_RESIZE:
        f = f.resize((640, 480), Image.ANTIALIAS)
    f = f.convert('RGB')             # convert() returns a new image; assign it
    img_raw = np.asarray(f, dtype=np.uint8)
    img_raw_final = img_raw.copy()
    img = np.asarray(f, dtype=np.float32)
    H, W, _ = img.shape              # PIL arrays are HWC
    img = img.transpose((2, 0, 1))   # HWC -> CHW
    img = preprocess(img)
    _, o_H, o_W = img.shape
    scale = o_H / H
    scale_ = H / o_H
    return img, img_raw_final, scale, scale_
def read_img(frame):
    # Same as above, but starting from an in-memory frame (e.g. a video frame).
    f = Image.fromarray(frame).convert('RGB')
    img = np.asarray(f, dtype=np.float32)
    H, W, _ = img.shape              # HWC before the transpose
    img = img.transpose((2, 0, 1))   # HWC -> CHW
    img = preprocess(img)
    _, o_H, o_W = img.shape
    scale = o_H / H
    # Resize the raw image to the preprocessed size so it lines up with predictions.
    f = f.resize((img.shape[2], img.shape[1]), Image.ANTIALIAS)
    img_raw_final = np.asarray(f, dtype=np.uint8).copy()
    return img, img_raw_final, scale
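# All three readers above delegate rescaling and normalization to preprocess().
# A minimal sketch of such a preprocess, assuming the common Faster R-CNN
# convention (short side ~600 px, long side capped at 1000 px, channel-mean
# subtraction); the exact constants and the resize backend are assumptions,
# not taken from this codebase.
import numpy as np
from skimage import transform as sktsf

def preprocess(img, min_size=600, max_size=1000):
    # img: CHW float32 with values in [0, 255].
    C, H, W = img.shape
    scale = min(min_size / min(H, W), max_size / max(H, W))
    img = sktsf.resize(img / 255.0, (C, int(H * scale), int(W * scale)),
                       mode='reflect', anti_aliasing=False)
    # Caffe-style mean subtraction; the channel ordering of the means is an assumption.
    mean = np.array([122.7717, 115.9465, 102.9801], dtype=np.float32).reshape(3, 1, 1)
    return (img * 255.0 - mean).astype(np.float32, copy=False)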
def getFeatureMap(self, imgs, sizes=None):
    # Run only the backbone extractor and collect its feature maps.
    self.eval()
    self.use_preset("visualize")
    prepared_imgs = list()
    sizes = list()
    for img in imgs:
        size = img.shape[1:]
        img = preprocess(at.tonumpy(img))
        prepared_imgs.append(img)
        sizes.append(size)
    feature_maps = list()
    for img, size in zip(prepared_imgs, sizes):
        img = at.totensor(img[None]).float()
        feature_map = self.extractor(img)
        feature_maps.append(at.tonumpy(feature_map))  # move off the GPU before stacking
    feature_maps = np.array(feature_maps)
    self.use_preset("evaluate")
    self.train()
    return feature_maps
def predict(self, imgs, sizes=None, visualize=False):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images,
            with values in the range :math:`[0, 255]`.

    Returns:
        tuple of lists:
        This method returns a tuple of three lists,
        :obj:`(bboxes, labels, scores)`.

        * **bboxes**: :math:`(R, 4)` arrays of \
          :math:`(y_{min}, x_{min}, y_{max}, x_{max})` coordinates.
        * **labels**: Each value indicates the class of the bounding box. \
          Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
          number of the foreground classes.
        * **scores**: Each value indicates how confident the prediction is.
    """
    self.eval()
    if visualize:
        self.use_preset('visualize')  # toggles between the visualize/evaluate presets
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    for img, size in zip(prepared_imgs, sizes):
        img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True)
        scale = img.shape[3] / size[1]
        roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)  # invokes forward()
        # We are assuming that batch size is 1.
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(rois) / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = t.Tensor(self.loc_normalize_mean). \
            repeat(self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std). \
            repeat(self.n_class)[None]

        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        cls_bbox = loc2bbox(
            at.tonumpy(roi).reshape((-1, 4)),
            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # clip bounding box
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        # Softmax turns the raw scores into probabilities; prob has shape
        # (300, 21) and np.sum(prob) == 300. One could inspect the maximum
        # probability here and return early if it is too small.
        prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = at.tonumpy(prob)

        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    self.use_preset('evaluate')
    self.train()
    return bboxes, labels, scores
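# Every predict() variant here ends by handing raw_cls_bbox and raw_prob to
# self._suppress. A minimal sketch of such a per-class suppression step,
# using torchvision's NMS in place of the codebase's own helper;
# self.n_class, self.score_thresh, and self.nms_thresh come from the
# surrounding class and are assumptions here.
import numpy as np
import torch
from torchvision.ops import nms

def _suppress(self, raw_cls_bbox, raw_prob):
    # raw_cls_bbox: (R, n_class * 4) numpy, raw_prob: (R, n_class) numpy.
    bbox, label, score = [], [], []
    # Class 0 is the background, so start from 1.
    for l in range(1, self.n_class):
        cls_bbox_l = raw_cls_bbox.reshape((-1, self.n_class, 4))[:, l, :]
        prob_l = raw_prob[:, l]
        mask = prob_l > self.score_thresh
        cls_bbox_l, prob_l = cls_bbox_l[mask], prob_l[mask]
        # NMS is symmetric in the coordinate axes, so (y, x) ordering is fine.
        keep = nms(torch.from_numpy(cls_bbox_l).float(),
                   torch.from_numpy(prob_l).float(), self.nms_thresh).numpy()
        bbox.append(cls_bbox_l[keep])
        # Returned labels are in [0, n_class - 2]: background is excluded.
        label.append((l - 1) * np.ones((len(keep),)))
        score.append(prob_l[keep])
    bbox = np.concatenate(bbox, axis=0).astype(np.float32)
    label = np.concatenate(label, axis=0).astype(np.int32)
    score = np.concatenate(score, axis=0).astype(np.float32)
    return bbox, label, score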
def predict(self, imgs, sizes=None, visualize=False):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
        tuple of lists:
        This method returns a tuple of three lists,
        :obj:`(bboxes, labels, scores)`.

        * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
          where :math:`R` is the number of bounding boxes in an image. \
          Each bounding box is organized by \
          :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
          in the second axis.
        * **labels**: A list of integer arrays of shape :math:`(R,)`. \
          Each value indicates the class of the bounding box. \
          Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
          number of the foreground classes.
        * **scores**: A list of float arrays of shape :math:`(R,)`. \
          Each value indicates how confident the prediction is.
    """
    self.eval()
    if visualize:
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    for img, size in zip(prepared_imgs, sizes):
        img = at.totensor(img[None]).float()
        scale = img.shape[3] / size[1]
        roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
        # We are assuming that batch size is 1.
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(rois) / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = t.Tensor(self.loc_normalize_mean).cuda(). \
            repeat(self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std).cuda(). \
            repeat(self.n_class)[None]

        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        cls_bbox = loc2bbox(
            at.tonumpy(roi).reshape((-1, 4)),
            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # clip bounding box
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        prob = (F.softmax(at.totensor(roi_score), dim=1))

        bbox, label, score = self._suppress(cls_bbox, prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    self.use_preset('evaluate')
    self.train()
    return bboxes, labels, scores
def predict(self, imgs, sizes=None, visualize=False):  # prediction entry point
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
        tuple of lists:
        This method returns a tuple of three lists,
        :obj:`(bboxes, labels, scores)`.

        * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
          where :math:`R` is the number of bounding boxes in an image. \
          Each bounding box is organized by \
          :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
          in the second axis.
        * **labels**: A list of integer arrays of shape :math:`(R,)`. \
          Each value indicates the class of the bounding box. \
          Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
          number of the foreground classes.
        * **scores**: A list of float arrays of shape :math:`(R,)`. \
          Each value indicates how confident the prediction is.
    """
    self.eval()  # eval mode disables BatchNorm updates and Dropout
    if visualize:  # visualization path
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        prepared_imgs = imgs
    bboxes = list()  # final output boxes
    labels = list()  # final output labels
    scores = list()  # final output scores
    for img, size in zip(prepared_imgs, sizes):
        img = at.totensor(img[None]).float()  # add a batch dimension
        scale = img.shape[3] / size[1]        # rescaling factor from preprocessing
        roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)  # forward pass
        # We are assuming that batch size is 1.
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(rois) / scale  # map rois back to the original image scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = t.Tensor(self.loc_normalize_mean).cuda(). \
            repeat(self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std).cuda(). \
            repeat(self.n_class)[None]
        # Q: Some sources say this maps back coordinates that ProposalCreator
        #    normalized, but I do not see that in the code.
        # A: I believe "ProposalCreator normalized the coordinates" is wrong. The
        #    de-normalization here is needed because the locs were normalized
        #    during training (by ProposalTargetCreator), so the predicted locs
        #    are normalized; ProposalCreator has nothing to do with it.
        roi_cls_loc = (roi_cls_loc * std + mean)  # undo the loc normalization
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        # Each roi has n_class locs, so expand to the same shape for the
        # per-class refinement below.
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        # Refine the rois with the predicted locs to get the final boxes.
        cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # Clip boxes that exceed the image bounds.
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        # Softmax gives per-class probabilities for each box.
        prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = at.tonumpy(prob)

        # Suppress overlapping boxes given their class probabilities; returns
        # coordinates, class labels, and the corresponding probabilities.
        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    self.use_preset('evaluate')
    self.train()  # back to train mode
    return bboxes, labels, scores
def predict(self, imgs, sizes=None, visualize=False):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
        tuple of lists:
        This method returns a tuple of three lists,
        :obj:`(bboxes, labels, scores)`.

        * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
          where :math:`R` is the number of bounding boxes in an image. \
          Each bounding box is organized by \
          :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
          in the second axis.
        * **labels**: A list of integer arrays of shape :math:`(R,)`. \
          Each value indicates the class of the bounding box. \
          Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
          number of the foreground classes.
        * **scores**: A list of float arrays of shape :math:`(R,)`. \
          Each value indicates how confident the prediction is.
    """
    # Set the module to evaluation mode; this only affects modules such as
    # Dropout and BatchNorm. eval() is a method of nn.Module.
    self.eval()
    if visualize:
        # The evaluate and visualize presets use different NMS and score thresholds.
        self.nms_thresh = 0.3
        self.score_thresh = 0.7
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    # sizes is e.g. [(600, 800)]
    for img, size in zip(prepared_imgs, sizes):
        # Expand [3, 600, 800] to [1, 3, 600, 800], wrap it in a Variable,
        # and mark it volatile for inference.
        img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True)
        scale = img.shape[3] / size[1]  # scale is 1 here
        roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
        # We are assuming that batch size is 1.
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(rois) / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = t.Tensor(self.loc_normalize_mean).cuda(). \
            repeat(self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std).cuda(). \
            repeat(self.n_class)[None]

        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # clip bounding box
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = at.tonumpy(prob)

        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    self.use_preset('evaluate')
    self.train()
    return bboxes, labels, scores
def predict(self, imgs, sizes=None, visualize=False, prob_thre=0.7):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
        tuple of lists:
        This method returns a tuple of three lists,
        :obj:`(bboxes, labels, scores)`.

        * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
          where :math:`R` is the number of bounding boxes in an image. \
          Each bounding box is organized by \
          :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
          in the second axis.
        * **labels**: A list of integer arrays of shape :math:`(R,)`. \
          Each value indicates the class of the bounding box. \
          Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
          number of the foreground classes.
        * **scores**: A list of float arrays of shape :math:`(R,)`. \
          Each value indicates how confident the prediction is.
    """
    self.eval()
    # sizes changes when visualize is set to different values
    if visualize:
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]  # reshaped image size
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    for img, size in zip(prepared_imgs, sizes):
        img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True)
        # Coerce tensor-valued dimensions to plain ints before dividing;
        # torch.Size entries cannot be assigned to in place.
        in_w = int(img.shape[3]) if t.is_tensor(img.shape[3]) else img.shape[3]
        orig_w = int(size[1]) if t.is_tensor(size[1]) else size[1]
        scale = in_w / orig_w
        (px, py), roi_scores, rois, search_regions, _ = self(img, scale=scale)
        # We are assuming that batch size is 1.
        roi_score = roi_scores.data
        px = px.data
        py = py.data
        roi = at.totensor(rois) / scale
        search_regions = at.totensor(search_regions) / scale

        # Convert to numpy arrays.
        px = at.tonumpy(px)
        py = at.tonumpy(py)
        search_regions = at.tonumpy(search_regions)

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        # Use px, py and search_regions to generate boxes.
        cls_bbox = p2bbox(px, py, search_regions, threshold=prob_thre)
        cls_bbox = at.totensor(cls_bbox)

        # clip bounding box
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = at.tonumpy(prob)

        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    self.use_preset('evaluate')
    self.train()
    return bboxes, labels, scores
def predict(self, imgs, sizes=None, visualize=False):
    '''
    Run prediction on each image.

    Args:
        the input images must be CHW-format RGB np.ndarray
    Return:
        a tuple of (bboxes, labels, scores): box coordinates, labels, and scores
    '''
    self.eval()
    if visualize:  # visualization path
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]  # record height & width
            # TODO: why does the visualize path re-run preprocessing?
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    for img, size in zip(prepared_imgs, sizes):
        img = at.totensor(img[None]).float()
        scale = img.shape[3] / size[1]
        # TODO: this invokes forward(); why can the module be called directly?
        roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
        # TODO: what exactly does .data do here?
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(rois) / scale

        mean = t.Tensor(self.loc_normalize_mean).cuda().repeat(
            self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std).cuda().repeat(
            self.n_class)[None]

        roi_cls_loc = (roi_cls_loc * std + mean)
        # TODO: does this view() actually reshape as intended?
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        cls_bbox = loc2bbox(
            at.tonumpy(roi).reshape((-1, 4)),
            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # clamp restricts the tensor to the given range so boxes stay inside the image
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = at.tonumpy(prob)

        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    self.use_preset('evaluate')
    self.train()
    return bboxes, labels, scores
def predict(self, imgs, sizes=None, visualize=False):
    # Switch to eval mode.
    self.eval()
    # Optionally switch to the visualization preset.
    if visualize:
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    for img, size in zip(prepared_imgs, sizes):
        img = at.totensor(img[None]).float()
        # Compute the scale of the input image. Preprocessing rescales the
        # image, so the scaling factor must be recorded; ProposalCreator uses
        # it when filtering rois, i.e. candidate boxes are mapped back to the
        # original image by this factor, and regions outside the original
        # image are clipped.
        scale = img.shape[3] / size[1]
        # Run the forward pass.
        roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
        # We are assuming that batch size is 1.
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(rois) / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        # The locs were normalized ((loc - mean) / std) during training, so we
        # multiply by std and add mean here; the location parameters are
        # roi_cls_loc. The rois are then refined with roi_cls_loc to obtain
        # the new cls_bbox.
        mean = t.Tensor(self.loc_normalize_mean).cuda(). \
            repeat(self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std).cuda(). \
            repeat(self.n_class)[None]

        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        # Decode locs into boxes.
        cls_bbox = loc2bbox(
            at.tonumpy(roi).reshape((-1, 4)),
            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # clip bounding box
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        # Turn the classification scores roi_scores into probabilities via
        # softmax. At this point we only have per-roi boxes, locs, and scores;
        # the final predictions are selected below.
        prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = at.tonumpy(prob)

        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    self.use_preset('evaluate')
    self.train()
    return bboxes, labels, scores
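# Several variants above decode offsets with loc2bbox. A minimal sketch of
# that decoding under the standard R-CNN (dy, dx, dh, dw) parameterization;
# the codebase's own implementation may differ in details.
import numpy as np

def loc2bbox(src_bbox, loc):
    # src_bbox: (R, 4) boxes as (y_min, x_min, y_max, x_max)
    # loc:      (R, 4) offsets as (dy, dx, dh, dw)
    if src_bbox.shape[0] == 0:
        return np.zeros((0, 4), dtype=loc.dtype)
    src_h = src_bbox[:, 2] - src_bbox[:, 0]
    src_w = src_bbox[:, 3] - src_bbox[:, 1]
    src_ctr_y = src_bbox[:, 0] + 0.5 * src_h
    src_ctr_x = src_bbox[:, 1] + 0.5 * src_w

    dy, dx, dh, dw = loc[:, 0], loc[:, 1], loc[:, 2], loc[:, 3]
    ctr_y = dy * src_h + src_ctr_y      # shift center by a fraction of the size
    ctr_x = dx * src_w + src_ctr_x
    h = np.exp(dh) * src_h              # scale size multiplicatively
    w = np.exp(dw) * src_w

    dst_bbox = np.zeros(loc.shape, dtype=loc.dtype)
    dst_bbox[:, 0] = ctr_y - 0.5 * h
    dst_bbox[:, 1] = ctr_x - 0.5 * w
    dst_bbox[:, 2] = ctr_y + 0.5 * h
    dst_bbox[:, 3] = ctr_x + 0.5 * w
    return dst_bbox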
def weights_init(m):
    # Hypothetical enclosing function: the fragment began mid-function, and
    # the body below follows the common DCGAN-style initializer.
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
        m.bias.data.fill_(0)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


if __name__ == '__main__':
    import cv2
    import numpy as np
    import torch
    from data.util import read_image
    from data.dataset import preprocess
    from utils import array_tool  # assumed module path for tonumpy()
    from model.fpn import FPN

    cv_img = cv2.imread('/home/fengkai/dog.jpg')
    src_img = read_image('/home/fengkai/dog.jpg')
    img = preprocess(array_tool.tonumpy(src_img))
    img = torch.from_numpy(img)[None]

    C2, C3, C4, C5 = decom_resnet50()
    c2_out = C2(img)
    c3_out = C3(c2_out)
    c4_out = C4(c3_out)
    c5_out = C5(c4_out)

    fpn = FPN(256)
    p2, p3, p4, p5, p6 = fpn.forward(c2_out, c3_out, c4_out, c5_out)
    rcnn_maps = [p2, p3, p4, p5]
    feat_stride = [4, 8, 16, 32, 64]
    spatial_scale = [1. / i for i in feat_stride]
    for i, l in enumerate(range(2, 6)):
        pass  # loop body truncated in the source
def predict(self, imgs, sizes=None, visualize=False):
    """Detect objects from images.

    This method predicts objects for each image.
    """
    self.eval()
    self.use_preset('evaluate')
    if visualize:
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    for img, size in zip(prepared_imgs, sizes):
        img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True)
        scale = img.shape[3] / size[1]
        roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
        # We are assuming that batch size is 1.
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(rois) / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = t.Tensor(self.loc_normalize_mean).cuda(). \
            repeat(self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std).cuda(). \
            repeat(self.n_class)[None]

        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        cls_bbox = loc2bbox(
            at.tonumpy(roi).reshape((-1, 4)),
            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # clip bounding box
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = at.tonumpy(prob)

        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    self.use_preset('evaluate')
    self.train()
    return bboxes, labels, scores
def extract(self, x, num_box):
    # Extract per-box features for a batch of images.
    num_batch = len(x)
    self.use_preset('visualize')
    prepared_imgs = list()
    sizes = list()
    for img in x:
        img = img.squeeze()
        size = img.shape[1:]
        img = preprocess(at.tonumpy(img))
        prepared_imgs.append(img)
        sizes.append(size)
    roi_cls_locs = list()
    roi_scores = list()
    rpn_locs = list()
    rpn_scores = list()
    rois = list()
    anchors = list()
    features = t.zeros((num_batch, num_box, self.hidden_size))
    scales = list()
    hiddens = list()
    for i, (img, size) in enumerate(zip(prepared_imgs, sizes)):
        img = at.totensor(img[None]).float()
        scale = img.shape[3] / size[1]
        h = self.extractor(img)
        rpn_loc, rpn_score, roi, roi_indices, anchor = \
            self.rpn(h, size, scale)
        roi_cls_loc_, roi_score_, feature = self.head(h, roi, roi_indices)
        # We are assuming that batch size is 1.
        roi_score = roi_score_.data  # softmax is applied once, further below
        roi_cls_loc = roi_cls_loc_.data
        roi = at.totensor(roi) / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = t.Tensor(self.loc_normalize_mean).cuda(). \
            repeat(self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std).cuda(). \
            repeat(self.n_class)[None]

        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        cls_bbox = loc2bbox(
            at.tonumpy(roi).reshape((-1, 4)),
            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # clip bounding box
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = at.tonumpy(prob)

        bbox, label, score, feat = self._suppress_by_num(
            raw_cls_bbox, raw_prob, feature, num_box)
        features[i, :, :] = feat

        roi_cls_locs.append(roi_cls_loc_)
        roi_scores.append(roi_score_)
        rpn_scores.append(rpn_score)
        rpn_locs.append(rpn_loc)
        rois.append(roi)
        anchors.append(anchor)
        scales.append(scale)
        hiddens.append(h)
    return features, roi_cls_locs, roi_scores, rpn_locs, rpn_scores, rois, anchors, scales, hiddens
    # Tail of a helper that converts a 2x3 affine prediction into a bounding
    # box on the 16x-downsampled grid; the enclosing function header and the
    # computation of `affine`, `c`, and `d` are elided in the source.
    ymin = 16 * float((-0.5 * affine[0] - 0.5 * affine[1] + affine[4]) + c[0])
    xmin = 16 * float((-0.5 * affine[3] - 0.5 * affine[2] + affine[5]) + d[0])
    ymax = 16 * float((0.5 * affine[0] + 0.5 * affine[1] + affine[4]) + c[0])
    xmax = 16 * float((0.5 * affine[3] + 0.5 * affine[2] + affine[5]) + d[0])
    bbox.append([xmin, ymin, xmax, ymax])
    return bbox


if __name__ == "__main__":
    # Prepare the test data
    train_img, train_platetext, train_bbox, test_img, test_platetext, test_bbox = \
        dataset.preprocess()
    # Import the trained model
    PD = PlateDetector()
    PD.to(device)
    PD.load_state_dict(torch.load('models/best_model.pt'))
    # Generate output file IDs
    for file in os.listdir(f_img):
        imgID = file.split('.')[0]
        plate_indx.append(imgID)
    bbox = WritePlate()
    # Write to the xml file
    for i, plate in enumerate(bbox):
        pass  # loop body truncated in the source
""" self.eval() if visualize: self.use_preset('visualize') prepared_imgs = list() <<<<<<< HEAD prepared_imgs_depth = list() ======= #prepared_imgs_depth = list() >>>>>>> b43e1a358b5853ffb749ac931c9cd97a6dccf862 sizes = list() #for img in imgs: img=imgs size = img.shape[1:] <<<<<<< HEAD img ,img_depth= preprocess(at.tonumpy(img),at.tonumpy(imgs_depth)) prepared_imgs.append(img) prepared_imgs_depth.append(img_depth) sizes.append(size) else: prepared_imgs = imgs prepared_imgs_depth = imgs_depth bboxes = list() labels = list() scores = list() for img, img_depth, size in zip(prepared_imgs, prepared_imgs_depth, sizes): img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True) img_depth = t.autograd.Variable(at.totensor(img_depth).float()[None], volatile=True) scale = img.shape[3] / size[1] roi_cls_loc, roi_scores, rois, _ = self(img, img_depth, scale=scale) =======
def predict(self, imgs, sizes=None, visualize=False):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
        tuple of lists:
        This method returns a tuple of three lists,
        :obj:`(bboxes, labels, scores)`.

        * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
          where :math:`R` is the number of bounding boxes in an image. \
          Each bounding box is organized by \
          :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
          in the second axis.
        * **labels**: A list of integer arrays of shape :math:`(R,)`. \
          Each value indicates the class of the bounding box. \
          Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
          number of the foreground classes.
        * **scores**: A list of float arrays of shape :math:`(R,)`. \
          Each value indicates how confident the prediction is.
    """
    self.eval()
    if visualize:
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    for img, size in zip(prepared_imgs, sizes):
        img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True)
        scale = img.shape[3] / size[1]
        roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
        # We are assuming that batch size is 1.
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(rois) / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = t.Tensor(self.loc_normalize_mean).cuda(). \
            repeat(self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std).cuda(). \
            repeat(self.n_class)[None]

        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # clip bounding box
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = at.tonumpy(prob)

        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    self.use_preset('evaluate')
    self.train()
    return bboxes, labels, scores
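# For reference, a hedged usage sketch of the predict() API above. The model
# class name, checkpoint path, and read_image helper are illustrative
# assumptions, not names confirmed by this codebase.
import torch as t
from data.util import read_image      # assumed helper: returns CHW RGB float32
from model import FasterRCNNVGG16     # hypothetical model class name

faster_rcnn = FasterRCNNVGG16()
state = t.load('checkpoints/fasterrcnn.pth')   # illustrative path
faster_rcnn.load_state_dict(state)

img = read_image('demo.jpg')          # values in [0, 255]
bboxes, labels, scores = faster_rcnn.predict([img], visualize=True)
print(len(bboxes[0]), 'boxes; labels:', labels[0], 'scores:', scores[0])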