Example #1
def read_img(img_read):
    if IM_RESIZE:
        img_read = cv2.resize(img_read, (640, 480),
                              interpolation=cv2.INTER_CUBIC)
    img_raw = np.asarray(img_read, dtype=np.uint8)
    print("dim2", img_raw.ndim)
    img_raw_final = img_raw.copy()
    img = np.asarray(img_read, dtype=np.float32)
    D, H, W = img.shape  # note: img is HWC here, so these names actually hold (H, W, C)

    img = img.transpose((2, 0, 1))
    a_D, a_H, a_W = img.shape
    img = preprocess(img)
    o_D, o_H, o_W = img.shape
    scale = o_H / H
    scale_ = D / o_H
    print('D,H,W,a_D, a_H, a_W,o_D,o_H, o_W,scale:', D, H, W, a_D, a_H, a_W,
          o_D, o_H, o_W, scale)
    return img, img_raw_final, scale, scale_
Example #2
def read_img(path):
    f = Image.open(path)
    if IM_RESIZE:
        f = f.resize((640, 480), Image.ANTIALIAS)

    f = f.convert('RGB')
    img_raw = np.asarray(f, dtype=np.uint8)
    print("dim2", img_raw.ndim)
    img_raw_final = img_raw.copy()
    img = np.asarray(f, dtype=np.float32)
    D, H, W = img.shape
    
    img = img.transpose((2,0,1))
    a_D, a_H, a_W = img.shape
    img = preprocess(img)
    o_D, o_H, o_W = img.shape
    scale = o_H / H
    scale_ = D / o_H
    print('D,H,W,a_D, a_H, a_W,o_D,o_H, o_W,scale:', D, H, W, a_D, a_H, a_W,
          o_D, o_H, o_W, scale)
    return img, img_raw_final, scale, scale_
Example #3
def read_img(frame):
    # f = Image.open(path)
    # if IM_RESIZE:
    #     f = f.resize((640,480), Image.ANTIALIAS)
    # f.convert('RGB')
    f = Image.fromarray(frame).convert('RGB')
    img_raw = np.asarray(f, dtype=np.uint8)
    img_raw_final = img_raw.copy()
    img = np.asarray(f, dtype=np.float32)
    _, H, W = img.shape
    img = img.transpose((2, 0, 1))
    img = preprocess(img)
    _, o_H, o_W = img.shape
    scale = o_H / H
    f = f.resize((img.shape[2], img.shape[1]), Image.ANTIALIAS)
    f = f.convert('RGB')
    img_raw = np.asarray(f, dtype=np.uint8)
    img_raw_final = img_raw.copy()
    # print(type(img_raw_final))
    # print(img_raw_final.shape)
    return img, img_raw_final, scale
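
The three read_img variants in Examples #1-#3 differ only in how the image arrives (an already-decoded OpenCV array, a file path, or a raw video frame). A minimal usage sketch for the frame-based variant, assuming preprocess comes from simple-faster-rcnn-pytorch's data.dataset and that frames are read with OpenCV (cap and 'demo.mp4' are illustrative names, not part of the snippet):

import cv2

cap = cv2.VideoCapture('demo.mp4')
ret, frame = cap.read()
if ret:
    # OpenCV decodes to BGR; convert so Image.fromarray receives RGB
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img, img_raw_final, scale = read_img(frame)
    # img is CHW float32 ready for the network; img_raw_final matches its size
    print(img.shape, img_raw_final.shape, scale)
cap.release()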
Example #4
    def getFeatureMap(self, imgs, sizes=None):
        self.eval()
        self.use_preset("visualize")
        prepared_imgs = list()
        sizes = list()  # note: the sizes argument is ignored and recomputed here
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)

        feature_maps = list()
        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()
            scale = img.shape[3] / size[1]
            feature_map = self.extractor(img)
            feature_maps.append(feature_map)
        feature_maps = np.array(feature_maps)

        self.use_preset("evaluate")
        self.train()
        return feature_maps
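
getFeatureMap runs only the backbone (self.extractor) on the preprocessed images and skips the RPN and head entirely. A hypothetical call, reusing the model and image conventions of the predict examples below (faster_rcnn and img are assumptions, not part of this snippet):

feature_maps = faster_rcnn.getFeatureMap([img])
# for a VGG16 extractor with stride 16, each map is roughly (1, 512, H/16, W/16)
print(feature_maps[0].shape)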
Example #5
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        self.eval()
        if visualize:
            self.use_preset('visualize')  # originally 'visualize'; 'evaluate' is the alternative
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = t.autograd.Variable(at.totensor(img).float()[None],
                                      volatile=True)
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(
                img, scale=scale)  # this calls the forward method

            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))

            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.tovariable(roi_score),
                                        dim=1))  # the raw scores become probabilities after softmax
            # You could inspect the maximum predicted probability here and
            # return early if it is too small, skipping everything below.
            # prob has shape 300 x 21, and np.sum(prob) = 300

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
Example #6
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        self.eval()
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = (F.softmax(at.totensor(roi_score), dim=1))

            bbox, label, score = self._suppress(cls_bbox, prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
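
A minimal end-to-end sketch of calling predict, assuming the chenyuntc/simple-faster-rcnn-pytorch layout (FasterRCNNVGG16 in model, read_image in data.util; checkpoint loading omitted):

from model import FasterRCNNVGG16
from data.util import read_image

faster_rcnn = FasterRCNNVGG16()
img = read_image('demo.jpg')  # CHW, RGB, float32 values in [0, 255]
bboxes, labels, scores = faster_rcnn.predict([img], visualize=True)
bbox = bboxes[0]    # (R, 4) array of (y_min, x_min, y_max, x_max)
label = labels[0]   # (R,) class indices in [0, L - 1]
score = scores[0]   # (R,) confidence values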
Example #7
    def predict(self, imgs, sizes=None, visualize=False):  # prediction entry point
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        self.eval()  # switch the network to eval mode (affects BatchNorm and Dropout)
        if visualize:  # visualization branch
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()  # final output boxes
        labels = list()  # final output labels
        scores = list()  # final output scores
        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()  # add a batch dimension
            scale = img.shape[3] / size[1]  # scale introduced by preprocessing (to verify)
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)  # forward pass
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale  # map rois back to the original image scale (to verify)

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]
            # Q: some write-ups say ProposalCreator normalized the coordinates, so they
            # must be denormalized here, but I do not see that anywhere. Doubtful.
            # A: I think "ProposalCreator normalized the coordinates" is wrong. The
            # denormalization here is needed because the loc targets were normalized
            # during training (by ProposalTargetCreator), so the predicted loc is
            # normalized; ProposalCreator never normalized anything.
            roi_cls_loc = (roi_cls_loc * std + mean)  # denormalize the loc predictions
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            # each roi has n_class loc predictions, so expand roi to the same
            # shape for the per-class refinement below
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)

            # refine the rois with the predicted locs to get the final boxes
            cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding boxes that fall outside the image
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])
            # softmax turns the scores into per-class probabilities
            prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)
            # run suppression (NMS) on the boxes and their class probabilities
            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            
            # collect coordinates, class labels, and class probabilities
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')  # restore the evaluate preset
        self.train()  # back to train mode
        return bboxes, labels, scores
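
The denormalization discussed in the Q/A comments above is easy to check in isolation. A small NumPy stand-in using the defaults from simple-faster-rcnn-pytorch (loc_normalize_mean = (0., 0., 0., 0.), loc_normalize_std = (0.1, 0.1, 0.2, 0.2)):

import numpy as np

mean = np.array([0., 0., 0., 0.])
std = np.array([0.1, 0.1, 0.2, 0.2])

# a raw network output of 1.0 in every slot really encodes (dy, dx, dh, dw) = std
roi_cls_loc = np.array([[1.0, 1.0, 1.0, 1.0]])
print(roi_cls_loc * std + mean)  # -> [[0.1 0.1 0.2 0.2]]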
Example #8
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        # Set the module to evaluation mode. This only affects modules such as
        # Dropout and BatchNorm. (A method inherited from nn.Module.)
        self.eval()
        # visualization branch
        if visualize:
            # switch to the visualize preset: self.nms_thresh = 0.3, self.score_thresh = 0.7
            # (evaluate mode and visualize mode use different NMS and score thresholds)
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                # print('nei img shape is ', img.shape)
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        # sizes is e.g. [600, 800]
        # print('sizes is ', sizes)
        for img, size in zip(prepared_imgs, sizes):
            # img goes from [3, 600, 800] to [1, 3, 600, 800]: wrap in a Variable,
            # add a batch dimension, and mark it inference-only (volatile)
            img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True)
            # scale is 1 here
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
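
For reference, the use_preset calls that bracket this method just flip the two thresholds mentioned in the comments. A sketch of its logic as it appears in simple-faster-rcnn-pytorch (treat the exact values as that repo's defaults):

def use_preset(self, preset):
    # visualize: keep only confident boxes; evaluate: keep nearly everything for mAP
    if preset == 'visualize':
        self.nms_thresh = 0.3
        self.score_thresh = 0.7
    elif preset == 'evaluate':
        self.nms_thresh = 0.3
        self.score_thresh = 0.05
    else:
        raise ValueError('preset must be visualize or evaluate')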
Example #9
    def predict(self, imgs, sizes=None, visualize=False, prob_thre=0.7):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        self.eval()

        # sizes changes when visualize is set to different values
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]  # reshaped image size
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs

        bboxes = list()
        labels = list()
        scores = list()

        for img, size in zip(prepared_imgs, sizes):
            img = t.autograd.Variable(at.totensor(img).float()[None],
                                      volatile=True)

            # make sure both dimensions are plain Python numbers before dividing
            # (entries of a torch.Size cannot be reassigned in place)
            size = (int(size[0]), int(size[1]))
            scale = int(img.shape[3]) / size[1]

            (px, py), roi_scores, rois, search_regions, _ = self(img,
                                                                 scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            px = px.data
            py = py.data

            roi = at.totensor(rois) / scale
            search_regions = at.totensor(search_regions) / scale

            # Convert to numpy array
            px = at.tonumpy(px)
            py = at.tonumpy(py)
            search_regions = at.tonumpy(search_regions)

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.

            # use px, py and search_regions to generate boxes
            cls_bbox = p2bbox(px, py, search_regions, threshold=prob_thre)
            cls_bbox = at.totensor(cls_bbox)

            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            # print("raw_cls_bbox shape : ", raw_cls_bbox.shape)
            # print("raw_prob : ", raw_prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)

            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()

        return bboxes, labels, scores
Example #10
    def predict(self, imgs, sizes=None, visualize=False):
        '''
        Run prediction on each image.
        Args:
            input images must be CHW-format RGB np.ndarray
        Returns:
            a tuple of (bboxes, labels, scores): box coordinates, labels, and scores
        '''
        self.eval()
        if visualize:  # visualization branch
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]  # get height & width
                # TODO: why does the visualize path need this preprocessing?
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()
            scale = img.shape[3] / size[1]
            # TODO: this invokes forward; why is that possible?
            # (nn.Module.__call__ dispatches to forward)
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # TODO: what does .data do? (it returns the underlying tensor,
            # detached from the autograd graph)
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            mean = t.Tensor(self.loc_normalize_mean).cuda().repeat(
                self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda().repeat(
                self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            # TODO: does this view() reshape the data?
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)

            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clamp restricts the tensor to the given range so boxes stay inside the image
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)
        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
Example #11
    def predict(self, imgs, sizes=None, visualize=False):
        # switch to eval mode
        self.eval()
        # whether visualization is enabled
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = at.totensor(img[None]).float()
            # Compute the scale of the input image: preprocessing resizes it, so
            # the scale factor must be recorded. ProposalCreator uses this factor
            # when filtering rois, i.e. all proposals are mapped back to the
            # original image by it, and regions outside the original image are
            # clipped.
            scale = img.shape[3] / size[1]
            # run forward
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.

            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            # The loc targets were normalized (-mean / std) when they were built
            # (ProposalTargetCreator), so multiply by std and add mean back here.
            # The denormalized loc, roi_cls_loc, is then used to fine-tune these
            # 128 rois into the new cls_bbox.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            # decode locs into boxes
            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])
            # The classification scores roi_scores are converted to probabilities
            # with softmax. Note that at this point we still have locs and scores
            # for all input rois; the final predictions are filtered out below.
            prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
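
The scale bookkeeping described in the comments above (record how much preprocess resized the image, then divide the rois by it) can be checked with concrete numbers. A sketch assuming the simple-faster-rcnn-pytorch preprocess, which scales the shorter side toward 600 while capping the longer side at 1000:

import numpy as np

dummy = np.zeros((3, 300, 400), dtype=np.float32)  # CHW, H=300, W=400
processed = preprocess(dummy)                      # -> roughly (3, 600, 800)
scale = processed.shape[2] / dummy.shape[2]        # processed W / original W
print(processed.shape, scale)                      # scale is about 2.0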
Example #12
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
        m.bias.data.fill_(0)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


if __name__ == '__main__':
    from data.util import read_image
    import cv2
    import torch  # used below; presumably imported at module level in the original file
    from utils import array_tool  # assumed location, as in simple-faster-rcnn-pytorch
    cv_img = cv2.imread('/home/fengkai/dog.jpg')
    src_img = read_image('/home/fengkai/dog.jpg')

    from data.dataset import preprocess
    img = preprocess(array_tool.tonumpy(src_img))
    img = torch.from_numpy(img)[None]
    C2, C3, C4, C5, = decom_resnet50()
    c2_out = C2(img)
    c3_out = C3(c2_out)
    c4_out = C4(c3_out)
    c5_out = C5(c4_out)
    import numpy as np
    from model.fpn import FPN
    fpn = FPN(256)
    p2, p3, p4, p5, p6 = fpn.forward(c2_out, c3_out, c4_out, c5_out)

    rcnn_maps = [p2, p3, p4, p5]
    feat_stride = [4, 8, 16, 32, 64]
    spatial_scale = [1. / i for i in feat_stride]
    for i, l in enumerate(range(2, 6)):
        ...  # loop body truncated in the source snippet
Example #13
    def predict(self, imgs, sizes=None, visualize=False):
        """

        Detect objects from images.
        This method predicts objects for each image.

        """
        self.eval()
        self.use_preset('evaluate')
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = t.autograd.Variable(at.totensor(img).float()[None],
                                      volatile=True)
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores
Example #14
    def extract(self, x, num_box):
        num_batch = len(x)
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in x:
            img = img.squeeze()
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)

        roi_cls_locs = list()
        roi_scores = list()
        rpn_locs = list()
        rpn_scores = list()
        rois = list()
        anchors = list()
        features = t.zeros((num_batch, num_box, self.hidden_size))
        scales = list()
        hiddens = list()
        for i, (img, size) in enumerate(zip(prepared_imgs, sizes)):
            img = at.totensor(img[None]).float()
            scale = img.shape[3] / size[1]
            h = self.extractor(img)
            rpn_loc, rpn_score, roi, roi_indices, anchor = \
                self.rpn(h, size, scale)
            roi_cls_loc_, roi_score_, feature = self.head(h, roi, roi_indices)
            roi_score = F.softmax(at.totensor(roi_score_), dim=1)
            # We are assuming that batch size is 1.
            roi_score = roi_score.data
            roi_cls_loc = roi_cls_loc_.data
            roi = at.totensor(roi) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(
                at.tonumpy(roi).reshape((-1, 4)),
                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.totensor(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)
            bbox, label, score, feat = self._suppress_by_num(
                raw_cls_bbox, raw_prob, feature, num_box)
            features[i, :, :] = feat
            roi_cls_locs.append(roi_cls_loc_)
            roi_scores.append(roi_score_)
            rpn_scores.append(rpn_score)
            rpn_locs.append(rpn_loc)
            rois.append(roi)
            anchors.append(anchor)
            scales.append(scale)
            hiddens.append(h)

        return features, roi_cls_locs, roi_scores, rpn_locs, rpn_scores, rois, anchors, scales, hiddens
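
A hypothetical call pattern for extract, matching its signature (model and batch_imgs are illustrative names; num_box picks how many boxes to keep per image):

feats, roi_cls_locs, roi_scores, rpn_locs, rpn_scores, rois, anchors, scales, hiddens = \
    model.extract(batch_imgs, num_box=10)
print(feats.shape)  # (num_batch, 10, hidden_size)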
Example #15
        ymin = 16 * float((-0.5 * affine[0] - 0.5 * affine[1] + affine[4]) +
                          c[0])
        xmin = 16 * float((-0.5 * affine[3] - 0.5 * affine[2] + affine[5]) +
                          d[0])
        ymax = 16 * float((0.5 * affine[0] + 0.5 * affine[1] + affine[4]) +
                          c[0])
        xmax = 16 * float((0.5 * affine[3] + 0.5 * affine[2] + affine[5]) +
                          d[0])
        bbox.append([xmin, ymin, xmax, ymax])
    return bbox


if __name__ == "__main__":

    # Prepare the train/test data
    train_img, train_platetext, train_bbox, test_img, test_platetext, test_bbox = \
        dataset.preprocess()

    #Import the trained model
    PD = PlateDetector()
    PD.to(device)
    PD.load_state_dict(torch.load('models/best_model.pt'))

    #Generate output file ID
    for file in os.listdir(f_img):
        imgID = file.split('.')[0]
        plate_indx.append(imgID)

    bbox = WritePlate()

    #Write to the xml file
    for i, plate in enumerate(bbox):
Example #16
        """
        self.eval()
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            prepared_imgs_depth = list()
            sizes = list()
            # for img in imgs:
            img = imgs
            size = img.shape[1:]
            img ,img_depth= preprocess(at.tonumpy(img),at.tonumpy(imgs_depth))
            prepared_imgs.append(img)
            prepared_imgs_depth.append(img_depth)
            sizes.append(size)
        else:
            prepared_imgs = imgs
            prepared_imgs_depth = imgs_depth
        bboxes = list()
        labels = list()
        scores = list()
        for img, img_depth, size in zip(prepared_imgs, prepared_imgs_depth, sizes):
            img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True)
            img_depth = t.autograd.Variable(at.totensor(img_depth).float()[None], volatile=True)
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, img_depth, scale=scale)
    def predict(self, imgs, sizes=None, visualize=False):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """
        self.eval()
        if visualize:
            self.use_preset('visualize')
            prepared_imgs = list()
            sizes = list()
            for img in imgs:
                size = img.shape[1:]
                img = preprocess(at.tonumpy(img))
                prepared_imgs.append(img)
                sizes.append(size)
        else:
            prepared_imgs = imgs
        bboxes = list()
        labels = list()
        scores = list()
        for img, size in zip(prepared_imgs, sizes):
            img = t.autograd.Variable(at.totensor(img).float()[None], volatile=True)
            scale = img.shape[3] / size[1]
            roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
            # We are assuming that batch size is 1.
            roi_score = roi_scores.data
            roi_cls_loc = roi_cls_loc.data
            roi = at.totensor(rois) / scale

            # Convert predictions to bounding boxes in image coordinates.
            # Bounding boxes are scaled to the scale of the input images.
            mean = t.Tensor(self.loc_normalize_mean).cuda(). \
                repeat(self.n_class)[None]
            std = t.Tensor(self.loc_normalize_std).cuda(). \
                repeat(self.n_class)[None]

            roi_cls_loc = (roi_cls_loc * std + mean)
            roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                                at.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = at.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class * 4)
            # clip bounding box
            cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
            cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

            prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

            raw_cls_bbox = at.tonumpy(cls_bbox)
            raw_prob = at.tonumpy(prob)

            bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        self.use_preset('evaluate')
        self.train()
        return bboxes, labels, scores