Example #1
    def __getitem__(self, idx):
        img_name = self.img_names[idx]
        img_path = os.path.join(self.datadir, img_name)
        img = cv2.imread(img_path)
        h, w, c = img.shape
        rescale_fac = max(h, w) / 1000
        # Shrink the image if the longer side exceeds 1000 px
        if rescale_fac > 1.0:
            h = int(h / rescale_fac)
            w = int(w / rescale_fac)
            img = cv2.resize(img, (w, h))

        xml_path = os.path.join(self.labelsdir,
                                img_name.split('.')[0] + '.xml')
        gtbox = self.generate_gtboxes(xml_path, rescale_fac)

        # Random horizontal flip with probability 1/2; mirror the boxes to match
        if np.random.randint(2) == 1:
            img = img[:, ::-1, :]
            newx1 = w - gtbox[:, 2] - 1
            newx2 = w - gtbox[:, 0] - 1
            gtbox[:, 0] = newx1
            gtbox[:, 2] = newx2

        [cls, regr] = cal_rpn((h, w), (int(h / 16), int(w / 16)), 16, gtbox)
        regr = np.hstack([cls.reshape(cls.shape[0], 1), regr])
        cls = np.expand_dims(cls, axis=0)

        m_img = img - IMAGE_MEAN
        m_img = torch.from_numpy(m_img.transpose([2, 0, 1])).float()
        cls = torch.from_numpy(cls).float()
        regr = torch.from_numpy(regr).float()

        return m_img, cls, regr
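Since every image is rescaled independently, the returned tensors have per-sample sizes. A minimal usage sketch (the class name CTPNDataset and the directory paths are assumptions, not from the original):

from torch.utils.data import DataLoader

dataset = CTPNDataset(datadir='train/images', labelsdir='train/labels')  # hypothetical names
loader = DataLoader(dataset, batch_size=1, shuffle=True)  # batch_size=1: sizes differ per image

for m_img, cls, regr in loader:
    # m_img: (1, 3, H, W) mean-subtracted image
    # cls:   (1, 1, num_anchors), regr: (1, num_anchors, 3)
    pass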
Example #2
def transform(img, gtbox=None):
    h, w, c = img.shape
    rescale_fac = max(h, w) / 1000
    # Shrink the image if the longer side exceeds 1000 px
    if rescale_fac > 1.0:
        h = int(h / rescale_fac)
        w = int(w / rescale_fac)
        img = cv2.resize(img, (w, h))

    if gtbox is not None:
        # Compute RPN classification labels and regression targets on a stride-16 grid
        [cls, regr] = cal_rpn((h, w), (int(h / 16), int(w / 16)), 16, gtbox)
        regr = np.hstack([cls.reshape(cls.shape[0], 1), regr])
        cls = np.expand_dims(cls, axis=0)
        cls = torch.from_numpy(cls).float()
        regr = torch.from_numpy(regr).float()

    img = torch.from_numpy(img.transpose([2, 0, 1])).float()  # HWC -> CHW float tensor

    if gtbox is not None:
        return img, cls, regr
    return img
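A rough usage sketch for transform, assuming gtbox is an (N, 4) array of [x1, y1, x2, y2] boxes in pixel coordinates (the file name is a placeholder):

import cv2
import numpy as np

img = cv2.imread('sample.jpg')                       # placeholder path
gtbox = np.array([[10, 20, 120, 40]], dtype=float)   # one box: x1, y1, x2, y2

img_t, cls, regr = transform(img, gtbox)             # training: tensor + RPN targets
img_t = transform(cv2.imread('sample.jpg'))          # inference: tensor only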
Example #3
    def __getitem__(self, idx):
        # Take the idx-th image from datadir
        img_name = self.img_names[idx]
        img_path = os.path.join(self.datadir, img_name)
        img = cv2.imread(img_path)
        h, w, c = img.shape
        rescale_fac = max(h, w) / 1000
        # If either the image width or height exceeds 1000, resize so the longer side becomes 1000
        if rescale_fac > 1.0:
            h = int(h / rescale_fac)
            w = int(w / rescale_fac)
            img = cv2.resize(img, (w, h))

        xml_path = os.path.join(self.labelsdir, img_name.split('.')[0] + '.xml')
        gtbox = self.generate_gtboxes(xml_path, rescale_fac)

        # Apply the following processing to the image with probability 1/2
        if np.random.randint(2) == 1:
            img = img[:, ::-1, :]  # Flip along the X direction (horizontal flip)
            newx1 = w - gtbox[:, 2] - 1  # Flip gtbox coordinates to match
            newx2 = w - gtbox[:, 0] - 1
            gtbox[:, 0] = newx1
            gtbox[:, 2] = newx2

        [cls, regr] = cal_rpn((h, w), (int(h / 16), int(w / 16)), 16, gtbox)
        # cls.shape == (num_anchors,), regr.shape == (num_anchors, 2)
        regr = np.hstack([cls.reshape(cls.shape[0], 1), regr])
        # regr:
        # [[cls, anchor[0], anchor[1]],  # for each anchor: is it a target, how far its center is offset, how much its height differs
        #  .....
        # ]
        cls = np.expand_dims(cls, axis=0)  # Wrap the 1-D array in an extra outer dimension, making it 2-D

        m_img = img - IMAGE_MEAN  # Subtract the per-channel (RGB) mean
        m_img = torch.from_numpy(m_img.transpose([2, 0, 1])).float()  # Reorder to channel-first (C, H, W)
        cls = torch.from_numpy(cls).float()
        regr = torch.from_numpy(regr).float()

        return m_img, cls, regr
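As a sanity check (not part of the original code), the flip remapping newx1 = w - x2 - 1, newx2 = w - x1 - 1 mirrors the boxes while preserving their widths:

import numpy as np

w = 10
gtbox = np.array([[2, 0, 5, 3]])        # x1=2, x2=5 in a 10-px-wide image

newx1 = w - gtbox[:, 2] - 1             # 10 - 5 - 1 = 4
newx2 = w - gtbox[:, 0] - 1             # 10 - 2 - 1 = 7
gtbox[:, 0], gtbox[:, 2] = newx1, newx2

print(gtbox)                            # [[4 0 7 3]]; the 3-px width is preserved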