def __getitem__(self, idx):
    """Load the idx-th sample: mean-subtracted image plus RPN training targets.

    Returns:
        m_img: float tensor (C, H, W) — image with IMAGE_MEAN subtracted, HWC->CHW.
        cls:   float tensor (1, num_anchors) — per-anchor class labels.
        regr:  float tensor (num_anchors, 3) — rows of [label, regression targets].
    """
    img_name = self.img_names[idx]
    img_path = os.path.join(self.datadir, img_name)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread silently returns None on a missing/corrupt file;
        # fail loudly here instead of with an opaque AttributeError below.
        raise FileNotFoundError(f"could not read image: {img_path}")
    h, w, c = img.shape
    # If the longest side exceeds 1000 px, shrink so it becomes exactly 1000.
    rescale_fac = max(h, w) / 1000
    if rescale_fac > 1.0:
        h = int(h / rescale_fac)
        w = int(w / rescale_fac)
        img = cv2.resize(img, (w, h))
    # splitext instead of split('.')[0] so filenames with extra dots
    # (e.g. "img.001.jpg") still map to the correct annotation file.
    xml_path = os.path.join(self.labelsdir, os.path.splitext(img_name)[0] + '.xml')
    gtbox = self.generate_gtboxes(xml_path, rescale_fac)
    # Random horizontal flip (p = 0.5); mirror the ground-truth x coordinates too.
    if np.random.randint(2) == 1:
        img = img[:, ::-1, :]
        newx1 = w - gtbox[:, 2] - 1
        newx2 = w - gtbox[:, 0] - 1
        gtbox[:, 0] = newx1
        gtbox[:, 2] = newx2
    # RPN targets on the stride-16 feature grid (backbone downsamples by 16).
    [cls, regr] = cal_rpn((h, w), (int(h / 16), int(w / 16)), 16, gtbox)
    # Prepend the label column: each regr row becomes [label, target0, target1].
    regr = np.hstack([cls.reshape(cls.shape[0], 1), regr])
    cls = np.expand_dims(cls, axis=0)
    m_img = img - IMAGE_MEAN  # per-channel mean subtraction
    # HWC -> CHW layout for PyTorch.
    m_img = torch.from_numpy(m_img.transpose([2, 0, 1])).float()
    cls = torch.from_numpy(cls).float()
    regr = torch.from_numpy(regr).float()
    return m_img, cls, regr
def transform(img, gtbox=None):
    """Rescale an image (longest side capped at 1000 px) and convert to a CHW float tensor.

    When gtbox is given, also compute RPN anchor targets and return
    (img, cls, regr); otherwise return the image tensor alone.

    NOTE(review): unlike the dataset __getitem__, no mean subtraction happens
    here — presumably intentional for inference-time use; confirm with callers.
    """
    height, width, _ = img.shape
    scale = max(height, width) / 1000
    if scale > 1.0:
        # Shrink so the longest side becomes exactly 1000 px.
        height = int(height / scale)
        width = int(width / scale)
        img = cv2.resize(img, (width, height))

    targets = None
    if gtbox is not None:
        # Anchor labels + regression targets on the stride-16 feature grid.
        cls, regr = cal_rpn((height, width), (int(height / 16), int(width / 16)), 16, gtbox)
        # Prepend the label column so each regr row is [label, target0, target1].
        regr = np.hstack([cls.reshape(cls.shape[0], 1), regr])
        cls = np.expand_dims(cls, axis=0)
        targets = (torch.from_numpy(cls).float(), torch.from_numpy(regr).float())

    tensor = torch.from_numpy(img.transpose([2, 0, 1])).float()  # HWC -> CHW
    if targets is not None:
        return (tensor,) + targets
    return tensor
def __getitem__(self, idx):
    """Load the idx-th sample: mean-subtracted image plus RPN training targets.

    Returns:
        m_img: float tensor (C, H, W) — image with IMAGE_MEAN subtracted, HWC->CHW.
        cls:   float tensor (1, num_anchors) — per-anchor class labels.
        regr:  float tensor (num_anchors, 3) — rows of [cls, anchor offset, anchor height
               target]: for each anchor, whether it is a detection target, how far its
               center is shifted, and how its height differs.
    """
    # Pull the idx-th image out of datadir.
    img_name = self.img_names[idx]
    img_path = os.path.join(self.datadir, img_name)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread silently returns None on a missing/corrupt file;
        # raise a clear error instead of crashing later on .shape.
        raise FileNotFoundError(f"could not read image: {img_path}")
    h, w, c = img.shape
    rescale_fac = max(h, w) / 1000
    # If either image dimension exceeds 1000, resize so the longer side becomes 1000.
    if rescale_fac > 1.0:
        h = int(h / rescale_fac)
        w = int(w / rescale_fac)
        img = cv2.resize(img, (w, h))
    # splitext instead of split('.')[0] so filenames containing extra dots
    # (e.g. "img.001.jpg") still resolve to the correct XML annotation file.
    xml_path = os.path.join(self.labelsdir, os.path.splitext(img_name)[0] + '.xml')
    gtbox = self.generate_gtboxes(xml_path, rescale_fac)
    # With probability 1/2, apply the following augmentation:
    if np.random.randint(2) == 1:
        img = img[:, ::-1, :]  # flip horizontally (along the X axis)
        newx1 = w - gtbox[:, 2] - 1  # flip the gtboxes to match
        newx2 = w - gtbox[:, 0] - 1
        gtbox[:, 0] = newx1
        gtbox[:, 2] = newx2
    # cls.shape == (num_anchors,), regr.shape == (num_anchors, 2)
    [cls, regr] = cal_rpn((h, w), (int(h / 16), int(w / 16)), 16, gtbox)
    # Prepend the label column: each regr row becomes [cls, anchor[0], anchor[1]].
    regr = np.hstack([cls.reshape(cls.shape[0], 1), regr])
    # Wrap the 1-D array in an extra outer dimension.
    cls = np.expand_dims(cls, axis=0)
    m_img = img - IMAGE_MEAN  # subtract the per-channel (RGB) mean
    # Reorder so the channel dimension comes first.
    m_img = torch.from_numpy(m_img.transpose([2, 0, 1])).float()
    cls = torch.from_numpy(cls).float()
    regr = torch.from_numpy(regr).float()
    return m_img, cls, regr