def __getitem__(self, idx):
    """Load sample ``idx`` and build ``cal_rpn`` targets at strides 16 and 8.

    Args:
        idx: Position in ``self.img_name`` of the image to load.

    Returns:
        A 5-tuple ``(m_img, cls, regr, cls_8, regr_8)`` of float tensors:
        the image minus ``IMAGE_MEAN`` with axes reordered by
        ``transpose([2, 0, 1])``; the stride-16 ``cls`` output with a
        leading axis added; the stride-16 ``regr`` output with ``cls``
        prepended as its first column; and the same two targets computed
        with stride 8 from the split boxes.

    Raises:
        OSError: If ``cv2.imread`` returns ``None`` for the image path.
    """
    img_name = self.img_name[idx]

    # new work: the image may live in either of two directories.
    img_path = os.path.join(self.datadir, img_name)
    if not os.path.exists(img_path):
        img_path = os.path.join(self.datadir2, img_name)

    # The text before the first underscore selects the annotation directory
    # and which image extension is swapped for ".xml".
    lim = img_name.split('_')[0]
    if lim not in ["img", "image", "tainchi"]:
        xml_path = os.path.join(self.labelsdir, img_name.replace('.png', '.xml'))
    else:
        # NOTE(review): hard-coded absolute path that overwrites the instance
        # attribute on every call. Kept as-is because other code may read
        # self.labelsdir2; consider moving it to __init__.
        self.labelsdir2 = "/home/like/data/VOC/VOC2007/Annotations2"
        xml_path = os.path.join(self.labelsdir2, img_name.replace('.jpg', '.xml'))

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the path rather than on the .shape access below.
        raise OSError('cv2.imread could not load image: {}'.format(img_path))
    gtbox, _ = readxml(xml_path)
    h, w, _ = img.shape

    # Random mirror along the width axis, applied only when there are more
    # than three boxes; x1/x2 are mirrored and swapped so x1 <= x2 still holds.
    if np.random.randint(2) == 1 and len(gtbox) > 3:
        img = img[:, ::-1, :]
        newx1 = w - gtbox[:, 2] - 1
        newx2 = w - gtbox[:, 0] - 1
        gtbox[:, 0] = newx1
        gtbox[:, 2] = newx2

    # Stride-16 targets.
    [cls, regr], _ = cal_rpn(
        (h, w), (math.ceil(h / 16), math.ceil(w / 16)), 16, gtbox)

    m_img = img - IMAGE_MEAN

    # Prepend the label as the first column of the regression targets.
    regr = np.hstack([cls.reshape(cls.shape[0], 1), regr])
    cls = np.expand_dims(cls, axis=0)

    m_img = torch.from_numpy(m_img.transpose([2, 0, 1])).float()
    cls = torch.from_numpy(cls).float()
    regr = torch.from_numpy(regr).float()

    # Use the new stride-8 strategy: build a new gtbox list in which every
    # box is split at x1 + 8 into a left part and a right part.
    newgtbox = []
    for box in gtbox:
        x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
        newgtbox.append([x1, y1, x1 + 8, y2])
        newgtbox.append([x1 + 8, y1, x2, y2])
    newgtbox = np.array(newgtbox)

    # Stride-8 targets, packed the same way as the stride-16 ones.
    [cls_8, regr_8], _ = cal_rpn(
        (h, w), (math.ceil(h / 8), math.ceil(w / 8)), 8, newgtbox)
    regr_8 = np.hstack([cls_8.reshape(cls_8.shape[0], 1), regr_8])
    cls_8 = np.expand_dims(cls_8, axis=0)
    cls_8 = torch.from_numpy(cls_8).float()
    regr_8 = torch.from_numpy(regr_8).float()

    return m_img, cls, regr, cls_8, regr_8
def __getitem__(self, idx):
    """Load sample ``idx`` and build stride-16 ``cal_rpn`` targets.

    Args:
        idx: Position in ``self.img_names`` of the image to load.

    Returns:
        A 3-tuple ``(m_img, cls, regr)`` of float tensors: the image minus
        ``IMAGE_MEAN`` with axes reordered by ``transpose([2, 0, 1])``; the
        ``cls`` output of ``cal_rpn`` with a leading axis added; and the
        ``regr`` output with ``cls`` prepended as its first column.

    Raises:
        OSError: If ``cv2.imread`` returns ``None`` for the image path.
    """
    img_name = self.img_names[idx]
    img_path = os.path.join(self.datadir, img_name)
    xml_path = os.path.join(self.labelsdir, img_name.replace('.jpg', '.xml'))
    gtbox, _ = readxml(xml_path)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        # The path is reported here instead of being printed on every call.
        raise OSError('cv2.imread could not load image: {}'.format(img_path))
    h, w, _ = img.shape

    # Random mirror along the width axis; x1/x2 are mirrored and swapped so
    # that x1 <= x2 still holds.
    if np.random.randint(2) == 1:
        img = img[:, ::-1, :]
        newx1 = w - gtbox[:, 2] - 1
        newx2 = w - gtbox[:, 0] - 1
        gtbox[:, 0] = newx1
        gtbox[:, 2] = newx2

    [cls, regr], _ = cal_rpn((h, w), (int(h / 16), int(w / 16)), 16, gtbox)

    m_img = img - IMAGE_MEAN

    # Prepend the label as the first column of the regression targets.
    regr = np.hstack([cls.reshape(cls.shape[0], 1), regr])
    cls = np.expand_dims(cls, axis=0)

    # transform to torch tensor
    m_img = torch.from_numpy(m_img.transpose([2, 0, 1])).float()
    cls = torch.from_numpy(cls).float()
    regr = torch.from_numpy(regr).float()

    return m_img, cls, regr
def __getitem__(self, index):
    """Load sample ``index`` from the VOC-style directory and build RPN targets.

    Args:
        index: Position in ``self.imgs`` of the sample to load.

    Returns:
        ``(image, gt_boxes, clss, index)`` when ``self.is_debug`` is truthy,
        otherwise ``(image, regr, clss, index)``. ``image`` is a float tensor
        with axes reordered by ``transpose([2, 0, 1])`` (mean-subtracted only
        outside debug mode); ``regr`` is the ``cal_rpn`` regression output
        with the labels prepended as its first column; ``clss`` is the label
        output with a leading axis added.

    Raises:
        OSError: If ``cv2.imread`` returns ``None`` for the image path.
    """
    img = self.imgs[index]
    xml_file = os.path.join(self.voc_dir, "Annotations", (img + ".xml"))
    gt_boxes, labels, image_name = self.read_xml(xml_file)
    image_name = os.path.join(self.voc_dir, "JPEGImages", image_name)

    # Read the image; if it is not IMAGE_HEIGHT x IMAGE_HEIGHT, rescale it
    # proportionally to that size and adjust the boxes to match.
    image = cv2.imread(image_name)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('cv2.imread could not load image: {}'.format(image_name))
    if (image.shape[0] != ctpn_params.IMAGE_HEIGHT
            or image.shape[1] != ctpn_params.IMAGE_HEIGHT):
        image, rescale_fac, padding = resize_image2square(
            image, ctpn_params.IMAGE_HEIGHT)
        gt_boxes = adj_gtboxes(gt_boxes, rescale_fac, padding)

    # Split the large annotated boxes into small boxes of width ANCHORS_WIDTH.
    gt_boxes, class_ids = gen_gt_from_quadrilaterals(
        gt_boxes, labels, image.shape, ctpn_params.ANCHORS_WIDTH)

    h, w, _ = image.shape

    # Truthiness is used here (not "== False") so this test and the return
    # branch below always agree, including for falsy non-bool values.
    if not self.is_debug:
        image = image - ctpn_params.IMAGE_MEAN
    image = torch.from_numpy(image.transpose([2, 0, 1])).float()

    # Compute the RPN targets.
    [clss, regr], _ = cal_rpn((h, w), (int(h / 16), int(w / 16)), 16, gt_boxes)

    # Data is stored as [(label, Vc, Vh)]: labels become the first column.
    regr = np.hstack([clss.reshape(clss.shape[0], 1), regr])
    regr = torch.from_numpy(regr).float()

    clss = np.expand_dims(clss, axis=0)
    clss = torch.from_numpy(clss).float()

    if self.is_debug:
        return image, gt_boxes, clss, index
    return image, regr, clss, index
def __getitem__(self, idx):
    """Load sample ``idx``, dump a debug overlay, and return RPN targets.

    Args:
        idx: Position in ``self.img_names`` of the image to load.

    Returns:
        A 3-tuple of float tensors: the image minus ``IMAGE_MEAN`` with axes
        reordered by ``transpose([2, 0, 1])``; the ``cal_rpn`` label output
        with a leading axis added; and the ``cal_rpn`` regression output with
        the labels prepended as its first column.

    Side effects:
        Appends the path of an unreadable image to ``error_imgs.txt`` and
        writes an annotated copy of every sample into the ``debug`` directory.
    """
    img_name = self.img_names[idx]
    img_path = os.path.join(self.datadir, img_name)
    img = cv2.imread(img_path)

    # Read error: log the offending path and fall back to a default image.
    if img is None:
        with open('error_imgs.txt', 'a') as error_log:
            error_log.write('{}\n'.format(img_path))
        img_name = 'img_2647.jpg'
        img_path = os.path.join(self.datadir, img_name)
        img = cv2.imread(img_path)

    h, w, _ = img.shape

    # Shrink the image when its longer side exceeds 1600 pixels.
    shrink = max(h, w) / 1600
    if shrink > 1.0:
        h, w = int(h / shrink), int(w / shrink)
        img = cv2.resize(img, (w, h))

    label_file = 'gt_' + img_name.split('.')[0] + '.txt'
    boxes = self.parse_gtfile(os.path.join(self.labelsdir, label_file), shrink)

    # Random mirror along the width axis; columns 0 and 2 (x1, x2) are
    # mirrored and swapped in a single assignment.
    if np.random.randint(2) == 1:
        img = img[:, ::-1, :]
        boxes[:, [0, 2]] = w - boxes[:, [2, 0]] - 1

    [labels, offsets], base_anchors = cal_rpn(
        (h, w), (int(h / 16), int(w / 16)), 16, boxes)

    # Write an annotated copy of the sample for visual inspection.
    overlay = self.draw_boxes(img.copy(), labels, base_anchors, boxes)
    os.makedirs('debug', exist_ok=True)
    cv2.imwrite('debug/{}'.format(img_name), overlay)

    # Prepend the label as the first column of the regression targets.
    label_column = labels.reshape(labels.shape[0], 1)
    targets = np.hstack([label_column, offsets])

    # Convert everything to float torch tensors.
    image_tensor = torch.from_numpy(
        (img - IMAGE_MEAN).transpose([2, 0, 1])).float()
    label_tensor = torch.from_numpy(np.expand_dims(labels, axis=0)).float()
    target_tensor = torch.from_numpy(targets).float()

    return image_tensor, label_tensor, target_tensor
def __getitem__(self, idx):
    """Load sample ``idx`` and build stride-16 ``cal_rpn`` targets.

    Samples with fewer than two boxes, or for which ``cal_rpn`` yields no
    labels or a largest label of -1, are skipped by returning the next index
    (wrapping around at ``len(self)``).

    Args:
        idx: Position in ``self.img_names`` of the image to load.

    Returns:
        A 3-tuple ``(m_img, cls, regr)`` of float tensors: the image minus
        ``IMAGE_MEAN`` with axes reordered by ``transpose([2, 0, 1])``; the
        ``cls`` output of ``cal_rpn`` with a leading axis added; and the
        ``regr`` output with ``cls`` prepended as its first column.

    Raises:
        OSError: If ``cv2.imread`` returns ``None`` for the image path.
    """
    img_name = self.img_names[idx]
    img_path = os.path.join(self.datadir, img_name)
    txt_path = os.path.join(self.labelsdir, img_name.replace('.jpg', '.txt'))

    gtboxes = readtxt(txt_path)
    if len(gtboxes) < 2:
        return self.__getitem__((idx + 1) % len(self))
    gtbox = np.array(gtboxes)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('cv2.imread could not load image: {}'.format(img_path))
    h, w, _ = img.shape

    # Upscale very narrow images 5x together with their boxes.
    if w < 32:
        # cv2.resize takes dsize as (width, height), and the interpolation
        # flag must be passed by keyword (the third positional slot is dst).
        img = cv2.resize(img, (w * 5, h * 5), interpolation=cv2.INTER_NEAREST)
        gtbox = gtbox * 5
        # Refresh the size so the flip and cal_rpn below see the same
        # geometry as the scaled boxes.
        h, w = img.shape[:2]

    # Random mirror along the width axis; x1/x2 are mirrored and swapped so
    # that x1 <= x2 still holds.
    if np.random.randint(2) == 1:
        img = img[:, ::-1, :]
        newx1 = w - gtbox[:, 2] - 1
        newx2 = w - gtbox[:, 0] - 1
        gtbox[:, 0] = newx1
        gtbox[:, 2] = newx2

    [cls, regr], _ = cal_rpn((h, w), (int(h / 16), int(w / 16)), 16, gtbox)
    if cls is None or max(cls) == -1:
        return self.__getitem__((idx + 1) % len(self))

    m_img = img - IMAGE_MEAN

    # Prepend the label as the first column of the regression targets.
    regr = np.hstack([cls.reshape(cls.shape[0], 1), regr])
    cls = np.expand_dims(cls, axis=0)

    # transform to torch tensor
    m_img = torch.from_numpy(m_img.transpose([2, 0, 1])).float()
    cls = torch.from_numpy(cls).float()
    regr = torch.from_numpy(regr).float()

    return m_img, cls, regr
# Forward pass. NOTE(review): ``img`` (and ``torch`` further down) must already
# be bound by code that is not visible in this fragment; ``img`` is rebound to
# the test image below.
model = CTPN_Model()
clsp, regrp = model(img)

import cv2
import numpy as np
from ctpn_utils import cal_rpn

# Build ground-truth targets for one test image from two hand-written boxes.
path = "./detection_test/205531101_00590.jpg"
img = cv2.imread(path)
h, w, c = img.shape

gtboxes = [(40, 40, 140, 140), (56, 40, 140, 140)]
gtbox = np.array(gtboxes)

[cls, regr], _ = cal_rpn((h, w), (int(h / 16), int(w / 16)), 16, gtbox)

# Regression targets: labels prepended as the first column, converted to a
# tensor, then given a leading axis.
regr = torch.from_numpy(
    np.hstack([cls.reshape(cls.shape[0], 1), regr])).unsqueeze(0)

# Labels: one leading axis added in numpy and another on the tensor.
cls = torch.from_numpy(np.expand_dims(cls, axis=0)).unsqueeze(0)

# loss = RPN_CLS_Loss("cpu")
# print(loss(clsp,cls))

# Cast the predicted and target regression tensors to double.
regrp = regrp.double()
regr = regr.double()