class ReadDataset(u_data.Dataset):  # data.Dataset
    """VOC detection dataset read from a comma-separated annotation file.

    Each annotation line has the form
    ``relative/image/path,x1,y1,x2,y2,label[,x1,y1,x2,y2,label...]``.
    Paths, sizes and the pixel mean are taken from the module-level ``cfgs``.
    """

    def __init__(self):
        self.voc_file = cfgs.voc_file
        self.coco_file = cfgs.coco_file
        self.img_size = cfgs.ImgSize
        self.voc_dir = cfgs.voc_dir
        self.coco_dir = cfgs.coco_dir
        self.ids = []
        self.annotations = []
        self.load_txt()
        # Cursor into ``shulf_num`` used by ``get_batch``.
        self.idx = 0
        self.total_num = len(self)
        self.shulf_num = list(range(self.total_num))
        random.shuffle(self.shulf_num)
        auger_list = [
            "Sequential",
            "Fliplr",
            "Dropout",
            "AdditiveGaussianNoise",
            "SigmoidContrast",
            "Multiply",
        ]
        self.transfrom_imgs = Transform(img_auger_list=auger_list)

    def __getitem__(self, index):
        """Return ``(image_tensor, gt_boxes_and_labels)`` for sample *index*."""
        im, gt, _, _ = self.pull_item(index)
        return im, gt

    def __len__(self):
        """Return the number of loaded annotation records."""
        return len(self.annotations)

    def load_txt(self):
        """Parse ``self.voc_file`` into ``self.ids`` and ``self.annotations``.

        Every record in ``self.annotations`` is a list whose first element is
        the full image path and whose remaining elements are the float values
        from the annotation line. Blank lines are skipped. Only the VOC file
        is read; COCO loading is disabled.
        """
        with open(self.voc_file, 'r') as voc_r:
            voc_annotations = voc_r.readlines()
        # The handles are kept as attributes so close_txt() stays callable;
        # the VOC handle is already closed and the COCO file is never opened.
        self.voc_r = voc_r
        self.coco_r = None
        for line in voc_annotations:
            line = line.strip()
            if not line:
                continue
            fields = line.split(',')
            rel_path = fields[0]
            img_path = os.path.join(self.voc_dir, rel_path)
            # Image id: file name with its last four characters removed
            # (presumably an extension such as ".jpg").
            self.ids.append((self.voc_dir, rel_path.split('/')[-1][:-4]))
            record = [img_path]
            record.extend(float(value) for value in fields[1:])
            self.annotations.append(record)

    def close_txt(self):
        """Close any annotation file handles that were opened.

        Safe to call repeatedly and when a handle was never created.
        """
        for attr_name in ('voc_r', 'coco_r'):
            handle = getattr(self, attr_name, None)
            if handle is not None:
                handle.close()

    def get_batch(self, batch_size):
        """Return the next batch in shuffled order.

        Returns:
            ``(batch_data, targets)`` where ``batch_data`` is a float32 tensor
            of shape ``(batch_size, 3, img_size, img_size)`` and ``targets`` is
            a list of ``batch_size`` float tensors, one per image.

        The order is reshuffled when at most one unread sample remains at the
        start of a batch. If the order runs out in the middle of a batch, the
        remaining slots are filled with sample 0.
        """
        batch_data = torch.zeros(
            [batch_size, 3, self.img_size, self.img_size],
            dtype=torch.float32)
        targets = []
        if self.idx >= self.total_num - 1:
            random.shuffle(self.shulf_num)
            self.idx = 0
        for slot in range(batch_size):
            if self.idx >= self.total_num:
                rd_idx = 0
            else:
                rd_idx = self.shulf_num[self.idx]
            img, gt, _, _ = self.pull_item(rd_idx)
            self.idx += 1
            batch_data[slot, :, :, :] = img
            targets.append(torch.FloatTensor(gt))
        return batch_data, targets

    def pull_item(self, index):
        """Load, resize and mean-subtract the sample at *index*.

        Returns:
            ``(img, gt_box_label, h, w)``: ``img`` is a tensor of shape
            ``(c, h, w)``; ``gt_box_label`` is a float32 array with one row
            ``(x1, y1, x2, y2, label)`` per box, the coordinates divided by
            the original image width/height; ``h`` and ``w`` are the original
            image height and width.

        Raises:
            IOError: if the image file cannot be read.
        """
        record = self.annotations[index]
        img_path = record[0]
        img_data = cv2.imread(img_path)
        if img_data is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("failed to read image: {}".format(img_path))
        h, w = img_data.shape[:2]
        # Reverse the channel axis (cv2 loads BGR).
        img_data = img_data[:, :, ::-1]
        gt_box_label = np.array(record[1:], dtype=np.float32).reshape(-1, 5)
        img_data, gt_box_label = self.re_scale(img_data, gt_box_label)
        img_data = self.normalize(img_data)
        return torch.from_numpy(img_data).permute(2, 0, 1), gt_box_label, h, w

    def re_scale(self, img, boxes):
        """Resize *img* to ``img_size`` x ``img_size`` and normalise the boxes.

        The first four columns of *boxes* are divided by the source image
        width (x columns) or height (y columns); the aspect ratio of the image
        is not preserved. Returns ``(resized_img, boxes)`` with ``boxes`` as a
        new float32 array.
        """
        img_h, img_w = img.shape[:2]
        boxes = np.array(boxes, dtype=np.float32)
        boxes[:, 0] = boxes[:, 0] / float(img_w)
        boxes[:, 1] = boxes[:, 1] / float(img_h)
        boxes[:, 2] = boxes[:, 2] / float(img_w)
        boxes[:, 3] = boxes[:, 3] / float(img_h)
        out = cv2.resize(img, (self.img_size, self.img_size))
        return out, boxes

    def normalize(self, img):
        """Return a float32 copy of *img* with ``cfgs.PIXEL_MEAN`` subtracted.

        The conversion to float32 happens before the subtraction: subtracting
        in place on a uint8 image would wrap around or fail to cast.
        The input array is left unmodified.
        """
        img = img.astype(np.float32)
        for channel in range(3):
            img[:, :, channel] -= cfgs.PIXEL_MEAN[channel]
        return img

    def transform(self, img, gt_box_labels, cnt_dict=None):
        """Augment one image together with its boxes.

        Args:
            img: image array, or None.
            gt_box_labels: array with one row ``(x1, y1, x2, y2, label)``
                per box.
            cnt_dict: optional mapping from class name to count; when given,
                it is incremented once for every box kept by the augmenter.

        Returns:
            A dict with keys ``'img_data'`` (uint8 image) and ``'gt'`` (int32
            boxes and labels), or None when *img* is None or the augmenter
            returns no boxes.
        """
        if img is None:
            return None
        boxes = gt_box_labels[:, :4]
        labels = gt_box_labels[:, 4]
        img_aug, boxes_aug, keep_idx = self.transfrom_imgs.aug_img_boxes(
            img, [boxes.tolist()])
        if len(boxes_aug) == 0:
            return None
        img_data = np.array(img_aug[0], np.uint8)
        boxes_trans = np.array(boxes_aug[0], dtype=np.int32).reshape(-1, 4)
        # NOTE(review): keep_idx[1][0] is assumed to select the labels of the
        # boxes that survived augmentation -- confirm against Transform.
        label = np.array(labels[keep_idx[1][0]], dtype=np.int32).reshape(-1, 1)
        gt_box_labels = np.concatenate((boxes_trans, label), axis=1)
        img_dict = {'img_data': img_data, 'gt': gt_box_labels}
        if cnt_dict is not None:
            for row in gt_box_labels:
                cnt_dict[cfgs.VOCDataNames[int(row[4])]] += 1
        return img_dict
class convert_to_pkl(object):
    """Convert VOC and COCO annotation lists into one pickle stream.

    Every readable image is pickled as a dict with keys ``'img_data'`` and
    ``'gt'``; about half of them (chosen by ``random.randint(0, 1)``) are
    additionally augmented and pickled a second time. Per-class instance
    counts are written to a text record file.
    """

    def __init__(self, args):
        self.save_dir = args.save_dir
        self.voc_dir = args.voc_dir
        self.coco_dir = args.coco_dir
        self.file_out = args.out_file
        self.voc_anno_file = args.voc_anno
        self.coco_anno_file = args.coco_anno
        self.record_file = args.record_file
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
        self.load_files()

    def load_files(self):
        """Open the output/annotation/record files and build the augmenter.

        The handles stay open until ``write_pkl`` closes them.
        """
        cache_file = os.path.join(self.save_dir, self.file_out)
        self.file_w = open(cache_file, 'wb')
        self.voc_anno_r = open(self.voc_anno_file, 'r')
        self.coco_anno_r = open(self.coco_anno_file, 'r')
        self.record_w = open(self.record_file, 'w')
        auger_list = [
            "Sequential",
            "Fliplr",
            "Dropout",
            "AdditiveGaussianNoise",
            "SigmoidContrast",
            "Multiply",
        ]
        self.transfrom_imgs = Transform(img_auger_list=auger_list)

    def _count_instances(self, gt_box_labels, cnt_dict):
        """Add one to ``cnt_dict`` per row, keyed by the row's class name."""
        for row in gt_box_labels:
            cnt_dict[cfgs.VOCDataNames[int(row[4])]] += 1

    def rd_anotation(self, annotation, image_dir, cnt_dict):
        """Read one annotation line and load the image it refers to.

        Args:
            annotation: line of the form
                ``img_path,x1,y1,x2,y2,label[,x1,y1,x2,y2,label...]``.
            image_dir: directory that the image path is relative to.
            cnt_dict: mapping from class name to count, incremented once per
                box in the line.

        Returns:
            A dict with ``'img_data'`` (uint8 image) and ``'gt'`` (int32 array
            with one ``(x1, y1, x2, y2, label)`` row per box), or None when
            the image file is missing or cannot be decoded.
        """
        fields = annotation.strip().split(',')
        img_prefix = fields[0]
        # Values are parsed as floats first and then truncated to int32.
        bbox = [float(value) for value in fields[1:]]
        gt_box_labels = np.asarray(bbox, dtype=np.int32).reshape(-1, 5)
        img_path = os.path.join(image_dir, img_prefix)
        if not os.path.exists(img_path):
            print('not exist:', img_path)
            return None
        img_org = cv2.imread(img_path)
        if img_org is None:
            return None
        img_dict = {
            'img_data': np.array(img_org, dtype=np.uint8),
            'gt': gt_box_labels,
        }
        self._count_instances(gt_box_labels, cnt_dict)
        return img_dict

    def transform_imgbox(self, img_dict, cnt_dict):
        """Augment the image and boxes stored in *img_dict*.

        *img_dict* is updated in place with the augmented ``'img_data'`` and
        ``'gt'`` and then returned; ``cnt_dict`` is incremented once per box
        kept by the augmenter. Returns None when *img_dict* or its image is
        None, or when the augmenter returns no boxes.
        """
        if img_dict is None:
            return None
        img_org = img_dict['img_data']
        gt_box_labels = img_dict['gt']
        boxes = gt_box_labels[:, :4]
        labels = gt_box_labels[:, 4]
        if img_org is None:
            print("aug img is None")
            return None
        img_aug, boxes_aug, keep_idx = self.transfrom_imgs.aug_img_boxes(
            img_org, [boxes.tolist()])
        if len(boxes_aug) == 0:
            return None
        img_data = np.array(img_aug[0], np.uint8)
        boxes_trans = np.array(boxes_aug[0], dtype=np.int32).reshape(-1, 4)
        # NOTE(review): keep_idx[1][0] is assumed to select the labels of the
        # boxes that survived augmentation -- confirm against Transform.
        label = np.array(labels[keep_idx[1][0]], dtype=np.int32).reshape(-1, 1)
        gt_box_labels = np.concatenate((boxes_trans, label), axis=1)
        img_dict['img_data'] = img_data
        img_dict['gt'] = gt_box_labels
        self._count_instances(gt_box_labels, cnt_dict)
        return img_dict

    def _dump_record(self, annotation, image_dir, cnt_dict):
        """Pickle one annotation and, at random, an augmented copy of it.

        Returns ``(written, failed)``: the number of dicts written to the
        pickle file and the number of augmentation attempts that failed.
        """
        img_dict = self.rd_anotation(annotation, image_dir, cnt_dict)
        if img_dict is None:
            return 0, 0
        Pickle.dump(img_dict, self.file_w, Pickle.HIGHEST_PROTOCOL)
        if not random.randint(0, 1):
            return 1, 0
        # The original dict has already been serialised, so the in-place
        # update performed by transform_imgbox cannot affect it.
        aug_dict = self.transform_imgbox(img_dict, cnt_dict)
        if aug_dict is None:
            return 1, 1
        Pickle.dump(aug_dict, self.file_w, Pickle.HIGHEST_PROTOCOL)
        return 2, 0

    def write_pkl(self):
        """Write all VOC and COCO records to the pickle file.

        VOC and COCO lines are interleaved by index; either list may be
        shorter than the other. After the loop the per-class instance counts
        are written to the record file as ``name:count`` lines. All four file
        handles are closed on exit, including when an error occurs.
        """
        instance_cnt_dic = defaultdict(int)
        total_img_cnt = 0
        cnt_failed = 0
        try:
            voc_cnts = self.voc_anno_r.readlines()
            coco_cnts = self.coco_anno_r.readlines()
            total_voc = len(voc_cnts)
            total_coco = len(coco_cnts)
            for idx in tqdm(range(max(total_coco, total_voc))):
                if idx < total_voc:
                    written, failed = self._dump_record(
                        voc_cnts[idx], self.voc_dir, instance_cnt_dic)
                    total_img_cnt += written
                    cnt_failed += failed
                # Guard needed: the loop runs to the longer of the two lists.
                if idx < total_coco:
                    written, failed = self._dump_record(
                        coco_cnts[idx], self.coco_dir, instance_cnt_dic)
                    total_img_cnt += written
                    cnt_failed += failed
            for tmp_key in sorted(instance_cnt_dic):
                self.record_w.write(
                    "{}:{}\n".format(tmp_key, instance_cnt_dic[tmp_key]))
        finally:
            self.voc_anno_r.close()
            self.coco_anno_r.close()
            self.file_w.close()
            self.record_w.close()
        print("total img:", total_img_cnt)
        print('failed aug img:', cnt_failed)