def load_pascal_dataset():
    """Collect converted bounding-box data from the Pascal VOC annotations.

    Iterates over the module-level ``datasets`` sequence of
    ``(year, image_set)`` pairs.  For every image id listed in the image-set
    file below ``source_dir`` the matching annotation XML is parsed, and each
    ``object`` box is passed to ``convert_bbox`` together with the image size.

    Returns:
        numpy.ndarray: one row per annotated object holding ``bb[2:]`` of the
        converted box (presumably the normalised width and height --
        confirm against ``convert_bbox``).
    """
    name = 'pascal'
    voc_root = f'{source_dir}/{name}/VOCdevkit'
    data = []
    for year, image_set in datasets:
        img_ids_filename = f'{voc_root}/VOC{year}/ImageSets/Main/{image_set}.txt'
        # Context managers guarantee the handles are released even when a
        # malformed annotation makes parsing raise part-way through.
        with open(img_ids_filename) as ifs_img_ids:
            img_ids = ifs_img_ids.read().strip().split()

        for image_id in img_ids:
            anno_filename = f'{voc_root}/VOC{year}/Annotations/{image_id}.xml'
            with open(anno_filename) as ifs_anno:
                root = ET.parse(ifs_anno).getroot()

            size = root.find('size')
            w = int(size.find('width').text)
            h = int(size.find('height').text)

            for obj in root.iter('object'):
                xmlbox = obj.find('bndbox')
                # Order expected by convert_bbox: (xmin, xmax, ymin, ymax).
                b = (float(xmlbox.find('xmin').text),
                     float(xmlbox.find('xmax').text),
                     float(xmlbox.find('ymin').text),
                     float(xmlbox.find('ymax').text))
                bb = convert_bbox((w, h), b)
                data.append(bb[2:])
    return np.array(data)
def generate_arrays_from_file(path, batch_size, num_class=2):
    """Yield training batches parsed from a text file, looping forever.

    Each line of ``path`` is decoded with ``process_line`` into an input and
    two labels.  Once ``batch_size`` lines have been gathered a batch is
    yielded as ``(X, {'cla': ..., 'bbox': ...})``.  When the end of the file
    is reached the file is reopened and reading starts over; lines left over
    that do not fill a whole batch are discarded at that point.

    Args:
        path: text file with one sample per line.
        batch_size: number of samples per yielded batch.
        num_class: forwarded to ``convert_cla`` for the class labels.

    Yields:
        tuple: ``(np.ndarray, dict)`` where the dict holds the converted
        class labels under ``'cla'`` and the converted boxes under ``'bbox'``.
    """
    while True:
        # ``with`` closes the file even when the consumer abandons the
        # generator while it is suspended at ``yield``.
        with open(path) as f:
            X, Y1, Y2 = [], [], []
            for line in f:
                # create Numpy arrays of input data
                # and labels, from each line in the file
                x, y1, y2 = process_line(line)
                X.append(x)
                Y1.append(y1)
                Y2.append(y2)
                if len(X) == batch_size:
                    # two tasks, so two label sets are attached
                    yield (np.array(X), {
                        'cla': convert_cla(np.array(Y1), num_class),
                        'bbox': convert_bbox(np.array(Y2), 4)
                    })
                    X, Y1, Y2 = [], [], []
def read_cnn_face_points(net_name):
    """Convert CelebA bounding-box / landmark annotations into saved samples.

    Reads the two annotation files line by line in lockstep, skips the first
    two (header) lines, and stops once the line counter exceeds the
    per-network limit taken from ``config``.  Records are accumulated and,
    each time 1000 of them have been gathered, handed to ``process_data`` and
    ``save_data`` before the accumulator is cleared.

    NOTE(review): records still in the accumulator when the loop ends (fewer
    than 1000) are never processed -- confirm whether that is intended.

    Args:
        net_name: ``'p_net'``, ``'r_net'`` or ``'o_net'``.

    Raises:
        ValueError: ``net_name`` is not one of the three known networks.
    """
    if net_name == 'p_net':
        CELEBA_NUM = config.PNET_CELEBA_NUM
    elif net_name == 'r_net':
        CELEBA_NUM = config.RNET_CELEBA_NUM
    elif net_name == 'o_net':
        CELEBA_NUM = config.ONET_CELEBA_NUM
    else:
        # Previously this surfaced as an UnboundLocalError in the loop below.
        raise ValueError('unknown net_name: %r' % (net_name,))

    landmark_annotation_file = CELEBA_ANNO_LANDMARKS_FILE
    bbox_annotation_file = CELEBA_ANNO_BBOX_FILE

    with open(landmark_annotation_file, 'r', encoding='utf-8') as landmarkf:
        landmark_lines = landmarkf.readlines()
    with open(bbox_annotation_file, 'r', encoding='utf-8') as bboxf:
        bboxf_lines = bboxf.readlines()

    data = []
    index = 0
    for bbox_line, landmark_line in zip(bboxf_lines, landmark_lines):
        index += 1
        if index <= 2:
            # the first two lines of each file are skipped
            continue
        if index > CELEBA_NUM:
            break

        bbox_splits = bbox_line.strip().split(' ')
        image_file = bbox_splits[0]
        # Columns may be separated by runs of spaces; drop the empty pieces.
        bbox_arr = [item for item in bbox_splits[1:] if item != '']
        # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
        bbox_np = np.array(bbox_arr, dtype=int)
        x1, y1, w, h = bbox_np[0], bbox_np[1], bbox_np[2], bbox_np[3]
        bbox = convert_bbox((x1, y1, w, h), False)

        landmark_splits = landmark_line.strip().split(' ')
        landmark_arr = [item for item in landmark_splits[1:] if item != '']
        landmark_np = np.array(landmark_arr, dtype=int)
        # Five (x, y) landmark points per face.
        landmark = [(float(landmark_np[2 * i]), float(landmark_np[2 * i + 1]))
                    for i in range(0, 5)]

        data.append({
            'image_file': os.path.join(CELEBA_IMG_DIR, image_file),
            'bbox': bbox,
            'landmark': landmark
        })

        if len(data) % 1000 == 0:
            print(" 1000 images done", index)
            size = NET_SIZE[net_name]
            face_images, face_landmarks = process_data(data, size, True)
            save_data(face_images, face_landmarks, net_name, index)
            data = []
def widerface_data_loader(skip=0, **kwargs):
    """
    generator function that loads data from the WIDER FACE dataset

    Cycles over the annotation file endlessly.  For each entry it yields
    ``(image, bboxes, image_path)`` where ``image`` is the result of
    ``cv2.imread`` and ``bboxes`` is a list of boxes produced by
    ``utils.convert_bbox((x1, y1, w, h), False)``.  Sending
    ``DataState.stop`` into the generator ends it.

    params from kwargs: widerface_images_dir, widerface_annos_dir, widerface_annos_file
    (each falls back to the module-level value of the same name)

    @param skip: number of entries to step over before the first yield

    Raises ValueError when no annotation lines could be read.
    """
    images_dir = kwargs.get('widerface_images_dir', widerface_images_dir)
    annos_dir = kwargs.get('widerface_annos_dir', widerface_annos_dir)
    annos_file = kwargs.get('widerface_annos_file', widerface_annos_file)
    annos_path = os.path.join(annos_dir, annos_file)

    lines = linecache.getlines(annos_path)
    n_lines = len(lines)
    if n_lines == 0:
        # linecache returns [] for a missing/unreadable file; without this
        # check the outer loop below would spin forever without yielding.
        raise ValueError('no annotations could be read from %s' % annos_path)

    while True:
        idx = 0
        while idx < n_lines:
            image_name = lines[idx].strip()
            assert '/' in image_name
            n_faces = int(lines[idx + 1])
            # An entry is: name line, count line, then one line per face.
            # Entries with zero faces still occupy three lines, so they need
            # the same stride whether the entry is skipped or yielded.
            step = 3 if n_faces == 0 else 1 + n_faces + 1

            if skip > 0:
                idx += step
                if skip % 5000 == 0:
                    print('[WIDER FACE loader]: skipping. %d remaining.' % (skip))
                skip -= 1
                continue

            image_path = os.path.join(images_dir, image_name)
            image = cv2.imread(image_path)

            bboxes = []
            for i in range(n_faces):
                anno = lines[idx + 2 + i].strip().split()
                anno = list(map(int, anno))
                x1, y1, w, h = anno[0], anno[1], anno[2], anno[3]
                box = utils.convert_bbox((x1, y1, w, h), False)
                bboxes.append(box)

            response = yield image, bboxes, image_path
            if response == DataState.stop:
                return

            idx += step
def convert_annotation(self, image_id, classes_map):
    """Convert one VOC-style XML annotation into a label text file.

    The label file is written to
    ``<self.darknet.labels_dir>/<self.name>/labels/<image_id>.txt`` with one
    line per kept object: the mapped class id followed by the values returned
    by ``convert_bbox``.  ``self.class_counts`` is incremented for every
    object written.

    Objects are dropped when they are flagged ``difficult == 1`` or when
    their name is not a key of ``self.aliases``.

    Args:
        image_id: annotation / image identifier (file name without extension).
        classes_map: mapping from class (``self.aliases`` value) to the id
            written in the label file.

    Returns:
        bool: True if the label file already existed or was written; False
        if the annotation is missing, the image is not 3-channel, its size is
        below 1 pixel, or no object survived filtering.
    """
    out_filename = os.path.join(self.darknet.labels_dir, self.name, 'labels',
                                image_id + '.txt')
    if os.path.exists(out_filename):
        return True

    in_filename = os.path.join(self.ano_dir, image_id + '.xml')
    if not os.path.exists(in_filename):
        return False

    # Parse inside a context manager so the handle is closed on every one of
    # the early returns below (it used to be closed only on success).
    with open(in_filename) as in_file:
        root = ET.parse(in_file).getroot()

    size = root.find('size')
    d = int(size.find('depth').text)
    if d != 3:
        print(image_id, ' is greyscale. Skipping.')
        return False

    w = float(size.find('width').text)
    h = float(size.find('height').text)
    if w < 1 or h < 1:
        print(image_id, 'width and/or height == 0')
        return False

    class_bboxes = []
    for obj in root.iter('object'):
        difficult = obj.find('difficult')
        difficult = difficult.text if difficult is not None else 0
        if int(difficult) == 1:
            print(image_id, 'difficult == 1')
            continue

        alias = obj.find('name').text
        if alias not in self.aliases:
            print(image_id, 'skipping label', alias)
            continue
        cls = self.aliases[alias]

        xmlbox = obj.find('bndbox')
        # Order expected by convert_bbox: (xmin, xmax, ymin, ymax).
        b = (float(xmlbox.find('xmin').text),
             float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text),
             float(xmlbox.find('ymax').text))
        class_bboxes.append((cls, b))

    if not class_bboxes:
        print(image_id, 'no bounding boxes detected')
        return False

    # Build every line before touching the disk: a failure here (e.g. a class
    # missing from classes_map) must not leave a partial label file behind,
    # because an existing file is treated as "already converted" next time.
    label_lines = []
    for cls, b in class_bboxes:
        bb = convert_bbox((w, h), b)
        label_lines.append(
            str(classes_map[cls]) + " " + " ".join(map(str, bb)) + '\n')

    os.makedirs(os.path.dirname(out_filename), exist_ok=True)
    with open(out_filename, 'w') as out_file:
        out_file.writelines(label_lines)

    for cls, _ in class_bboxes:
        self.class_counts[cls] += 1
    return True
def main(input_net_name):
    """Generate positive, part and negative training crops from WIDER FACE.

    Reads the annotation list ``config.WIDER_FACE_ANNO_FILE``, samples square
    crops from each image, resizes them to ``config.NET_SIZE[net_name]`` and
    writes them as JPEGs below
    ``<config.GAN_DATA_ROOT_DIR>/<net_name>/{positive,part,negative}``.
    For every saved crop one line is appended to ``pos_<size>.txt``,
    ``neg_<size>.txt`` or ``part_<size>.txt`` in ``<root>/<net_name>``:
    label ``1`` (positive) or ``-1`` (part) followed by four box offsets, or
    label ``0`` for negatives.

    A crop is negative when its IoU with every labelled box is below 0.3,
    positive when its IoU with the box it was sampled from is at least 0.65,
    and part when that IoU is at least 0.4.

    Entries with zero faces and images that cannot be read are skipped.

    Args:
        input_net_name: ``'p_net'``, ``'r_net'`` or ``'o_net'``; selects the
            per-network sample counts from ``config``.

    Raises:
        AssertionError: ``input_net_name`` is not in ``config.NET_NAMES``.
        ValueError: no sample counts are defined for ``input_net_name``.
    """
    net_name = input_net_name
    assert net_name in config.NET_NAMES
    if net_name == 'p_net':
        BACKGRAND_NEG_NUM = config.PNET_BACKGRAND_NEG_NUM
        FACE_NEG_NUM = config.PNET_FACE_NEG_NUM
        POS_PART_NUM = config.PNET_POS_PART_NUM
        WIDEFACE_NUM = config.PNET_WIDEFACE_NUM
    elif net_name == 'r_net':
        BACKGRAND_NEG_NUM = config.RNET_BACKGRAND_NEG_NUM
        FACE_NEG_NUM = config.RNET_FACE_NEG_NUM
        POS_PART_NUM = config.RNET_POS_PART_NUM
        WIDEFACE_NUM = config.RNET_WIDEFACE_NUM
    elif net_name == 'o_net':
        BACKGRAND_NEG_NUM = config.ONET_BACKGRAND_NEG_NUM
        FACE_NEG_NUM = config.ONET_FACE_NEG_NUM
        POS_PART_NUM = config.ONET_POS_PART_NUM
        WIDEFACE_NUM = config.ONET_WIDEFACE_NUM
    else:
        # Previously surfaced much later as an UnboundLocalError.
        raise ValueError('no sample counts defined for %r' % (net_name,))

    images_dir = config.WIDER_FACE_IMG_DIR
    anno_file = config.WIDER_FACE_ANNO_FILE
    out_dir = config.GAN_DATA_ROOT_DIR
    target_size = config.NET_SIZE[net_name]

    save_dir = '{}/{}'.format(out_dir, net_name)
    pos_save_dir = save_dir + '/positive'
    part_save_dir = save_dir + '/part'
    neg_save_dir = save_dir + '/negative'
    # makedirs also creates a missing parent (os.mkdir would fail there).
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        os.makedirs(directory, exist_ok=True)

    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    n_lines = len(annotations)
    print('%d pics in total' % n_lines)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # dont care
    idx = 0
    idx_line = 0

    pos_list = os.path.join(save_dir, 'pos_' + str(target_size) + '.txt')
    neg_list = os.path.join(save_dir, 'neg_' + str(target_size) + '.txt')
    part_list = os.path.join(save_dir, 'part_' + str(target_size) + '.txt')
    # The list files are closed by the context manager even when an image
    # or annotation line makes the loop raise.
    with open(pos_list, 'a') as f1, \
            open(neg_list, 'a') as f2, \
            open(part_list, 'a') as f3:
        while idx_line < n_lines:
            # fetch the image and its bounding boxes
            image_name = annotations[idx_line].strip()
            assert '/' in image_name
            n_faces = int(annotations[idx_line + 1])
            if n_faces == 0:
                # A zero-face entry still occupies three lines (name, count
                # and one placeholder line), matching widerface_data_loader.
                # There is nothing to sample against, so skip the entry.
                idx_line += 3
                continue

            image_path = os.path.join(images_dir, image_name)
            img = cv2.imread(image_path)
            bboxes = []
            for i in range(n_faces):
                anno = annotations[idx_line + 2 + i].strip().split()
                anno = list(map(int, anno))
                x1, y1, w, h = anno[0], anno[1], anno[2], anno[3]
                box = utils.convert_bbox((x1, y1, w, h), False)
                bboxes.append(box)
            bboxes = np.array(bboxes, dtype=np.float32)
            idx_line += 1 + n_faces + 1

            # generate samples
            idx += 1
            if idx % 1000 == 0:
                print(idx, 'images done')
            if idx > WIDEFACE_NUM:
                break
            if img is None:
                print(image_path, 'could not be read. Skipping.')
                continue

            height, width, _ = img.shape
            # Upper bound for random crop sizes; the float bound is truncated
            # here exactly as randint would truncate it.
            max_crop = int(min(width, height) / 2)

            # randomly generate negative samples anywhere in the image
            neg_num = 0
            while neg_num < BACKGRAND_NEG_NUM:
                size = npr.randint(target_size, max_crop)
                nx = npr.randint(0, width - size)
                ny = npr.randint(0, height - size)
                crop_box = np.array([nx, ny, nx + size, ny + size])
                _iou = utils.iou(crop_box, bboxes)
                if np.max(_iou) < 0.3:
                    # _iou with all gts must below 0.3
                    resized_im = _resized_crop(img, nx, ny, nx + size,
                                               ny + size, target_size)
                    save_file = os.path.join(neg_save_dir, '%s.jpg' % n_idx)
                    f2.write(save_dir + '/negative/%s' % n_idx + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    neg_num += 1
            print('{} images done, pos: {}, part: {}, neg: {}'.format(
                idx, p_idx, d_idx, n_idx))

            # around every labelled box generate three kinds of samples:
            # FACE_NEG_NUM negative attempts and POS_PART_NUM positive/part
            # attempts
            for box in bboxes:
                x1, y1, x2, y2 = box
                w = x2 - x1 + 1
                h = y2 - y1 + 1
                if max(w, h) < 40 or x1 < 0 or y1 < 0:
                    continue

                # negatives overlapping the face region
                for i in range(FACE_NEG_NUM):
                    size = npr.randint(target_size, max_crop)
                    # delta_x and delta_y are offsets of (x1, y1)
                    delta_x = npr.randint(int(max(-size, -x1)), int(w))
                    delta_y = npr.randint(int(max(-size, -y1)), int(h))
                    nx1 = int(max(0, x1 + delta_x))
                    ny1 = int(max(0, y1 + delta_y))
                    if nx1 + size > width or ny1 + size > height:
                        continue
                    crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
                    _iou = utils.iou(crop_box, bboxes)
                    # negative: IoU below 0.3
                    if np.max(_iou) < 0.3:
                        # _iou with all gts must below 0.3
                        resized_im = _resized_crop(img, nx1, ny1, nx1 + size,
                                                   ny1 + size, target_size)
                        save_file = os.path.join(neg_save_dir,
                                                 "%s.jpg" % n_idx)
                        f2.write(save_dir + "/negative/%s" % n_idx + ' 0\n')
                        cv2.imwrite(save_file, resized_im)
                        n_idx += 1

                # positive and part samples
                for i in range(POS_PART_NUM):
                    size = npr.randint(int(min(w, h) * 0.8),
                                       int(np.ceil(1.25 * max(w, h))))
                    delta_x = npr.randint(int(-w * 0.2), int(w * 0.2))
                    delta_y = npr.randint(int(-h * 0.2), int(h * 0.2))
                    nx1 = int(max(x1 + w / 2 + delta_x - size / 2, 0))
                    ny1 = int(max(y1 + h / 2 + delta_y - size / 2, 0))
                    nx2 = nx1 + size
                    ny2 = ny1 + size
                    if nx2 > width or ny2 > height:
                        continue
                    crop_box = np.array([nx1, ny1, nx2, ny2])

                    # Evaluate the overlap once and only crop/resize (and
                    # compute the offsets) for crops that are actually kept.
                    overlap = utils.iou(crop_box, box.reshape(1, -1))
                    if overlap >= 0.65:
                        keep_as_positive = True
                    elif overlap >= 0.4:
                        keep_as_positive = False
                    else:
                        continue

                    # Box corners relative to the crop, in units of crop size.
                    offset_x1 = (x1 - nx1) / float(size)
                    offset_y1 = (y1 - ny1) / float(size)
                    offset_x2 = (x2 - nx2) / float(size)
                    offset_y2 = (y2 - ny2) / float(size)
                    resized_im = _resized_crop(img, nx1, ny1, nx2, ny2,
                                               target_size)
                    if keep_as_positive:
                        save_file = os.path.join(pos_save_dir,
                                                 '%s.jpg' % p_idx)
                        f1.write(save_dir + '/positive/%s' % p_idx +
                                 ' 1 %.2f %.2f %.2f %.2f\n' %
                                 (offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1
                    else:
                        save_file = os.path.join(part_save_dir,
                                                 '%s.jpg' % d_idx)
                        f3.write(save_dir + '/part/%s' % d_idx +
                                 ' -1 %.2f %.2f %.2f %.2f\n' %
                                 (offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1

                print('{} images done, pos: {}, part: {}, neg: {}'.format(
                    idx, p_idx, d_idx, n_idx))


def _resized_crop(img, x1, y1, x2, y2, target_size):
    """Cut ``img[y1:y2, x1:x2]`` and resize it to a ``target_size`` square."""
    cropped_im = img[y1:y2, x1:x2, :]
    return cv2.resize(cropped_im, (target_size, target_size),
                      interpolation=cv2.INTER_LINEAR)
def augmented_data_generator(**kwargs):
    """
    training data generator for MTCNN

    @param dst_size: output image size (dst_size, dst_size)
    @param pos_cnt: expected count of positive samples in a batch
    @param part_cnt: expected count of partial samples in a batch
    @param neg_cnt: expected count of negative samples in a batch
    @param ldmk_cnt: expected count of landmark samples in a batch
    @param double_aug: if set to True, the size of batches will double
                       (using image augmentation)
    @param skip: if nonzero, given number of images will be skipped
                 (default zero)
    @param min_face: minimum face size; boxes whose larger side is below
                     this value are ignored

    according to the paper, pos_cnt : part_cnt : neg_cnt : ldmk_cnt
    should be 1 : 1 : 3 : 2

    Yields ``(images, targets)`` where ``images`` is an array of normalized
    ``dst_size`` x ``dst_size`` crops and ``targets`` is a dict with the keys
    ``'face_cls'``, ``'bbox_reg'`` and ``'ldmk_reg'``.  Every target row
    starts with the sample type value:

        face_cls: [type, cls_0, cls_1]
        bbox_reg: [type, bbox1, ..., bbox4]
        ldmk_reg: [type, ldmk1, ..., ldmk10]

    positive samples: cls_0 = 0, cls_1 = 1; bboxes real; ldmks nan
    negative samples: cls_0 = 1, cls_1 = 0; bboxes real; ldmks nan
    partial samples:  cls nan;              bboxes real; ldmks nan
    landmark samples: cls nan;              bboxes nan;  ldmks real
    """
    dst_size = kwargs.get('dst_size', 12)
    pos_cnt = kwargs.get('pos_cnt', 10)
    part_cnt = kwargs.get('part_cnt', 10)
    neg_cnt = kwargs.get('neg_cnt', 30)
    ldmk_cnt = kwargs.get('ldmk_cnt', 20)
    double_aug = kwargs.get('double_aug', False)
    skip = kwargs.get('skip', 0)
    min_face = kwargs.get('min_face', 12)

    widerface_loader = widerface_data_loader(skip=skip)
    celeba_loader = celeba_data_loader(skip=skip)

    loop_threshold = (pos_cnt + part_cnt + neg_cnt) * 10

    def append_images_and_bboxes(im, gtbox, crbox, label):
        """Record one cropped sample and its regression targets.

        Appends to the per-batch lists of the enclosing generator; the names
        are resolved when the helper is called, so it always writes into the
        lists of the batch currently being built.
        """
        x1, y1, x2, y2, w, h = utils.unpack_bbox(crbox)
        # Ground-truth corners relative to the crop, in units of crop size.
        dx1 = (gtbox[0] - x1) / w
        dy1 = (gtbox[1] - y1) / h
        dx2 = (gtbox[2] - x2) / w
        dy2 = (gtbox[3] - y2) / h

        # suppose it should be a one-hot vector
        cls_0, cls_1 = np.nan, np.nan
        if label == SampleType.negative:
            cls_0, cls_1 = 1, 0
        elif label == SampleType.positive:
            cls_0, cls_1 = 0, 1

        dummy_ldmks = [np.nan] * 10
        face_cls.append([label.value, cls_0, cls_1])
        bbox_reg.append([label.value, dx1, dy1, dx2, dy2])
        ldmk_reg.append([label.value] + dummy_ldmks)

        cropped = utils.crop_image(im, crbox)
        resized = cv2.resize(cropped, (dst_size, dst_size))
        images.append(resized)

    while True:
        images, face_cls, bbox_reg, ldmk_reg = [], [], [], []

        # process images from WIDER FACE dataset
        img, boxes, _ = widerface_loader.send(None)
        h_img, w_img, _ = img.shape
        # NOTE(review): (width, height) here but (height, width) for the
        # CelebA image below, both passed to utils.is_valid_bbox -- confirm
        # which order that helper expects.
        img_size = (w_img, h_img)
        boxes = np.array(boxes)

        n_pos, n_part, n_neg = 0, 0, 0
        loop_cnt = 0
        observe_flag = False
        # IoU thresholds: positive >= 0.65, partial in [0.4, 0.65),
        # negative < 0.3.  The first and last are relaxed further down when
        # sampling takes too long.
        pos_threshold_low = 0.65
        neg_threshold_high = 0.3
        part_threshold_low = 0.4
        no_proper_faces_found = True

        while n_pos < pos_cnt or n_part < part_cnt or n_neg < neg_cnt:
            try:
                for box in boxes:
                    x1, y1, w, h = utils.convert_bbox(box, True)
                    if max(w, h) < min_face:
                        # bounding box too small, discard it
                        continue
                    no_proper_faces_found = False

                    if n_pos < pos_cnt or n_part < part_cnt:
                        crop_box = utils.bbox_positive_sampling(box)
                        if utils.is_valid_bbox(crop_box, img_size):
                            iou = utils.IoU(crop_box, boxes)
                            iou = np.max(iou)
                            if iou >= pos_threshold_low and n_pos < pos_cnt:
                                n_pos += 1
                                append_images_and_bboxes(
                                    img, box, crop_box, SampleType.positive)
                            elif iou >= part_threshold_low and n_part < part_cnt:
                                n_part += 1
                                append_images_and_bboxes(
                                    img, box, crop_box, SampleType.partial)

                    if n_neg < neg_cnt:
                        crop_box = utils.bbox_global_negative_sampling(
                            box, img_size, dst_size)
                        if utils.is_valid_bbox(crop_box, img_size):
                            iou = utils.IoU(crop_box, boxes)
                            iou = np.max(iou)
                            if iou < neg_threshold_high:
                                n_neg += 1
                                append_images_and_bboxes(
                                    img, box, crop_box, SampleType.negative)

                    if n_neg < neg_cnt:
                        crop_box = utils.bbox_local_negative_sampling(
                            box, dst_size)
                        if utils.is_valid_bbox(crop_box, img_size):
                            iou = utils.IoU(crop_box, boxes)
                            iou = np.max(iou)
                            if iou < neg_threshold_high:
                                n_neg += 1
                                append_images_and_bboxes(
                                    img, box, crop_box, SampleType.negative)

                    loop_cnt += 1
                    if loop_cnt > loop_threshold * 2:
                        # we can't handle these bounding boxes, skip
                        observe_flag = False
                        no_proper_faces_found = True
                        break
                    elif loop_cnt > loop_threshold and not observe_flag:
                        # adjust IoU threshold
                        pos_threshold_low = 0.55
                        neg_threshold_high = 0.4
                        observe_flag = True
            except Exception:
                # Best-effort: a failing sample is ignored and sampling is
                # retried.  Only Exception is caught so that Ctrl-C and
                # interpreter shutdown are no longer swallowed.
                continue

            if no_proper_faces_found:
                break

        if no_proper_faces_found:
            continue

        # process images from CelebA dataset
        img, ldmks, _ = celeba_loader.send(None)
        h_img, w_img, _ = img.shape
        img_size = (h_img, w_img)

        n_ldmk = 0
        while n_ldmk < ldmk_cnt:
            try:
                box = utils.crop_bbox_for_facial_landmarks(ldmks)
                if utils.is_valid_bbox(box, img_size):
                    n_ldmk += 1
                    # randint's upper bound is exclusive, so this draws from
                    # -15..15 like the deprecated random_integers(-15, 15).
                    angle = np.random.randint(-15, 16)
                    # Keep the loader's landmarks untouched: every sample is
                    # rotated from the original image, so it must also start
                    # from the original landmarks rather than from the
                    # output of the previous rotation.
                    aug_img, rot_ldmks = utils.rotate_facial_landmarks(
                        img, ldmks, box, angle)
                    aug_img = utils.crop_image(aug_img, box)
                    aug_img = utils.adjust_hue_and_saturation(aug_img)
                    aug_img = utils.adjust_lighting_naive(aug_img)
                    resized = cv2.resize(aug_img, (dst_size, dst_size))
                    images.append(resized)

                    x1, y1, w, h = utils.convert_bbox(box, True)
                    d_ldmks = []
                    for x, y in rot_ldmks:
                        # landmark position relative to the crop box
                        dx = (x - x1) / w
                        dy = (y - y1) / h
                        d_ldmks.extend((dx, dy))

                    dummy_cls_bbox = [np.nan] * 4
                    face_cls.append([SampleType.landmark.value, np.nan, np.nan])
                    bbox_reg.append([SampleType.landmark.value] + dummy_cls_bbox)
                    ldmk_reg.append([SampleType.landmark.value] + d_ldmks)
            except Exception:
                # Best-effort, as above: retry with a new random crop.
                continue

        if double_aug:
            aug_fn = lambda im: utils.adjust_lighting_naive(
                utils.adjust_hue_and_saturation(im))
            # Append an augmented copy of every image; the targets are
            # duplicated in the same order so rows stay aligned.
            images.extend(list(map(aug_fn, images)))
            face_cls.extend(face_cls)
            bbox_reg.extend(bbox_reg)
            ldmk_reg.extend(ldmk_reg)

        # do shuffle ? (we may left that to .fit_generator)
        images = list(map(utils.normalize_image, images))
        yield np.array(images), {
            'face_cls': np.array(face_cls),
            'bbox_reg': np.array(bbox_reg),
            'ldmk_reg': np.array(ldmk_reg)
        }