def main():
    """Build the two PNet training TFRecord files from the 12x12 sample lists.

    Reads ``native_12/pos_12.txt``, ``native_12/neg_12.txt`` and
    ``native_12/part_12.txt``. The first column of every line is an image
    path without the ``.jpg`` suffix; columns 2-5 of positive and part lines
    are the four bounding-box regression targets.

    Writes:

    * ``pnet_data_for_cls.tfrecords``: every positive (label ``[0, 1]``) plus
      up to 1,000,000 randomly chosen negatives (label ``[1, 0]``).
    * ``pnet_data_for_bbx.tfrecords``: every positive plus up to 300,000
      randomly chosen part samples, labelled with their four offsets.

    Records are shuffled before being written.

    :raises IOError: if a listed image cannot be read.
    """
    size = 12
    net = 'native_' + str(size)

    def read_list(kind):
        # One annotation line per cropped sample.
        with open('%s/%s_%s.txt' % (net, kind, size), 'r') as f:
            return f.readlines()

    def pick(lines, limit):
        # Sample without replacement. The size is clamped because
        # npr.choice(replace=False) raises when asked for more items than
        # the population holds.
        if not lines:
            return []
        keep = npr.choice(len(lines), size=min(len(lines), limit),
                          replace=False)
        return [lines[i] for i in keep]

    def bbox_label(words):
        # Explicit indices so that a truncated line still raises IndexError.
        return [float(words[k]) for k in (2, 3, 4, 5)]

    def build(lines, label_of):
        # Turn annotation lines into tf.train.Example protos.
        total = len(lines)
        built = []
        for done, line in enumerate(lines):
            view_bar(done, total)
            words = line.split()
            image_file_name = words[0] + '.jpg'
            im = cv2.imread(image_file_name)
            if im is None:
                # cv2.imread signals failure by returning None.
                raise IOError('cannot read image: %s' % image_file_name)
            h, w = im.shape[:2]
            if h != size or w != size:
                im = cv2.resize(im, (size, size))
            im = im.astype('uint8')
            label = np.array(label_of(words), dtype='float32')
            # tobytes() is the supported spelling of the deprecated
            # tostring(); the produced bytes are identical.
            built.append(tf.train.Example(features=tf.train.Features(feature={
                'label_raw': bytes_feature(label.tobytes()),
                'image_raw': bytes_feature(im.tobytes())})))
        return built

    def write_shuffled(path, examples):
        # Shuffle in place and serialize; always release the writer.
        print(len(examples))
        random.shuffle(examples)
        writer = tf.python_io.TFRecordWriter(path)
        try:
            for example in examples:
                writer.write(example.SerializeToString())
        finally:
            writer.close()

    pos = read_list('pos')
    neg = read_list('neg')
    part = read_list('part')

    # Face / non-face classification set.
    print('\n' + 'pos')
    print('Writing')
    examples = build(pos, lambda words: [0, 1])
    print('\n' + 'neg')
    examples += build(pick(neg, 1000000), lambda words: [1, 0])
    write_shuffled('pnet_data_for_cls.tfrecords', examples)

    # Bounding-box regression set.
    print('\n' + 'pos')
    print('Writing')
    examples = build(pos, bbox_label)
    print('\n' + 'part')
    examples += build(pick(part, 300000), bbox_label)
    write_shuffled('pnet_data_for_bbx.tfrecords', examples)
def generate_ft_or_svm_data(list_path, num_clss, save_path, threshold=0.5,
                            is_svm=False, save=False):
    """Generate fine-tuning or SVM training data from a list file.

    Every line of the list holds, separated by single spaces, an image path,
    a class index and a comma-separated integer ground-truth rectangle. For
    each image, selective-search proposals are filtered, cropped, resized to
    ``config.IMAGE_SIZE`` and labelled by their IoU with the ground truth.

    :param list_path: path of fine_tune_list.txt.
    :param num_clss: number of classes; one-hot labels have ``num_clss + 1``
        entries, with index 0 meaning background.
    :param save_path: directory that receives ``<image stem>_data.npy``.
    :param threshold: proposals with IoU below this value are labelled as
        background.
    :param is_svm: if true, labels are scalar class indices instead of
        one-hot vectors.
    :param save: if true, ``[images, labels]`` of each image is saved as a
        .npy file under save_path; otherwise the generated data is discarded.
    :return: None.
    :raises IOError: if a listed image cannot be read.
    """
    # Read everything up front so the handle is released even if processing
    # fails later on.
    with open(list_path, 'r') as fr:
        train_list = fr.readlines()
    # random.shuffle(train_list)
    total = len(train_list)
    for num, line in enumerate(train_list):  # 1 line = 1 image = 1 .npy
        labels = []
        images = []
        tmp = line.strip().split(' ')  # [image path, label, rect GT]
        img = cv2.imread(tmp[0])
        if img is None:
            # cv2.imread signals failure by returning None.
            raise IOError('cannot read image: %s' % tmp[0])
        # The class index and ground truth are the same for every proposal
        # of this image, so parse them once.
        index = int(tmp[1])
        ref_rect_int = [int(i) for i in tmp[2].split(',')]
        img_lbl, regions = ss.selective_search(img, scale=500, sigma=0.9,
                                               min_size=10)
        candidates = set()
        for r in regions:
            rect = r['rect']
            # excluding same rectangle (with different segments)
            if rect in candidates:
                continue
            # excluding small regions
            if r['size'] < 220:
                continue
            if (rect[2] * rect[3]) < 500:
                continue
            # crop the original image to the proposal rectangle
            proposal_img, proposal_rect = clip_pic(img, rect)
            # Delete Empty array
            if len(proposal_img) == 0:
                continue
            # Ignore things contain 0 or not C contiguous array
            _, _, w, h = rect
            if w == 0 or h == 0:
                continue
            # Check if any 0-dimension exist
            [a, b, c] = np.shape(proposal_img)
            if a == 0 or b == 0 or c == 0:
                continue
            # resize RPs to the input size of CNN
            resized_proposal_img = resize_image(
                proposal_img, config.IMAGE_SIZE, config.IMAGE_SIZE)
            candidates.add(rect)
            images.append(np.asarray(resized_proposal_img, dtype="float32"))
            # attach labels according to IoU threshold, 0: background
            iou_val = IOU(ref_rect_int, proposal_rect)
            if is_svm:
                # negative example below the threshold, positive otherwise
                labels.append(0 if iou_val < threshold else index)
            else:  # fine tune
                label = np.zeros(num_clss + 1)  # one hot
                if iou_val < threshold:
                    label[0] = 1  # negative
                else:
                    label[index] = 1  # positive
                labels.append(label)
        tools.view_bar(
            "processing image of %s" % list_path.split('\\')[-1].strip(),
            num + 1, total)
        if save:
            np.save((os.path.join(
                save_path,
                tmp[0].split('/')[-1].split('.')[0].strip()) + '_data.npy'),
                [images, labels])
    print(' ')
def main(args):
    """Mine 24x24 training crops from PNet detections.

    Runs the restored PNet on every image listed in ``wider_face_train.txt``
    (image name followed by ground-truth box coordinates, four per box),
    crops each detection, resizes it to 24x24 and sorts it by its best IoU
    with the ground truth:

    * IoU < 0.3: saved under ``24/negative`` and listed in ``24/neg_24.txt``.
    * IoU >= 0.65: saved under ``24/positive`` and listed in
      ``24/pos_24.txt`` together with four box offsets.
    * 0.4 <= IoU < 0.65: saved under ``24/part`` and listed in
      ``24/part_24.txt`` together with four box offsets.

    Detections with IoU in [0.3, 0.4) are dropped.

    :param args: object whose ``pnet_model`` attribute is the PNet
        checkpoint path handed to ``saver.restore``.
    :raises IOError: if a listed image cannot be read.
    """
    image_size = 24
    save_dir = str(image_size)
    anno_file = 'wider_face_train.txt'
    im_dir = 'WIDER_train/images/'
    neg_save_dir = save_dir + '/negative'
    pos_save_dir = save_dir + '/positive'
    part_save_dir = save_dir + '/part'
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    threshold = 0.6
    minsize = 20
    factor = 0.709
    # Running file indices: positive, negative, part ("dont care").
    counts = {'pos': 0, 'neg': 0, 'part': 0}

    def save_crops(img, gts, rectangles, f_pos, f_neg, f_part):
        # Crop, classify by IoU and store every detection of one image.
        for box in rectangles:
            lis = box.astype(np.int32)
            lis[lis < 0] = 0
            x_left, y_top, x_right, y_bottom, _ = lis
            crop_w = x_right - x_left + 1
            crop_h = y_bottom - y_top + 1
            # ignore box that is too small or beyond image border
            if crop_w < image_size or crop_h < image_size:
                continue
            iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1]
            if cropped_im.size == 0:
                # The box lies entirely outside the image; cv2.resize would
                # fail on an empty array.
                continue
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)
            # An image without ground-truth boxes yields no IoU values
            # (assuming IoU returns one value per ground-truth row); treat
            # its detections as negatives instead of letting np.max raise.
            max_iou = np.max(iou) if np.size(iou) else 0.0
            # save negative images and write label
            if max_iou < 0.3:
                # Iou with all gts must below 0.3
                n_idx = counts['neg']
                save_file = os.path.join(neg_save_dir, '%s.jpg' % n_idx)
                f_neg.write('%s/negative/%s' % (save_dir, n_idx) + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                counts['neg'] += 1
                continue
            # find gt_box with the highest iou
            x1, y1, x2, y2 = gts[np.argmax(iou)]
            # compute bbox reg label
            offset_x1 = (x1 - x_left) / float(crop_w)
            offset_y1 = (y1 - y_top) / float(crop_h)
            offset_x2 = (x2 - x_right) / float(crop_w)
            offset_y2 = (y2 - y_bottom) / float(crop_h)
            offsets = (offset_x1, offset_y1, offset_x2, offset_y2)
            if max_iou >= 0.65:
                p_idx = counts['pos']
                save_file = os.path.join(pos_save_dir, '%s.jpg' % p_idx)
                f_pos.write('%s/positive/%s' % (save_dir, p_idx)
                            + ' 1 %.2f %.2f %.2f %.2f\n' % offsets)
                cv2.imwrite(save_file, resized_im)
                counts['pos'] += 1
            elif max_iou >= 0.4:
                d_idx = counts['part']
                save_file = os.path.join(part_save_dir, '%s.jpg' % d_idx)
                f_part.write('%s/part/%s' % (save_dir, d_idx)
                             + ' -1 %.2f %.2f %.2f %.2f\n' % offsets)
                cv2.imwrite(save_file, resized_im)
                counts['part'] += 1

    # The with-statement closes the label files even when mining fails.
    with open(save_dir + '/pos_24.txt', 'w') as f1, \
            open(save_dir + '/neg_24.txt', 'w') as f2, \
            open(save_dir + '/part_24.txt', 'w') as f3:
        with open(anno_file, 'r') as f:
            annotations = f.readlines()
        num = len(annotations)
        print('%d pics in total' % num)
        image_idx = 0
        with tf.device('/gpu:0'):
            model_file = args.pnet_model
            with tf.Graph().as_default():
                config = tf.ConfigProto(allow_soft_placement=True)
                config.gpu_options.per_process_gpu_memory_fraction = 0.5
                with tf.Session(config=config) as sess:
                    image = tf.placeholder(tf.float32, [None, None, None, 3])
                    pnet = PNet({'data': image}, mode='test')
                    out_tensor = pnet.get_all_output()
                    init_op = tf.global_variables_initializer()
                    sess.run(init_op)
                    saver = tf.train.Saver()
                    saver.restore(sess, model_file)

                    def pnet_fun(img):
                        return sess.run(out_tensor, feed_dict={image: img})

                    for annotation in annotations:
                        annotation = annotation.strip().split(' ')
                        bbox = list(map(float, annotation[1:]))
                        gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
                        img_path = im_dir + annotation[0] + '.jpg'
                        img = cv2.imread(img_path)
                        if img is None:
                            # cv2.imread signals failure by returning None.
                            raise IOError('cannot read image: %s' % img_path)
                        rectangles = detect_face_12net(
                            img, minsize, pnet_fun, threshold, factor)
                        image_idx += 1
                        view_bar(image_idx, num)
                        save_crops(img, gts, rectangles, f1, f2, f3)
def main(annotation_fp, image_dir, model_fp, output_dir):
    """Mine 24x24 training crops from PNet detections.

    Runs the restored PNet on every annotated image, crops each detection,
    resizes it to 24x24 and sorts it by its IoU with the ground truth:

    * IoU < 0.3: saved under ``<output_dir>/24/negative`` and listed in
      ``neg_24.txt``.
    * IoU >= 0.65: saved under ``<output_dir>/24/positive`` and listed in
      ``pos_24.txt`` together with four box offsets.
    * 0.4 <= IoU < 0.65: saved under ``<output_dir>/24/part`` and listed in
      ``part_24.txt`` together with four box offsets.

    Detections with IoU in [0.3, 0.4) are dropped. The list files store the
    full path of each saved ``.jpg``.

    :param annotation_fp: annotation file; each line is an image path
        relative to image_dir followed by box coordinates. Only the first
        four coordinates of a line are used.
    :param image_dir: directory the annotated image paths are relative to.
    :param model_fp: PNet checkpoint path handed to ``saver.restore``.
    :param output_dir: directory that receives the ``24`` sub-directory; it
        is created when missing.
    :raises IOError: if a listed image cannot be read.
    """
    image_size = 24
    save_dir = os.path.join(output_dir, str(image_size))
    neg_save_dir = save_dir + '/negative'
    pos_save_dir = save_dir + '/positive'
    part_save_dir = save_dir + '/part'
    # makedirs also creates a missing output_dir, which os.mkdir would not.
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    threshold = 0.6
    minsize = 20
    factor = 0.709
    # Running file indices: positive, negative, part ("dont care").
    counts = {'pos': 0, 'neg': 0, 'part': 0}

    def save_crops(img, gts, rectangles, f_pos, f_neg, f_part):
        # Crop, classify by IoU and store every detection of one image.
        for box in rectangles:
            lis = box.astype(np.int32)
            lis[lis < 0] = 0
            x_left, y_top, x_right, y_bottom, _ = lis
            crop_w = x_right - x_left + 1
            crop_h = y_bottom - y_top + 1
            # ignore box that is too small or beyond image border
            if crop_w < image_size or crop_h < image_size:
                continue
            iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1]
            if cropped_im.size == 0:
                # The box lies entirely outside the image; cv2.resize would
                # fail on an empty array.
                continue
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)
            # A line without box coordinates yields no IoU values (assuming
            # IoU returns one value per ground-truth row); treat its
            # detections as negatives instead of letting np.max raise.
            max_iou = np.max(iou) if np.size(iou) else 0.0
            # save negative images and write label
            if max_iou < 0.3:
                # Iou with all gts must below 0.3
                filename = str(counts['neg']) + '.jpg'
                save_file = os.path.join(neg_save_dir, filename)
                f_neg.write(save_file + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                counts['neg'] += 1
                continue
            # find gt_box with the highest iou
            x1, y1, x2, y2 = gts[np.argmax(iou)]
            # compute bbox reg label
            offset_x1 = (x1 - x_left) / float(crop_w)
            offset_y1 = (y1 - y_top) / float(crop_h)
            offset_x2 = (x2 - x_right) / float(crop_w)
            offset_y2 = (y2 - y_bottom) / float(crop_h)
            offsets = (offset_x1, offset_y1, offset_x2, offset_y2)
            if max_iou >= 0.65:
                filename = str(counts['pos']) + '.jpg'
                save_file = os.path.join(pos_save_dir, filename)
                f_pos.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % offsets)
                cv2.imwrite(save_file, resized_im)
                counts['pos'] += 1
            elif max_iou >= 0.4:
                filename = str(counts['part']) + '.jpg'
                save_file = os.path.join(part_save_dir, filename)
                f_part.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % offsets)
                cv2.imwrite(save_file, resized_im)
                counts['part'] += 1

    # The with-statement closes the label files even when mining fails.
    with open(save_dir + '/pos_24.txt', 'w') as f1, \
            open(save_dir + '/neg_24.txt', 'w') as f2, \
            open(save_dir + '/part_24.txt', 'w') as f3:
        with open(annotation_fp, 'r') as f:
            annotations = f.readlines()
        num = len(annotations)
        print('%d pics in total' % num)
        image_idx = 0
        with tf.device('/gpu:0'):
            with tf.Graph().as_default():
                config = tf.ConfigProto(allow_soft_placement=True)
                config.gpu_options.per_process_gpu_memory_fraction = 0.5
                with tf.Session(config=config) as sess:
                    image = tf.placeholder(tf.float32, [None, None, None, 3])
                    pnet = PNet({'data': image}, mode='test')
                    out_tensor = pnet.get_all_output()
                    init_op = tf.global_variables_initializer()
                    sess.run(init_op)
                    saver = tf.train.Saver()
                    saver.restore(sess, model_fp)

                    def pnet_fun(img):
                        return sess.run(out_tensor, feed_dict={image: img})

                    for annotation in annotations:
                        annotation = annotation.strip().split(' ')
                        bbox = list(map(float, annotation[1:5]))
                        gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
                        img_path = os.path.join(image_dir, annotation[0])
                        img = cv2.imread(img_path)
                        if img is None:
                            # cv2.imread signals failure by returning None.
                            raise IOError('cannot read image: %s' % img_path)
                        rectangles = detect_face_12net(
                            img, minsize, pnet_fun, threshold, factor)
                        image_idx += 1
                        view_bar(image_idx, num)
                        save_crops(img, gts, rectangles, f1, f2, f3)
def generateDataForRegression(dataInfoFilePath, savePath, threshold=0.6):
    """Generate bounding-box regression training data, one .npy per image.

    Every line of the info file holds, separated by single spaces, an image
    path, an integer object class and a comma-separated integer annotation
    rectangle. For each image, selective-search proposals are filtered,
    cropped and resized to 224x224. Proposals whose IoU with the annotation
    exceeds ``threshold`` are kept together with their correction
    coefficients from ``calcCorrectionCoef``.

    :param dataInfoFilePath: path of the info list file.
    :param savePath: directory that receives ``<image stem>_data.npy``, each
        holding ``[images, correction coefficients]``.
    :param threshold: minimum IoU (exclusive) for a proposal to be used.
    :return: None.
    :raises IOError: if a listed image cannot be read.
    """
    print("function = generateDataForRegression, filePath : ", dataInfoFilePath)
    # Read everything up front so the handle is released even if processing
    # fails later on.
    with open(dataInfoFilePath, "r") as f:
        fileInfoList = f.readlines()
    # random.shuffle(fileInfoList)
    print("line num of dataInfoFile : ", len(fileInfoList))
    for lineIndex, line in enumerate(fileInfoList):
        print(str(lineIndex) + " ---> Current line :", line)
        # read image file pathInfo in this line
        infoArr = line.split(" ")
        imageFilePath = infoArr[0]
        objectClass = int(infoArr[1])
        objectBoxPositonInfo = infoArr[2]
        print("image path :", imageFilePath)
        print("object class :", objectClass)
        print("object position info str :", objectBoxPositonInfo)
        notationRectArr = [int(s) for s in objectBoxPositonInfo.split(",")]
        print("object position arr :", notationRectArr)
        print("read imageFile info success...")
        # read img from filePath
        img = cv2.imread(imageFilePath)
        if img is None:
            # cv2.imread signals failure by returning None.
            raise IOError('cannot read image: %s' % imageFilePath)
        print("original img size: ", np.shape(img))
        # scale : size of the smallest region proposals
        # sigma : Width of Gaussian kernel for felzenszwalb segmentation
        # min_size : min size of regions
        img_lbl, regions = selectivesearch.selective_search(
            img, scale=500, sigma=0.9, min_size=10)
        print("child img size :", np.shape(img_lbl[0]))
        if regions:
            # Debug output only; guarded so that an image without any
            # proposal does not abort the run with an IndexError.
            # {'rect': (0, 0, 499, 441), 'size': 140000, 'labels': [0.0]}
            print(regions[0])
        # choose proposal regions
        proposalRegionCandiatesInRect = set()
        choosedChildImgArr = []
        choosedCorrectionCoefArr = []
        for childImageInfo in regions:
            # childImageInfo : {'rect': (0, 0, 499, 441), 'size': 140000, 'labels': [0.0]}
            # 'rect' : xStart, yStart, width, length
            childImageRect = childImageInfo['rect']
            if childImageRect in proposalRegionCandiatesInRect:
                print(" ------ childImage exist in candidates set, continue.")
                continue
            # delete child images which is too small
            if (childImageInfo['size'] < 220
                    or childImageRect[2] * childImageRect[3] < 500):
                continue
            # crop original image by childImageRect
            childImg, childImgDetailRect = cropImage(img, childImageRect)
            childImgShape = np.shape(childImg)
            # check childImage
            if (len(childImg) == 0 or childImgShape[0] == 0
                    or childImgShape[1] == 0 or childImgShape[2] == 0):
                continue
            # resize image
            resizedChildImg = resize_image(childImg, 224, 224)
            proposalRegionCandiatesInRect.add(childImageRect)
            resizedChildImgInFloat = np.asarray(resizedChildImg,
                                                dtype="float32")
            # calc IOU
            # use rect in notation to compare with generated rect by selective search
            iouValue = calcIOU(notationRectArr, childImgDetailRect)
            print("IOU :", iouValue)
            if iouValue > threshold:
                # this childImg can be used as training data for bbox regression
                currCoef = calcCorrectionCoef(childImageRect, notationRectArr)
                print("correction coef :", currCoef)
                # add this childImg to result
                choosedChildImgArr.append(resizedChildImgInFloat)
                choosedCorrectionCoefArr.append(currCoef)
        tools.view_bar(
            "processing image of %s" % dataInfoFilePath.split('\\')[-1].strip(),
            lineIndex + 1, len(fileInfoList))
        # save childImg collection to npy file
        originalImgFileName = imageFilePath.split('/')[-1]
        headOfOriginalImgFileName = originalImgFileName.split('.')[0].strip()
        tailOfGeneratedFileName = '_data.npy'
        generatedFilePath = os.path.join(
            savePath, headOfOriginalImgFileName) + tailOfGeneratedFileName
        np.save(generatedFilePath,
                [choosedChildImgArr, choosedCorrectionCoefArr])
        print("store bbox regression training data into file :",
              generatedFilePath)
def main(args):
    """Mine 48x48 hard training crops from PNet+RNet detections.

    Runs the restored PNet and RNet on every image listed in
    ``AWE_train.txt`` (image path followed by ground-truth box coordinates,
    four per box), crops each detection, resizes it to 48x48 and sorts it by
    its best IoU with the ground truth:

    * IoU < 0.3: saved under ``hard_48/negative`` and listed in
      ``hard_48/neg_48.txt``.
    * IoU >= 0.65: saved under ``hard_48/positive`` and listed in
      ``hard_48/pos_48.txt`` together with four box offsets.
    * 0.4 <= IoU < 0.65: saved under ``hard_48/part`` and listed in
      ``hard_48/part_48.txt`` together with four box offsets.

    Detections with IoU in [0.3, 0.4) are dropped.

    :param args: object whose ``pnet_model`` and ``rnet_model`` attributes
        are the checkpoint paths restored into the two networks.
    :raises IOError: if a listed image cannot be read.
    """
    image_size = 48
    save_dir = 'hard_' + str(image_size)
    anno_file = 'AWE_train.txt'
    im_dir = 'AWE_train/'
    neg_save_dir = save_dir + '/negative'
    pos_save_dir = save_dir + '/positive'
    part_save_dir = save_dir + '/part'
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    threshold = [0.6, 0.6]
    minsize = 20
    factor = 0.709
    # Running file indices: positive, negative, part ("dont care").
    counts = {'pos': 0, 'neg': 0, 'part': 0}

    def save_crops(img, gts, rectangles, f_pos, f_neg, f_part):
        # Crop, classify by IoU and store every detection of one image.
        for box in rectangles:
            lis = box.astype(np.int32)
            lis[lis < 0] = 0
            x_left, y_top, x_right, y_bottom, _ = lis
            crop_w = x_right - x_left + 1
            crop_h = y_bottom - y_top + 1
            # ignore box that is too small or beyond image border
            if crop_w < image_size or crop_h < image_size:
                continue
            iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1]
            if cropped_im.size == 0:
                # The box lies entirely outside the image; cv2.resize would
                # fail on an empty array.
                continue
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)
            # An image without ground-truth boxes yields no IoU values
            # (assuming IoU returns one value per ground-truth row); treat
            # its detections as negatives instead of letting np.max raise.
            max_iou = np.max(iou) if np.size(iou) else 0.0
            # save negative images and write label
            if max_iou < 0.3:
                # Iou with all gts must below 0.3
                n_idx = counts['neg']
                save_file = os.path.join(neg_save_dir, '%s.jpg' % n_idx)
                f_neg.write('hard_%s/negative/%s' % (image_size, n_idx)
                            + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                counts['neg'] += 1
                continue
            # find gt_box with the highest iou
            x1, y1, x2, y2 = gts[np.argmax(iou)]
            # compute bbox reg label
            offset_x1 = (x1 - x_left) / float(crop_w)
            offset_y1 = (y1 - y_top) / float(crop_h)
            offset_x2 = (x2 - x_right) / float(crop_w)
            offset_y2 = (y2 - y_bottom) / float(crop_h)
            offsets = (offset_x1, offset_y1, offset_x2, offset_y2)
            if max_iou >= 0.65:
                p_idx = counts['pos']
                save_file = os.path.join(pos_save_dir, '%s.jpg' % p_idx)
                f_pos.write('hard_%s/positive/%s' % (image_size, p_idx)
                            + ' 1 %.2f %.2f %.2f %.2f\n' % offsets)
                cv2.imwrite(save_file, resized_im)
                counts['pos'] += 1
            elif max_iou >= 0.4:
                d_idx = counts['part']
                save_file = os.path.join(part_save_dir, '%s.jpg' % d_idx)
                f_part.write('hard_%s/part/%s' % (image_size, d_idx)
                             + ' -1 %.2f %.2f %.2f %.2f\n' % offsets)
                cv2.imwrite(save_file, resized_im)
                counts['part'] += 1

    # The with-statement closes the label files even when mining fails.
    with open(save_dir + '/pos_48.txt', 'w') as f1, \
            open(save_dir + '/neg_48.txt', 'w') as f2, \
            open(save_dir + '/part_48.txt', 'w') as f3:
        with open(anno_file, 'r') as f:
            annotations = f.readlines()
        num = len(annotations)
        print('%d pics in total' % num)
        image_idx = 0
        with tf.device('/gpu:0'):
            model_file_pnet = args.pnet_model
            model_file_rnet = args.rnet_model
            with tf.Graph().as_default():
                config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
                config.gpu_options.per_process_gpu_memory_fraction = 0.8
                config.gpu_options.allow_growth = True
                with tf.compat.v1.Session(config=config) as sess:
                    image_pnet = tf.compat.v1.placeholder(
                        tf.float32, [None, None, None, 3])
                    pnet = PNet({'data': image_pnet}, mode='test')
                    out_tensor_pnet = pnet.get_all_output()
                    image_rnet = tf.compat.v1.placeholder(
                        tf.float32, [None, 24, 24, 3])
                    rnet = RNet({'data': image_rnet}, mode='test')
                    out_tensor_rnet = rnet.get_all_output()
                    # Each checkpoint restores only the variables whose name
                    # starts with the matching network prefix.
                    saver_pnet = tf.compat.v1.train.Saver([
                        v for v in tf.compat.v1.global_variables()
                        if v.name[0:4] == 'pnet'
                    ])
                    saver_rnet = tf.compat.v1.train.Saver([
                        v for v in tf.compat.v1.global_variables()
                        if v.name[0:4] == 'rnet'
                    ])
                    saver_pnet.restore(sess, model_file_pnet)
                    saver_rnet.restore(sess, model_file_rnet)

                    def pnet_fun(img):
                        return sess.run(out_tensor_pnet,
                                        feed_dict={image_pnet: img})

                    def rnet_fun(img):
                        return sess.run(out_tensor_rnet,
                                        feed_dict={image_rnet: img})

                    for annotation in annotations:
                        annotation = annotation.strip().split(' ')
                        bbox = list(map(float, annotation[1:]))
                        gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
                        img_path = im_dir + annotation[0]
                        img = cv2.imread(img_path)
                        if img is None:
                            # cv2.imread signals failure by returning None.
                            raise IOError('cannot read image: %s' % img_path)
                        rectangles = detect_face_24net(
                            img, minsize, pnet_fun, rnet_fun, threshold,
                            factor)
                        image_idx += 1
                        view_bar(image_idx, num)
                        save_crops(img, gts, rectangles, f1, f2, f3)
def main():
    """Build the two ONet training TFRecord files from the 48x48 sample lists.

    Reads the hard-mined lists ``48/{pos,neg,part}_48.txt`` and the randomly
    cropped lists ``native_48/{pos,neg,part}_48.txt``. The first column of
    every line is an image path without the ``.jpg`` suffix; columns 2-5 of
    positive and part lines are the four bounding-box regression targets.

    Writes:

    * ``onet_data_for_cls.tfrecords``: all hard positives and up to 20,000
      random positives (label ``[0, 1]``), up to 300,000 random negatives
      and all hard negatives (label ``[1, 0]``).
    * ``onet_data_for_bbx.tfrecords``: the same random positives, all hard
      positives, all hard parts and up to 100,000 random parts, labelled
      with their four offsets.

    Records are shuffled before being written.

    :raises IOError: if a listed image cannot be read.
    """
    size = 48
    net = str(size)

    def read_list(directory, kind):
        # One annotation line per cropped sample.
        with open('%s/%s_%s.txt' % (directory, kind, size), 'r') as f:
            return f.readlines()

    def pick(lines, limit):
        # Sample without replacement. The size is clamped because
        # npr.choice(replace=False) raises when asked for more items than
        # the population holds.
        if not lines:
            return []
        keep = npr.choice(len(lines), size=min(len(lines), limit),
                          replace=False)
        return [lines[i] for i in keep]

    def cls_positive(words):
        return [0, 1]

    def cls_negative(words):
        return [1, 0]

    def bbox_label(words):
        # Explicit indices so that a truncated line still raises IndexError.
        return [float(words[k]) for k in (2, 3, 4, 5)]

    def build(lines, label_of):
        # Turn annotation lines into tf.train.Example protos.
        total = len(lines)
        built = []
        for done, line in enumerate(lines):
            view_bar(done, total)
            words = line.split()
            image_file_name = words[0] + '.jpg'
            im = cv2.imread(image_file_name)
            if im is None:
                # cv2.imread signals failure by returning None.
                raise IOError('cannot read image: %s' % image_file_name)
            h, w = im.shape[:2]
            if h != size or w != size:
                im = cv2.resize(im, (size, size))
            im = im.astype('uint8')
            label = np.array(label_of(words), dtype='float32')
            # tobytes() is the supported spelling of the deprecated
            # tostring(); the produced bytes are identical.
            built.append(tf.train.Example(features=tf.train.Features(
                feature={
                    'label_raw': bytes_feature(label.tobytes()),
                    'image_raw': bytes_feature(im.tobytes())
                })))
        return built

    def write_shuffled(path, examples):
        # Shuffle in place and serialize; always release the writer.
        random.shuffle(examples)
        writer = tf.python_io.TFRecordWriter(path)
        try:
            for example in examples:
                writer.write(example.SerializeToString())
        finally:
            writer.close()

    pos_hard = read_list(net, 'pos')
    neg_hard = read_list(net, 'neg')
    part_hard = read_list(net, 'part')
    pos = read_list('native_' + net, 'pos')
    neg = read_list('native_' + net, 'neg')
    part = read_list('native_' + net, 'part')

    # Face / non-face classification set.
    print('\n' + 'positive hard')
    print('Writing')
    examples = build(pos_hard, cls_positive)
    print(len(examples))

    print('\n' + 'positive random cropped')
    # The same random positives are reused for the regression set below.
    pos_sample = pick(pos, 20000)
    print('Writing')
    examples += build(pos_sample, cls_positive)
    print(len(examples))

    print('\n' + 'negative random cropped')
    examples += build(pick(neg, 300000), cls_negative)
    print(len(examples))

    print('\n' + 'negative hard')
    examples += build(neg_hard, cls_negative)
    print(len(examples))
    write_shuffled('onet_data_for_cls.tfrecords', examples)

    # Bounding-box regression set.
    print('\n' + 'positive random cropped')
    print('Writing')
    examples = build(pos_sample, bbox_label)
    print(len(examples))

    print('\n' + 'positive hard')
    print('Writing')
    examples += build(pos_hard, bbox_label)
    print(len(examples))

    print('\n' + 'part hard')
    examples += build(part_hard, bbox_label)
    print(len(examples))

    print('\n' + 'part random cropped')
    examples += build(pick(part, 100000), bbox_label)
    print(len(examples))
    write_shuffled('onet_data_for_bbx.tfrecords', examples)
def load_train_proposals(datafile, num_clss, save_path, threshold=0.5,
                         is_svm=False, save=False):
    """Generate proposal images, labels and box-regression targets per image.

    Every line of the data file holds, separated by single spaces, an image
    path, a class index and a comma-separated integer ground-truth rectangle
    whose four values are read as ``Gx, Gy, Gw, Gh``. For each image,
    selective-search proposals are filtered, cropped, resized to
    ``config.IMAGE_SIZE`` and labelled by their IoU with the ground truth.

    Labels:

    * SVM mode: 0 when IoU < ``threshold``, the class index when IoU > 0.6,
      and -1 otherwise. The ground-truth crop itself is appended as an extra
      positive with regression target ``[0, 0, 0, 0]``.
    * Fine-tune mode: one-hot vectors of length ``num_clss + 1`` with index 0
      meaning background.

    :param datafile: path of the list file.
    :param num_clss: number of classes (background excluded).
    :param save_path: directory that receives ``<image stem>_data.npy``.
    :param threshold: IoU below which a proposal is labelled background.
    :param is_svm: selects SVM labels; the saved file then also contains the
        regression targets.
    :param save: if true, the data of each image is saved under save_path;
        otherwise it is discarded.
    :return: None.
    :raises IOError: if a listed image cannot be read.
    """
    # Read everything up front so the handle is released even if processing
    # fails later on.
    with open(datafile, 'r') as fr:
        train_list = fr.readlines()
    # random.shuffle(train_list)
    total = len(train_list)
    for num, line in enumerate(train_list):
        labels = []
        images = []
        rects = []
        tmp = line.strip().split(' ')
        # tmp0 = image address
        # tmp1 = label
        # tmp2 = rectangle vertices
        img_path = tmp[0]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None.
            raise IOError('cannot read image: %s' % img_path)
        # labels, let 0 represent default class, which is background; the
        # class index is the same for every proposal of this image.
        index = int(tmp[1])
        # run selective search to obtain the candidate boxes
        img_lbl, regions = selective_search(img_path, neighbor=8, scale=500,
                                            sigma=0.9, min_size=20)
        candidates = set()
        ref_rect_int = [int(i) for i in tmp[2].split(',')]
        Gx, Gy, Gw, Gh = ref_rect_int[0:4]
        for r in regions:
            rect = r['rect']
            # excluding same rectangle (with different segments)
            if rect in candidates:
                continue
            # excluding small regions
            if r['size'] < 220:
                continue
            if (rect[2] * rect[3]) < 500:
                continue
            # crop the target region
            proposal_img, proposal_vertice = clip_pic(img, rect)
            # Delete Empty array
            if len(proposal_img) == 0:
                continue
            # Ignore things contain 0 or not C contiguous array
            x, y, w, h = rect
            if w == 0 or h == 0:
                continue
            # Check if any 0-dimension exist
            [a, b, c] = np.shape(proposal_img)
            if a == 0 or b == 0 or c == 0:
                continue
            # resize to the network input size
            resized_proposal_img = resize_image(
                proposal_img, config.IMAGE_SIZE, config.IMAGE_SIZE)
            candidates.add(rect)
            images.append(np.asarray(resized_proposal_img, dtype="float32"))
            # IOU
            iou_val = IOU(ref_rect_int, proposal_vertice)
            # differences in x, y, w, h, used for bounding-box regression
            rects.append([(Gx - x) / w, (Gy - y) / h,
                          math.log(Gw / w), math.log(Gh / h)])
            if is_svm:
                # IoU below the threshold means background, 0
                if iou_val < threshold:
                    labels.append(0)
                elif iou_val > 0.6:  # 0.85
                    labels.append(index)
                else:
                    labels.append(-1)
            else:
                label = np.zeros(num_clss + 1)
                if iou_val < threshold:
                    label[0] = 1
                else:
                    label[index] = 1
                labels.append(label)
        if is_svm:
            # The ground-truth crop is an additional positive sample.
            ref_img, ref_vertice = clip_pic(img, ref_rect_int)
            resized_ref_img = resize_image(ref_img, config.IMAGE_SIZE,
                                           config.IMAGE_SIZE)
            images.append(np.asarray(resized_ref_img, dtype="float32"))
            rects.append([0, 0, 0, 0])
            labels.append(index)
        tools.view_bar(
            "processing image of %s" % datafile.split('\\')[-1].strip(),
            num + 1, total)
        if save:
            # strip() removes leading and trailing whitespace from the stem
            target = os.path.join(
                save_path,
                tmp[0].split('/')[-1].split('.')[0].strip()) + '_data.npy'
            if is_svm:
                np.save(target, [images, labels, rects])
            else:
                np.save(target, [images, labels])
    print(' ')
def train_svms(train_file_folder, model):
    """Train one SVM per ``.txt`` list in a folder, with hard negative mining.

    Each ``.txt`` file is expected to hold the samples of a single class.
    For every such file the samples returned by ``generate_single_svm_train``
    are turned into features with ``model.predict`` and split by label:

    * label >= 0: initial SVM training set; label > 0 additionally goes into
      the bounding-box regression set together with its rectangle.
    * any other label: held-out pool used for hard negative mining.

    The SVM is refit until the share of pool samples predicted as class 0
    stops improving, or until fewer than ten pool samples are misclassified
    as positive. After each round the top 10% of those misclassified samples
    (by predicted probability) are moved into the training set with label 0.

    Side effects: writes ``<list stem>_svm.pkl`` per list file and
    ``bbox_train.npy`` into ``train_file_folder``.

    :param train_file_folder: folder containing the per-class list files.
    :param model: feature extractor exposing ``predict``.
    :return: list of the fitted classifiers, one per list file.
    """
    files = os.listdir(train_file_folder)
    svms = []
    bbox_train_features = []
    rects = []
    for train_file in files:
        if train_file.split('.')[-1] != 'txt':
            continue
        pred_last = -1
        pred_now = 0
        X, Y, R = generate_single_svm_train(
            os.path.join(train_file_folder, train_file))
        Y1 = []
        features1 = []
        Y_hard = []
        features_hard = []
        for ind, i in enumerate(X):
            # extract features
            feats = model.predict([i])
            if Y[ind] >= 0:
                # all positive and negative samples go to features1 / Y1
                Y1.append(Y[ind])
                features1.append(feats[0])
                # positives also feed the bounding-box regression set
                if Y[ind] > 0:
                    bbox_train_features.append(feats[0])
                    rects.append(R[ind])
            else:
                # the rest is used as the held-out pool
                Y_hard.append(Y[ind])
                features_hard.append(feats[0])
            tools.view_bar("extract features of %s" % train_file, ind + 1,
                           len(X))

        # hard negative mining
        clf = SVC(probability=True)
        # train until the accuracy no longer improves
        while pred_now > pred_last:
            clf.fit(features1, Y1)
            features_new_hard = []
            Y_new_hard = []
            index_new_hard = []
            # number of pool samples predicted correctly
            count = 0
            for ind, i in enumerate(features_hard):
                sample = [i.tolist()]
                # predict once per sample and reuse the result
                predicted = clf.predict(sample)[0]
                if predicted == 0:
                    count += 1
                elif predicted > 0:
                    # misclassified as positive: a hard negative candidate
                    features_new_hard.append(i)
                    Y_new_hard.append(clf.predict_proba(sample)[0][1])
                    index_new_hard.append(ind)
            # stop iterating when there are too few hard negatives
            if len(features_new_hard) / 10 < 1:
                break
            pred_last = pred_now
            # accuracy on the pool for this round
            pred_now = count / len(features_hard)
            # sort hard negatives by probability and move the top 10% into
            # the training set as negatives
            sorted_index = np.argsort(-np.array(Y_new_hard)).tolist()[
                0:int(len(features_new_hard) / 10)]
            for idx in sorted_index:
                features1.append(features_new_hard[idx])
                Y1.append(0)
            # Remove the moved samples from the pool. The recorded positions
            # refer to the pool as it was before any removal, so pop from the
            # back to keep the remaining positions valid.
            for position in sorted(
                    (index_new_hard[idx] for idx in sorted_index),
                    reverse=True):
                features_hard.pop(position)
                Y_hard.pop(position)

        print(' ')
        print("feature dimension")
        print(np.shape(features1))
        svms.append(clf)
        # serialize clf to store the SVM classifier
        joblib.dump(clf, os.path.join(
            train_file_folder, str(train_file.split('.')[0]) + '_svm.pkl'))

    # save the bounding-box regression training set
    np.save((os.path.join(train_file_folder, 'bbox_train.npy')),
            [bbox_train_features, rects])
    return svms
def main(input_size, classifier_tfrecord_fp, localizer_tfrecord_fp,
         root_data_dir):
    """Build the classification and bbox-regression TFRecord files.

    Reads ``pos_<input_size>.txt``, ``neg_<input_size>.txt`` and
    ``part_<input_size>.txt`` from ``<root_data_dir>/native_<input_size>``.
    The first column of every line is an image path; columns 2-5 of positive
    and part lines are the four bounding-box regression targets.

    Images whose size differs from ``input_size`` are resized to
    ``input_size`` x ``input_size``.

    :param input_size: side length of the network input; also selects the
        list files.
    :param classifier_tfrecord_fp: output path for the classification set:
        every positive (label ``[0, 1]``) plus up to 1,000,000 randomly
        chosen negatives (label ``[1, 0]``).
    :param localizer_tfrecord_fp: output path for the regression set: every
        positive plus up to 300,000 randomly chosen part samples, labelled
        with their four offsets.
    :param root_data_dir: directory containing ``native_<input_size>``.
    :raises IOError: if a listed image cannot be read.
    """
    net = os.path.join(root_data_dir, 'native_' + str(input_size))

    def read_list(kind):
        # One annotation line per cropped sample.
        with open('%s/%s_%s.txt' % (net, kind, input_size), 'r') as f:
            return f.readlines()

    def pick(lines, limit):
        # Sample without replacement, never asking for more than exist.
        if not lines:
            return []
        keep = npr.choice(len(lines), size=min(len(lines), limit),
                          replace=False)
        return [lines[i] for i in keep]

    def bbox_label(words):
        # Explicit indices so that a truncated line still raises IndexError.
        return [float(words[k]) for k in (2, 3, 4, 5)]

    def build(lines, label_of):
        # Turn annotation lines into tf.train.Example protos.
        total = len(lines)
        built = []
        for done, line in enumerate(lines):
            view_bar(done, total)
            words = line.split()
            image_file_name = words[0]
            im = cv2.imread(image_file_name)
            if im is None:
                # cv2.imread signals failure by returning None.
                raise IOError('cannot read image: %s' % image_file_name)
            h, w = im.shape[:2]
            # Resize to the requested input size, not to a fixed 12x12.
            if h != input_size or w != input_size:
                im = cv2.resize(im, (input_size, input_size))
            im = im.astype('uint8')
            label = np.array(label_of(words), dtype='float32')
            # tobytes() is the supported spelling of the deprecated
            # tostring(); the produced bytes are identical.
            built.append(tf.train.Example(features=tf.train.Features(
                feature={
                    'label_raw': bytes_feature(label.tobytes()),
                    'image_raw': bytes_feature(im.tobytes())
                })))
        return built

    def write_shuffled(path, examples):
        # Shuffle in place and serialize; always release the writer.
        print(len(examples))
        random.shuffle(examples)
        writer = tf.python_io.TFRecordWriter(path)
        try:
            for example in examples:
                writer.write(example.SerializeToString())
        finally:
            writer.close()

    pos = read_list('pos')
    neg = read_list('neg')
    part = read_list('part')

    # Face / non-face classification set.
    print('\n' + 'pos')
    print('Writing')
    examples = build(pos, lambda words: [0, 1])
    print('\n' + 'neg')
    examples += build(pick(neg, 1000000), lambda words: [1, 0])
    write_shuffled(classifier_tfrecord_fp, examples)

    # Bounding-box regression set.
    print('\n' + 'pos')
    print('Writing')
    examples = build(pos, bbox_label)
    print('\n' + 'part')
    examples += build(pick(part, 300000), bbox_label)
    write_shuffled(localizer_tfrecord_fp, examples)