def _load_pnet_image(image_file_name, size):
    """Read an image and return it as a ``size`` x ``size`` uint8 array.

    :param image_file_name: path of the image file to read.
    :param size: side length (in pixels) the stored image must have.
    :return: the image, resized when its height or width differs from
    ``size``.
    :raises IOError: if OpenCV cannot read the file.
    """
    im = cv2.imread(image_file_name)
    if im is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than on ``None.shape``.
        raise IOError('cannot read image: %s' % image_file_name)
    h, w = im.shape[:2]
    if h != size or w != size:
        im = cv2.resize(im, (size, size))
    return im.astype('uint8')


def _build_pnet_examples(lines, make_label, size):
    """Convert annotation lines into ``tf.train.Example`` records.

    :param lines: annotation lines; the first word of each line is the image
    path without the ``.jpg`` extension.
    :param make_label: callable mapping the split words of a line to the
    label values, which are stored as float32.
    :param size: side length of the stored images.
    :return: list with one example per line, in input order.
    """
    examples = []
    total = len(lines)
    for done, line in enumerate(lines):
        view_bar(done, total)
        words = line.split()
        im = _load_pnet_image(words[0]+'.jpg', size)
        label = np.array(make_label(words), dtype='float32')
        # tobytes() yields the same bytes as the deprecated tostring()
        examples.append(tf.train.Example(features=tf.train.Features(feature={
            'label_raw': bytes_feature(label.tobytes()),
            'image_raw': bytes_feature(im.tobytes())})))
    return examples


def _sample_pnet_lines(lines, limit):
    """Pick at most ``limit`` distinct lines at random.

    Sampling without replacement raises when more items are requested than
    exist, so the request is capped at ``len(lines)``.
    """
    if not lines:
        return []
    keep = npr.choice(len(lines), size=min(limit, len(lines)), replace=False)
    return [lines[i] for i in keep]


def _write_pnet_records(filename, examples):
    """Shuffle ``examples`` in place and write them to a TFRecord file."""
    print(len(examples))
    random.shuffle(examples)
    writer = tf.python_io.TFRecordWriter(filename)
    try:
        for example in examples:
            writer.write(example.SerializeToString())
    finally:
        # close the writer even when serialisation or writing fails
        writer.close()


def _pnet_bbox_label(words):
    """Return the four regression targets stored in columns 2..5 of a line."""
    return [float(words[2]), float(words[3]),
            float(words[4]), float(words[5])]


def main():
    """Build the two P-Net training TFRecord files from ``native_12`` lists.

    Reads ``native_12/pos_12.txt``, ``native_12/neg_12.txt`` and
    ``native_12/part_12.txt`` and writes:

    * ``pnet_data_for_cls.tfrecords`` -- every positive (label ``[0, 1]``)
      plus up to 1,000,000 randomly chosen negatives (label ``[1, 0]``);
    * ``pnet_data_for_bbx.tfrecords`` -- every positive plus up to 300,000
      randomly chosen part samples, labelled with the four float values in
      columns 2..5 of the annotation line.

    Each record stores the raw bytes of the float32 label (``label_raw``) and
    of the uint8 image (``image_raw``). Records are shuffled before writing.

    :raises IOError: if an image referenced by a list file cannot be read.
    """
    size = 12
    net = 'native_'+str(size)

    with open('%s/pos_%s.txt' % (net, size), 'r') as f:
        pos = f.readlines()
    with open('%s/neg_%s.txt' % (net, size), 'r') as f:
        neg = f.readlines()
    with open('%s/part_%s.txt' % (net, size), 'r') as f:
        part = f.readlines()

    # --- classification set: positives + sampled negatives ---
    print('\n'+'pos')
    filename_cls = 'pnet_data_for_cls.tfrecords'
    print('Writing')
    examples = _build_pnet_examples(pos, lambda words: [0, 1], size)

    print('\n'+'neg')
    neg_keep = _sample_pnet_lines(neg, 1000000)
    examples += _build_pnet_examples(neg_keep, lambda words: [1, 0], size)
    _write_pnet_records(filename_cls, examples)

    # --- bounding-box regression set: positives + sampled part samples ---
    print('\n'+'pos')
    filename_roi = 'pnet_data_for_bbx.tfrecords'
    print('Writing')
    examples = _build_pnet_examples(pos, _pnet_bbox_label, size)

    print('\n'+'part')
    part_keep = _sample_pnet_lines(part, 300000)
    examples += _build_pnet_examples(part_keep, _pnet_bbox_label, size)
    _write_pnet_records(filename_roi, examples)
# Example no. 2
# 0
def generate_ft_or_svm_data(list_path,
                            num_clss,
                            save_path,
                            threshold=0.5,
                            is_svm=False,
                            save=False):
    """
    Generate the fine-tuning or SVM training data described by a list file.

    Each line of the list file holds, separated by single spaces, an image
    path, a class index and a comma-separated integer ground-truth rectangle.
    For every image, region proposals come from selective search; proposals
    that are duplicated, too small or empty are dropped, the rest are resized
    to ``config.IMAGE_SIZE`` and labelled by their IoU with the ground truth.

    :param list_path: path of fine_tune_list.txt.
    :param num_clss: number of class (include background); one-hot labels
    have ``num_clss + 1`` entries.
    :param save_path: path to save generated example.
    :param threshold: threshold of IoU with ground truth; proposals below it
    are labelled as background (0).
    :param is_svm: if true, labels will be scalar instead of one hot vector.
    :param save: if true, save generated data as .npy files to save_path
    (one ``<image name>_data.npy`` per image holding ``[images, labels]``).
    :return: None. The per-image data is only persisted when ``save`` is
    true.
    :raises IOError: if an image named in the list cannot be read.
    """
    # read everything up front so the handle is released before the long loop
    with open(list_path, 'r') as fr:
        train_list = fr.readlines()
    for num, line in enumerate(train_list):  # 1 line = 1 image = 1 .npy
        labels = []
        images = []
        tmp = line.strip().split(' ')  # [image path, label, rect GT]
        img = cv2.imread(tmp[0])
        if img is None:
            # cv2.imread returns None on failure instead of raising
            raise IOError('cannot read image: %s' % tmp[0])
        # class index and ground-truth rect are the same for every proposal
        # of this image, so parse them once rather than per region
        index = int(tmp[1])
        ref_rect_int = [int(i) for i in tmp[2].split(',')]
        img_lbl, regions = ss.selective_search(img,
                                               scale=500,
                                               sigma=0.9,
                                               min_size=10)
        candidates = set()
        for r in regions:
            rect = r['rect']
            # excluding same rectangle (with different segments)
            if rect in candidates:
                continue
            # excluding small regions
            if r['size'] < 220:
                continue
            # the area test also rejects rects with zero width or height
            if (rect[2] * rect[3]) < 500:
                continue
            # crop the original image to the proposal rect
            proposal_img, proposal_rect = clip_pic(img, rect)
            # Delete Empty array
            if len(proposal_img) == 0:
                continue
            # Check if any 0-dimension exist
            [a, b, c] = np.shape(proposal_img)
            if a == 0 or b == 0 or c == 0:
                continue
            # resize RPs to the input size of CNN
            resized_proposal_img = resize_image(proposal_img,
                                                config.IMAGE_SIZE,
                                                config.IMAGE_SIZE)
            candidates.add(rect)
            images.append(np.asarray(resized_proposal_img, dtype="float32"))
            # IOU
            iou_val = IOU(ref_rect_int, proposal_rect)
            # attach labels according to IoU threshold, 0: background
            if is_svm:
                if iou_val < threshold:
                    labels.append(0)  # negative example
                else:
                    labels.append(index)  # positive example
            else:  # fine tune
                label = np.zeros(num_clss + 1)  # one hot
                if iou_val < threshold:
                    label[0] = 1  # negative
                else:
                    label[index] = 1  # positive
                labels.append(label)
        tools.view_bar(
            "processing image of %s" % list_path.split('\\')[-1].strip(),
            num + 1, len(train_list))
        if save:
            # images and labels have different shapes, so the pair is a
            # ragged sequence; NumPy >= 1.24 refuses to build it implicitly
            # and needs an explicit object dtype.
            np.save((os.path.join(
                save_path, tmp[0].split('/')[-1].split('.')[0].strip()) +
                     '_data.npy'), np.array([images, labels], dtype=object))
    print(' ')
def main(args):
    """Mine 24x24 training crops from P-Net detections.

    Runs ``detect_face_12net`` with the checkpoint ``args.pnet_model`` on
    every image listed in ``wider_face_train.txt`` (one line per image: the
    image name followed by groups of four box coordinates ``x1 y1 x2 y2``).
    Each detection of at least 24x24 pixels is cropped, resized to 24x24 and
    sorted by its best IoU with the annotated boxes:

    * IoU < 0.3   -> ``24/negative``, listed in ``24/neg_24.txt`` (label 0);
    * IoU >= 0.65 -> ``24/positive``, listed in ``24/pos_24.txt`` (label 1);
    * IoU >= 0.4  -> ``24/part``, listed in ``24/part_24.txt`` (label -1).

    Detections with 0.3 <= IoU < 0.4 are discarded. Positive and part lines
    also carry the four offsets of the matched ground-truth box relative to
    the crop, divided by the crop width/height.

    :param args: object exposing ``pnet_model``, the P-Net checkpoint path.
    """
    image_size = 24
    save_dir = str(image_size)
    anno_file = 'wider_face_train.txt'
    im_dir = 'WIDER_train/images/'

    neg_save_dir = save_dir+'/negative'
    pos_save_dir = save_dir+'/positive'
    part_save_dir = save_dir+'/part'
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    threshold = 0.6
    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print('%d pics in total' % num)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # part
    image_idx = 0
    minsize = 20
    factor = 0.709
    model_file = args.pnet_model
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = 0.5

    # One ``with`` owns the three label files together with the TensorFlow
    # contexts (entered in the same order as before: device, graph, session),
    # so the files are closed even if detection or an image write raises.
    with open(save_dir+'/pos_24.txt', 'w') as f1, \
            open(save_dir+'/neg_24.txt', 'w') as f2, \
            open(save_dir+'/part_24.txt', 'w') as f3, \
            tf.device('/gpu:0'), \
            tf.Graph().as_default(), \
            tf.Session(config=config) as sess:
        image = tf.placeholder(tf.float32, [None, None, None, 3])
        pnet = PNet({'data': image}, mode='test')
        out_tensor = pnet.get_all_output()
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        saver = tf.train.Saver()
        saver.restore(sess, model_file)

        def pnet_fun(img):
            return sess.run(out_tensor, feed_dict={image: img})

        for annotation in annotations:
            annotation = annotation.strip().split(' ')
            bbox = list(map(float, annotation[1:]))
            gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
            img_path = im_dir + annotation[0] + '.jpg'
            img = cv2.imread(img_path)
            image_idx += 1
            if img is None:
                # cv2.imread returns None on failure; one bad file should
                # not abort the whole mining run.
                view_bar(image_idx, num)
                print('\nskipping unreadable image: %s' % img_path)
                continue
            rectangles = detect_face_12net(img, minsize, pnet_fun,
                                           threshold, factor)
            view_bar(image_idx, num)

            for box in rectangles:
                lis = box.astype(np.int32)
                lis[lis < 0] = 0  # clamp coordinates left/above the image
                x_left, y_top, x_right, y_bottom, _ = lis
                crop_w = x_right - x_left + 1
                crop_h = y_bottom - y_top + 1
                # ignore box that is too small
                if crop_w < image_size or crop_h < image_size:
                    continue

                cropped_im = img[y_top: y_bottom+1, x_left: x_right+1]
                if cropped_im.size == 0:
                    # box starts beyond the right/bottom border: the slice
                    # is empty and cv2.resize would raise on it
                    continue
                resized_im = cv2.resize(cropped_im,
                                        (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if len(gts) == 0:
                    # no annotated box: nothing can overlap, and np.max of
                    # an empty array would raise
                    max_iou = 0.0
                else:
                    Iou = IoU(box, gts)
                    max_iou = np.max(Iou)

                # save negative images and write label
                if max_iou < 0.3:
                    # Iou with all gts must below 0.3
                    save_file = os.path.join(neg_save_dir, '%s.jpg' % n_idx)
                    f2.write('%s/negative/%s' % (save_dir, n_idx) + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    continue

                # find gt_box with the highest iou
                x1, y1, x2, y2 = gts[np.argmax(Iou)]

                # compute bbox reg label
                offset_x1 = (x1 - x_left) / float(crop_w)
                offset_y1 = (y1 - y_top) / float(crop_h)
                offset_x2 = (x2 - x_right) / float(crop_w)
                offset_y2 = (y2 - y_bottom) / float(crop_h)
                offsets = (offset_x1, offset_y1, offset_x2, offset_y2)

                if max_iou >= 0.65:
                    save_file = os.path.join(pos_save_dir, '%s.jpg' % p_idx)
                    f1.write('%s/positive/%s' % (save_dir, p_idx) +
                             ' 1 %.2f %.2f %.2f %.2f\n' % offsets)
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif max_iou >= 0.4:
                    save_file = os.path.join(part_save_dir, '%s.jpg' % d_idx)
                    f3.write('%s/part/%s' % (save_dir, d_idx) +
                             ' -1 %.2f %.2f %.2f %.2f\n' % offsets)
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
def main(annotation_fp, image_dir, model_fp, output_dir):
    """Mine 24x24 training crops from P-Net detections.

    Runs ``detect_face_12net`` on every annotated image. Each detection of
    at least 24x24 pixels is cropped, resized to 24x24 and sorted by its IoU
    with the annotated box:

    * IoU < 0.3   -> ``<output_dir>/24/negative``, listed in ``neg_24.txt``
      (label 0);
    * IoU >= 0.65 -> ``<output_dir>/24/positive``, listed in ``pos_24.txt``
      (label 1);
    * IoU >= 0.4  -> ``<output_dir>/24/part``, listed in ``part_24.txt``
      (label -1).

    Detections with 0.3 <= IoU < 0.4 are discarded. Positive and part lines
    also carry the four offsets of the ground-truth box relative to the crop,
    divided by the crop width/height.

    :param annotation_fp: annotation file; each line is an image file name
    followed by box coordinates ``x1 y1 x2 y2`` (only the first four values
    after the name are used).
    :param image_dir: directory the image file names are relative to.
    :param model_fp: path of the P-Net checkpoint to restore.
    :param output_dir: directory under which the ``24`` folder is created.
    """
    image_size = 24
    save_dir = os.path.join(output_dir, str(image_size))

    neg_save_dir = save_dir + '/negative'
    pos_save_dir = save_dir + '/positive'
    part_save_dir = save_dir + '/part'
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        if not os.path.exists(directory):
            # makedirs also creates output_dir itself when it is missing,
            # where a plain mkdir would fail
            os.makedirs(directory)

    threshold = 0.6
    with open(annotation_fp, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print('%d pics in total' % num)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # part
    image_idx = 0
    minsize = 20
    factor = 0.709
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = 0.5

    # One ``with`` owns the three label files together with the TensorFlow
    # contexts (entered in the same order as before: device, graph, session),
    # so the files are closed even if detection or an image write raises.
    with open(save_dir + '/pos_24.txt', 'w') as f1, \
            open(save_dir + '/neg_24.txt', 'w') as f2, \
            open(save_dir + '/part_24.txt', 'w') as f3, \
            tf.device('/gpu:0'), \
            tf.Graph().as_default(), \
            tf.Session(config=config) as sess:
        image = tf.placeholder(tf.float32, [None, None, None, 3])
        pnet = PNet({'data': image}, mode='test')
        out_tensor = pnet.get_all_output()
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        saver = tf.train.Saver()
        saver.restore(sess, model_fp)

        def pnet_fun(img):
            return sess.run(out_tensor, feed_dict={image: img})

        for annotation in annotations:
            annotation = annotation.strip().split(' ')
            bbox = list(map(float, annotation[1:5]))
            gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
            img_path = os.path.join(image_dir, annotation[0])
            img = cv2.imread(img_path)
            image_idx += 1
            if img is None:
                # cv2.imread returns None on failure; one bad file should
                # not abort the whole mining run.
                view_bar(image_idx, num)
                print('\nskipping unreadable image: %s' % img_path)
                continue
            rectangles = detect_face_12net(img, minsize, pnet_fun,
                                           threshold, factor)
            view_bar(image_idx, num)

            for box in rectangles:
                lis = box.astype(np.int32)
                lis[lis < 0] = 0  # clamp coordinates left/above the image
                x_left, y_top, x_right, y_bottom, _ = lis
                crop_w = x_right - x_left + 1
                crop_h = y_bottom - y_top + 1
                # ignore box that is too small
                if crop_w < image_size or crop_h < image_size:
                    continue

                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1]
                if cropped_im.size == 0:
                    # box starts beyond the right/bottom border: the slice
                    # is empty and cv2.resize would raise on it
                    continue
                resized_im = cv2.resize(cropped_im,
                                        (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if len(gts) == 0:
                    # no annotated box: nothing can overlap, and np.max of
                    # an empty array would raise
                    max_iou = 0.0
                else:
                    iou = IoU(box, gts)
                    max_iou = np.max(iou)

                # save negative images and write label
                if max_iou < 0.3:
                    # Iou with all gts must below 0.3
                    save_file = os.path.join(neg_save_dir,
                                             str(n_idx) + '.jpg')
                    f2.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    continue

                # find gt_box with the highest iou
                x1, y1, x2, y2 = gts[np.argmax(iou)]

                # compute bbox reg label
                offset_x1 = (x1 - x_left) / float(crop_w)
                offset_y1 = (y1 - y_top) / float(crop_h)
                offset_x2 = (x2 - x_right) / float(crop_w)
                offset_y2 = (y2 - y_bottom) / float(crop_h)
                offsets = (offset_x1, offset_y1, offset_x2, offset_y2)

                if max_iou >= 0.65:
                    save_file = os.path.join(pos_save_dir,
                                             str(p_idx) + '.jpg')
                    f1.write(save_file +
                             ' 1 %.2f %.2f %.2f %.2f\n' % offsets)
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif max_iou >= 0.4:
                    save_file = os.path.join(part_save_dir,
                                             str(d_idx) + '.jpg')
                    f3.write(save_file +
                             ' -1 %.2f %.2f %.2f %.2f\n' % offsets)
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
def generateDataForRegression(dataInfoFilePath, savePath, threshold=0.6):
    """Generate bounding-box regression training data, one .npy per image.

    Each line of the info file holds, separated by single spaces, an image
    path, an integer object class and a comma-separated integer annotation
    rectangle. Region proposals come from selective search; proposals that
    are duplicated, too small or empty are dropped. Every remaining proposal
    whose IoU with the annotation rectangle exceeds ``threshold`` is resized
    to 224x224 and stored together with its correction coefficients (as
    returned by ``calcCorrectionCoef``).

    :param dataInfoFilePath: path of the file listing images and annotations.
    :param savePath: directory receiving ``<image name>_data.npy`` files,
    each holding ``[images, correction coefficients]``.
    :param threshold: proposals must have an IoU strictly above this value.
    :return: None.
    :raises IOError: if an image named in the info file cannot be read.
    """
    print("function = generateDataForRegression, filePath : ",
          dataInfoFilePath)
    # read everything up front so the handle is released before the long loop
    with open(dataInfoFilePath, "r") as f:
        fileInfoList = f.readlines()
    print("line num of dataInfoFile : ", len(fileInfoList))
    for lineIndex, line in enumerate(fileInfoList):
        print(str(lineIndex) + " ---> Current line :", line)
        # read image file pathInfo in this line
        infoArr = line.split(" ")
        imageFilePath = infoArr[0]
        objectClass = int(infoArr[1])
        objectBoxPositonInfo = infoArr[2]
        print("image path :", imageFilePath)
        print("object class :", objectClass)
        print("object position info str :", objectBoxPositonInfo)
        positionArrInString = objectBoxPositonInfo.split(",")
        # int() tolerates the trailing newline left on the last field
        notationRectArr = [int(s) for s in positionArrInString]
        print("object position arr :", notationRectArr)
        print("read imageFile info success...")

        # read img from filePath
        img = cv2.imread(imageFilePath)
        if img is None:
            # cv2.imread returns None on failure instead of raising
            raise IOError('cannot read image: %s' % imageFilePath)
        print("original img size: ", np.shape(img))
        # scale : size of the smallest region proposals
        # sigma : Width of Gaussian kernel for felzenszwalb segmentation
        # min_size : min size of regions
        img_lbl, regions = selectivesearch.selective_search(img,
                                                            scale=500,
                                                            sigma=0.9,
                                                            min_size=10)
        # debug output only: first row of img_lbl and first proposal
        img_lbl_0 = img_lbl[0]
        regions_0 = regions[0]
        print("child img size :", np.shape(img_lbl_0))
        print(regions_0
              )  # {'rect': (0, 0, 499, 441), 'size': 140000, 'labels': [0.0]}

        # choose proposal regions
        proposalRegionCandiatesInRect = set()
        choosedChildImgArr = []
        choosedCorrectionCoefArr = []

        for childImageInfo in regions:
            # childImageInfo : {'rect': (0, 0, 499, 441), 'size': 140000, 'labels': [0.0]}
            # 'rect' : xStart, yStart, width, length
            childImageRect = childImageInfo['rect']
            if childImageRect in proposalRegionCandiatesInRect:
                print(" ------ childImage exist in candidates set, continue.")
                continue
            # delete child images which is too small
            if (childImageInfo['size'] < 220
                    or childImageRect[2] * childImageRect[3] < 500):
                continue
            # crop original image by childImageRect
            childImg, childImgDetailRect = cropImage(img, childImageRect)
            childImgShape = np.shape(childImg)
            # check childImage
            if (len(childImg) == 0 or childImgShape[0] == 0
                    or childImgShape[1] == 0 or childImgShape[2] == 0):
                continue
            # resize image
            resizedChildImg = resize_image(childImg, 224, 224)
            proposalRegionCandiatesInRect.add(childImageRect)
            resizedChildImgInFloat = np.asarray(resizedChildImg,
                                                dtype="float32")

            # calc IOU
            # use rect in notation to compare with generated rect by selective search
            iouValue = calcIOU(notationRectArr, childImgDetailRect)
            print("IOU :", iouValue)

            if iouValue > threshold:
                # this childImg can be used as training data for bbox regression
                # calc correctionCoef
                currCoef = calcCorrectionCoef(childImageRect, notationRectArr)
                print("correction coef :", currCoef)
                # add this childImg to result
                choosedChildImgArr.append(resizedChildImgInFloat)
                choosedCorrectionCoefArr.append(currCoef)

        tools.view_bar(
            "processing image of %s" %
            dataInfoFilePath.split('\\')[-1].strip(), lineIndex + 1,
            len(fileInfoList))
        # save childImg collection to npy file
        originalImgFileName = imageFilePath.split('/')[-1]
        headOfOriginalImgFileName = originalImgFileName.split('.')[0].strip()
        tailOfGeneratedFileName = '_data.npy'
        generatedFilePath = os.path.join(
            savePath, headOfOriginalImgFileName) + tailOfGeneratedFileName
        # images and coefficients have different shapes, so the pair is a
        # ragged sequence; NumPy >= 1.24 refuses to build it implicitly and
        # needs an explicit object dtype.
        np.save(generatedFilePath,
                np.array([choosedChildImgArr, choosedCorrectionCoefArr],
                         dtype=object))
        print("store bbox regression training data into file :",
              generatedFilePath)
def main(args):
    """Mine 48x48 training crops from P-Net + R-Net detections.

    Runs ``detect_face_24net`` with the checkpoints ``args.pnet_model`` and
    ``args.rnet_model`` on every image listed in ``AWE_train.txt`` (one line
    per image: the image file name followed by groups of four box
    coordinates ``x1 y1 x2 y2``). Each detection of at least 48x48 pixels is
    cropped, resized to 48x48 and sorted by its best IoU with the annotated
    boxes:

    * IoU < 0.3   -> ``hard_48/negative``, listed in ``hard_48/neg_48.txt``
      (label 0);
    * IoU >= 0.65 -> ``hard_48/positive``, listed in ``hard_48/pos_48.txt``
      (label 1);
    * IoU >= 0.4  -> ``hard_48/part``, listed in ``hard_48/part_48.txt``
      (label -1).

    Detections with 0.3 <= IoU < 0.4 are discarded. Positive and part lines
    also carry the four offsets of the matched ground-truth box relative to
    the crop, divided by the crop width/height.

    :param args: object exposing ``pnet_model`` and ``rnet_model``, the
    checkpoint paths of the two networks.
    """
    image_size = 48
    save_dir = 'hard_' + str(image_size)
    anno_file = 'AWE_train.txt'
    im_dir = 'AWE_train/'
    neg_save_dir = save_dir + '/negative'
    pos_save_dir = save_dir + '/positive'
    part_save_dir = save_dir + '/part'
    for directory in (save_dir, pos_save_dir, part_save_dir, neg_save_dir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    threshold = [0.6, 0.6]
    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print('%d pics in total' % num)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # part
    image_idx = 0
    minsize = 20
    factor = 0.709
    model_file_pnet = args.pnet_model
    model_file_rnet = args.rnet_model
    config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    config.gpu_options.allow_growth = True

    # One ``with`` owns the three label files together with the TensorFlow
    # contexts (entered in the same order as before: device, graph, session),
    # so the files are closed even if detection or an image write raises.
    with open(save_dir + '/pos_48.txt', 'w') as f1, \
            open(save_dir + '/neg_48.txt', 'w') as f2, \
            open(save_dir + '/part_48.txt', 'w') as f3, \
            tf.device('/gpu:0'), \
            tf.Graph().as_default(), \
            tf.compat.v1.Session(config=config) as sess:
        image_pnet = tf.compat.v1.placeholder(tf.float32,
                                              [None, None, None, 3])
        pnet = PNet({'data': image_pnet}, mode='test')
        out_tensor_pnet = pnet.get_all_output()
        image_rnet = tf.compat.v1.placeholder(tf.float32,
                                              [None, 24, 24, 3])
        rnet = RNet({'data': image_rnet}, mode='test')
        out_tensor_rnet = rnet.get_all_output()

        # each checkpoint only holds the variables of its own network,
        # selected here by the 'pnet' / 'rnet' prefix of the variable name
        saver_pnet = tf.compat.v1.train.Saver([
            v for v in tf.compat.v1.global_variables()
            if v.name[0:4] == 'pnet'
        ])
        saver_rnet = tf.compat.v1.train.Saver([
            v for v in tf.compat.v1.global_variables()
            if v.name[0:4] == 'rnet'
        ])
        saver_pnet.restore(sess, model_file_pnet)
        saver_rnet.restore(sess, model_file_rnet)

        def pnet_fun(img):
            return sess.run(out_tensor_pnet, feed_dict={image_pnet: img})

        def rnet_fun(img):
            return sess.run(out_tensor_rnet, feed_dict={image_rnet: img})

        for annotation in annotations:
            annotation = annotation.strip().split(' ')
            bbox = list(map(float, annotation[1:]))
            gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
            img_path = im_dir + annotation[0]
            img = cv2.imread(img_path)
            image_idx += 1
            if img is None:
                # cv2.imread returns None on failure; one bad file should
                # not abort the whole mining run.
                view_bar(image_idx, num)
                print('\nskipping unreadable image: %s' % img_path)
                continue
            rectangles = detect_face_24net(img, minsize, pnet_fun,
                                           rnet_fun, threshold, factor)
            view_bar(image_idx, num)

            for box in rectangles:
                lis = box.astype(np.int32)
                lis[lis < 0] = 0  # clamp coordinates left/above the image
                x_left, y_top, x_right, y_bottom, _ = lis
                crop_w = x_right - x_left + 1
                crop_h = y_bottom - y_top + 1
                # ignore box that is too small
                if crop_w < image_size or crop_h < image_size:
                    continue

                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1]
                if cropped_im.size == 0:
                    # box starts beyond the right/bottom border: the slice
                    # is empty and cv2.resize would raise on it
                    continue
                resized_im = cv2.resize(cropped_im,
                                        (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                if len(gts) == 0:
                    # no annotated box: nothing can overlap, and np.max of
                    # an empty array would raise
                    max_iou = 0.0
                else:
                    Iou = IoU(box, gts)
                    max_iou = np.max(Iou)

                # save negative images and write label
                if max_iou < 0.3:
                    # Iou with all gts must below 0.3
                    save_file = os.path.join(neg_save_dir, '%s.jpg' % n_idx)
                    f2.write('hard_%s/negative/%s' % (image_size, n_idx) +
                             ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    continue

                # find gt_box with the highest iou
                x1, y1, x2, y2 = gts[np.argmax(Iou)]

                # compute bbox reg label
                offset_x1 = (x1 - x_left) / float(crop_w)
                offset_y1 = (y1 - y_top) / float(crop_h)
                offset_x2 = (x2 - x_right) / float(crop_w)
                offset_y2 = (y2 - y_bottom) / float(crop_h)
                offsets = (offset_x1, offset_y1, offset_x2, offset_y2)

                if max_iou >= 0.65:
                    save_file = os.path.join(pos_save_dir, '%s.jpg' % p_idx)
                    f1.write('hard_%s/positive/%s' % (image_size, p_idx) +
                             ' 1 %.2f %.2f %.2f %.2f\n' % offsets)
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif max_iou >= 0.4:
                    save_file = os.path.join(part_save_dir, '%s.jpg' % d_idx)
                    f3.write('hard_%s/part/%s' % (image_size, d_idx) +
                             ' -1 %.2f %.2f %.2f %.2f\n' % offsets)
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
# Example no. 7
# 0
def main():
    """Build the 48x48 classification and bbox-regression TFRecord files.

    Sample lists are read from ``48/{pos,neg,part}_48.txt`` (the "hard"
    lists) and ``native_48/{pos,neg,part}_48.txt`` (the randomly cropped
    lists).  Every line starts with an image path without the ``.jpg``
    suffix; lines used for regression carry four float offsets in fields
    2..5.

    Two files are written, each shuffled with ``random.shuffle``:

    * ``onet_data_for_cls.tfrecords`` -- all hard positives, up to 20000
      random positives, up to 300000 random negatives and all hard
      negatives.  Labels are ``[0, 1]`` for positives and ``[1, 0]`` for
      negatives (float32).
    * ``onet_data_for_bbx.tfrecords`` -- the same random positives, all hard
      positives, all hard parts and up to 100000 random parts.  Labels are
      the four float32 offsets of the line.

    Every record holds the raw bytes of the label (``label_raw``) and of the
    uint8 image (``image_raw``).

    Raises:
        IOError: if an image listed in a sample file cannot be read.
    """
    size = 48
    net = str(size)

    def read_lines(path):
        with open(path, 'r') as f:
            return f.readlines()

    def sample_indices(lines, limit):
        # Clamp the request: choice(..., replace=False) raises ValueError
        # when asked for more items than the population holds.
        return npr.choice(len(lines), size=min(len(lines), limit),
                          replace=False)

    def make_example(line, cls_label=None):
        """Turn one sample line into a tf.train.Example.

        With ``cls_label`` the label is that fixed vector; without it the
        label is the four offsets stored in fields 2..5 of the line.
        """
        words = line.split()
        image_file_name = words[0] + '.jpg'
        im = cv2.imread(image_file_name)
        if im is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('cannot read image: %s' % image_file_name)
        h, w = im.shape[:2]
        if h != size or w != size:
            im = cv2.resize(im, (size, size))
        im = im.astype('uint8')
        if cls_label is None:
            label = np.array([float(words[k]) for k in range(2, 6)],
                             dtype='float32')
        else:
            label = np.array(cls_label, dtype='float32')
        return tf.train.Example(features=tf.train.Features(
            feature={
                'label_raw': bytes_feature(label.tobytes()),
                'image_raw': bytes_feature(im.tobytes())
            }))

    def collect(examples, title, lines, cls_label=None):
        """Append one example per line, reporting progress as it goes."""
        print('\n' + title)
        total = len(lines)
        for done, line in enumerate(lines):
            view_bar(done, total)
            examples.append(make_example(line, cls_label))
        print(len(examples))

    def write_shuffled(filename, examples):
        """Shuffle the examples in place and write them to ``filename``."""
        random.shuffle(examples)
        print('Writing')
        # The context manager closes the writer even if a write fails.
        with tf.python_io.TFRecordWriter(filename) as writer:
            for example in examples:
                writer.write(example.SerializeToString())

    # Read every list up front so a missing file fails before any image work.
    pos_hard = read_lines('%s/pos_%s.txt' % (net, size))
    neg_hard = read_lines('%s/neg_%s.txt' % (net, size))
    part_hard = read_lines('%s/part_%s.txt' % (net, size))
    pos = read_lines('native_' + '%s/pos_%s.txt' % (net, size))
    neg = read_lines('native_' + '%s/neg_%s.txt' % (net, size))
    part = read_lines('native_' + '%s/part_%s.txt' % (net, size))

    positive = [0, 1]
    negative = [1, 0]

    # ---- classification records ----
    # The random positives are drawn once and reused for the regression file.
    pos_sampled = [pos[i] for i in sample_indices(pos, 20000)]
    neg_sampled = [neg[i] for i in sample_indices(neg, 300000)]

    examples = []
    collect(examples, 'positive hard', pos_hard, positive)
    collect(examples, 'positive random cropped', pos_sampled, positive)
    collect(examples, 'negative random cropped', neg_sampled, negative)
    collect(examples, 'negative hard', neg_hard, negative)
    write_shuffled('onet_data_for_cls.tfrecords', examples)

    # ---- bounding-box regression records ----
    examples = []
    collect(examples, 'positive random cropped', pos_sampled)
    collect(examples, 'positive hard', pos_hard)
    collect(examples, 'part hard', part_hard)
    part_sampled = [part[i] for i in sample_indices(part, 100000)]
    collect(examples, 'part random cropped', part_sampled)
    write_shuffled('onet_data_for_bbx.tfrecords', examples)
def load_train_proposals(datafile, num_clss, save_path, threshold=0.5, is_svm=False, save=False):
    """Generate region-proposal training samples for each image in ``datafile``.

    Every non-blank line of ``datafile`` is split on single spaces into
    ``image_path label x,y,w,h`` (the reference rectangle is comma
    separated).  Selective search proposes regions for the image; each kept
    region is clipped, resized to ``config.IMAGE_SIZE`` and labelled by its
    IOU with the reference rectangle.

    Args:
        datafile: path of the text file listing the training images.
        num_clss: number of object classes; one-hot labels get
            ``num_clss + 1`` entries, entry 0 being background.
        save_path: directory that receives the ``<image stem>_data.npy`` files.
        threshold: proposals with IOU below this value are background.
        is_svm: when true, labels are integers (0 background, the class
            index when IOU > 0.6, -1 otherwise), regression targets are
            collected and the reference rectangle itself is appended as an
            extra sample.  When false, labels are one-hot vectors.
        save: when true, write the collected data for each image with
            ``np.save``.

    Returns:
        None.  Results are only written to disk (when ``save`` is set).
    """
    # Read the list up front so the handle is released even if a later step fails.
    with open(datafile, 'r') as fr:
        train_list = fr.readlines()
    for num, line in enumerate(train_list):
        if not line.strip():
            # A blank (e.g. trailing) line carries no sample.
            continue
        labels = []
        images = []
        rects = []
        tmp = line.strip().split(' ')
        # tmp[0] = image address
        # tmp[1] = label
        # tmp[2] = rectangle vertices
        img_path = tmp[0]
        img = cv2.imread(tmp[0])
        # Parse the class index once per image.  It is needed after the
        # region loop as well, so it must be defined even when no proposal
        # survives the filters below.
        index = int(tmp[1])
        # run selective search to obtain the candidate boxes
        img_lbl, regions = selective_search(img_path, neighbor=8, scale=500, sigma=0.9, min_size=20)
        candidates = set()
        ref_rect = tmp[2].split(',')
        ref_rect_int = [int(i) for i in ref_rect]
        Gx = ref_rect_int[0]
        Gy = ref_rect_int[1]
        Gw = ref_rect_int[2]
        Gh = ref_rect_int[3]
        for r in regions:
            # excluding same rectangle (with different segments)
            if r['rect'] in candidates:
                continue
            # excluding small regions
            if r['size'] < 220:
                continue
            if (r['rect'][2] * r['rect'][3]) < 500:
                continue
            # clip the proposed region out of the image
            proposal_img, proposal_vertice = clip_pic(img, r['rect'])
            # Delete Empty array
            if len(proposal_img) == 0:
                continue
            # Ignore things contain 0 or not C contiguous array
            x, y, w, h = r['rect']
            if w == 0 or h == 0:
                continue
            # Check if any 0-dimension exist
            [a, b, c] = np.shape(proposal_img)
            if a == 0 or b == 0 or c == 0:
                continue
            # resize to the network input size (config.IMAGE_SIZE)
            resized_proposal_img = resize_image(proposal_img, config.IMAGE_SIZE, config.IMAGE_SIZE)
            candidates.add(r['rect'])
            img_float = np.asarray(resized_proposal_img, dtype="float32")
            images.append(img_float)
            # IOU between the reference rectangle and this proposal
            iou_val = IOU(ref_rect_int, proposal_vertice)
            # x, y, w, h differences, used for bounding-box regression
            rects.append([(Gx-x)/w, (Gy-y)/h, math.log(Gw/w), math.log(Gh/h)])
            # labels, let 0 represent default class, which is background
            if is_svm:
                # IOU below the threshold: background, label 0
                if iou_val < threshold:
                    labels.append(0)
                elif iou_val > 0.6:
                    labels.append(index)
                else:
                    # in-between overlap: marked -1
                    labels.append(-1)
            else:
                label = np.zeros(num_clss + 1)
                if iou_val < threshold:
                    label[0] = 1
                else:
                    label[index] = 1
                labels.append(label)
        if is_svm:
            # The reference rectangle itself is added as a sample with a
            # zero regression target.
            ref_img, ref_vertice = clip_pic(img, ref_rect_int)
            resized_ref_img = resize_image(ref_img, config.IMAGE_SIZE, config.IMAGE_SIZE)
            img_float = np.asarray(resized_ref_img, dtype="float32")
            images.append(img_float)
            rects.append([0, 0, 0, 0])
            labels.append(index)
        tools.view_bar("processing image of %s" % datafile.split('\\')[-1].strip(), num + 1, len(train_list))

        if save:
            # Output name: image file stem (surrounding whitespace stripped) + '_data.npy'
            out_path = os.path.join(save_path, tmp[0].split('/')[-1].split('.')[0].strip()) + '_data.npy'
            # NOTE(review): the saved object is a nested list of differently
            # shaped items; recent NumPy releases may require an explicit
            # object array here -- confirm against the loading code.
            if is_svm:
                np.save(out_path, [images, labels, rects])
            else:
                np.save(out_path, [images, labels])
    print(' ')
def train_svms(train_file_folder, model):
    """Train one SVM per ``.txt`` list in ``train_file_folder``.

    Each training list is expected to hold a single class.  For every list
    the samples returned by ``generate_single_svm_train`` are run through
    ``model.predict`` to obtain features, an ``SVC`` is fitted with
    hard-negative mining, and the classifier is dumped next to the list as
    ``<list stem>_svm.pkl``.  Features and rectangles of all positive samples
    (label > 0) are additionally saved to ``bbox_train.npy`` for the
    bounding-box regressor.

    Args:
        train_file_folder: directory holding the per-class ``.txt`` lists;
            also receives the output files.
        model: feature extractor exposing ``predict(list_of_images)``.

    Returns:
        list: the trained ``SVC`` objects, one per processed list.
    """
    files = os.listdir(train_file_folder)
    svms = []
    bbox_train_features = []
    rects = []
    for train_file in files:
        if train_file.split('.')[-1] != 'txt':
            continue
        pred_last = -1
        pred_now = 0
        X, Y, R = generate_single_svm_train(os.path.join(train_file_folder, train_file))
        Y1 = []
        features1 = []
        features_hard = []
        for ind, i in enumerate(X):
            # extract features
            feats = model.predict([i])
            # every labelled sample (label >= 0) goes to the training set
            if Y[ind] >= 0:
                Y1.append(Y[ind])
                features1.append(feats[0])
                # positive samples also feed the bounding-box training set
                if Y[ind] > 0:
                    bbox_train_features.append(feats[0])
                    rects.append(R[ind])
            # the remaining samples (label < 0) form the mining pool
            else:
                features_hard.append(feats[0])
            tools.view_bar("extract features of %s" % train_file, ind + 1, len(X))

        # hard-negative mining
        clf = SVC(probability=True)
        # keep training until the accuracy on the pool stops improving
        while pred_now > pred_last:
            clf.fit(features1, Y1)
            # Classify the whole pool in a single call per round.
            if features_hard:
                predictions = clf.predict([f.tolist() for f in features_hard])
            else:
                predictions = []
            # number of pool samples classified as background
            count = 0
            features_new_hard = []
            index_new_hard = []
            for ind, pred in enumerate(predictions):
                if pred == 0:
                    count += 1
                # classified as a positive: candidate hard negative
                elif pred > 0:
                    features_new_hard.append(features_hard[ind])
                    index_new_hard.append(ind)
            # stop when there are too few hard negatives (fewer than 10)
            n_promote = len(features_new_hard) // 10
            if n_promote < 1:
                break
            pred_last = pred_now
            # accuracy on the pool for this round
            pred_now = float(count) / len(features_hard)
            # rank the hard negatives by the probability in column 1 of
            # predict_proba and move the top 10% into the training set
            probabilities = clf.predict_proba([f.tolist() for f in features_new_hard])
            Y_new_hard = [p[1] for p in probabilities]
            sorted_index = np.argsort(-np.array(Y_new_hard)).tolist()[0:n_promote]
            promoted = set()
            for idx in sorted_index:
                features1.append(features_new_hard[idx])
                Y1.append(0)
                promoted.add(index_new_hard[idx])
            # Drop the promoted samples from the pool in one pass.  Popping
            # them one by one would shift the remaining positions and remove
            # the wrong samples.
            features_hard = [f for j, f in enumerate(features_hard) if j not in promoted]

        print(' ')
        print("feature dimension")
        print(np.shape(features1))
        svms.append(clf)
        # serialize the classifier to disk
        joblib.dump(clf, os.path.join(train_file_folder, str(train_file.split('.')[0]) + '_svm.pkl'))

    # save the bounding-box regression training set
    np.save((os.path.join(train_file_folder, 'bbox_train.npy')),
            [bbox_train_features, rects])

    return svms
def main(input_size, classifier_tfrecord_fp, localizer_tfrecord_fp,
         root_data_dir):
    """Build the classification and bbox-regression TFRecord files.

    Sample lists are read from
    ``<root_data_dir>/native_<input_size>/{pos,neg,part}_<input_size>.txt``.
    Every line starts with an image path; lines used for regression carry
    four float offsets in fields 2..5.

    Args:
        input_size: side length of the network input; images of any other
            size are resized to ``input_size`` x ``input_size``.
        classifier_tfrecord_fp: output path of the classification records
            (all positives labelled ``[0, 1]`` plus up to 1000000 random
            negatives labelled ``[1, 0]``).
        localizer_tfrecord_fp: output path of the regression records (all
            positives plus up to 300000 random parts, labelled with their
            four offsets).
        root_data_dir: directory containing the ``native_<input_size>`` folder.

    Every record holds the raw bytes of the float32 label (``label_raw``)
    and of the uint8 image (``image_raw``); each file is shuffled with
    ``random.shuffle`` before writing.

    Raises:
        IOError: if an image listed in a sample file cannot be read.
    """
    # Resize target follows the requested input size (it used to be fixed at
    # 12 regardless of the argument).
    size = int(input_size)
    net = os.path.join(root_data_dir, 'native_' + str(input_size))

    def read_lines(kind):
        with open('%s/%s_%s.txt' % (net, kind, input_size), 'r') as f:
            return f.readlines()

    def make_example(line, cls_label=None):
        """Turn one sample line into a tf.train.Example.

        With ``cls_label`` the label is that fixed vector; without it the
        label is the four offsets stored in fields 2..5 of the line.
        """
        words = line.split()
        image_file_name = words[0]
        im = cv2.imread(image_file_name)
        if im is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('cannot read image: %s' % image_file_name)
        h, w = im.shape[:2]
        if h != size or w != size:
            im = cv2.resize(im, (size, size))
        im = im.astype('uint8')
        if cls_label is None:
            label = np.array([float(words[k]) for k in range(2, 6)],
                             dtype='float32')
        else:
            label = np.array(cls_label, dtype='float32')
        return tf.train.Example(features=tf.train.Features(
            feature={
                'label_raw': bytes_feature(label.tobytes()),
                'image_raw': bytes_feature(im.tobytes())
            }))

    def collect(examples, title, lines, cls_label=None):
        """Append one example per line, reporting progress as it goes."""
        print('\n' + title)
        total = len(lines)
        for done, line in enumerate(lines):
            view_bar(done, total)
            examples.append(make_example(line, cls_label))

    def write_shuffled(filename, examples):
        """Shuffle the examples in place and write them to ``filename``."""
        print(len(examples))
        random.shuffle(examples)
        print('Writing')
        # The context manager closes the writer even if a write fails.
        with tf.python_io.TFRecordWriter(filename) as writer:
            for example in examples:
                writer.write(example.SerializeToString())

    pos = read_lines('pos')
    neg = read_lines('neg')
    part = read_lines('part')

    # ---- classification records ----
    examples = []
    collect(examples, 'pos', pos, [0, 1])
    neg_keep = npr.choice(len(neg), size=min(len(neg), 1000000), replace=False)
    collect(examples, 'neg', [neg[i] for i in neg_keep], [1, 0])
    write_shuffled(classifier_tfrecord_fp, examples)

    # ---- bounding-box regression records ----
    examples = []
    collect(examples, 'pos', pos)
    part_keep = npr.choice(len(part),
                           size=min(len(part), 300000),
                           replace=False)
    collect(examples, 'part', [part[i] for i in part_keep])
    write_shuffled(localizer_tfrecord_fp, examples)