def convert_training_model_without_landmark(PNet_train_with_lm_path):

    PNet_train_without_lm = pnet(training=True, train_with_landmark=False)

    PNet_train_with_lm = pnet(training=True, train_with_landmark=True)
    PNet_train_with_lm.load_weights(PNet_train_with_lm_path)

    # Collect the weights of every layer in the landmark-trained model.
    temp_weights_list = []
    for layer in PNet_train_with_lm.layers:
        temp_weights_list.append(layer.get_weights())

    # Copy the weights into the landmark-free model. Only the first
    # len(layers) - 3 layers are copied; the trailing layers presumably differ
    # between the two graphs.
    for i in range(len(PNet_train_without_lm.layers) - 3):
        PNet_train_without_lm.layers[i].set_weights(temp_weights_list[i])

    return PNet_train_without_lm
def convert_model_for_inference(train_with_lm, train_model_path,
                                inference_model_path):

    PNet_train = pnet(training=True, train_with_landmark=train_with_lm)
    PNet = pnet(training=False)

    PNet_train.load_weights(train_model_path)
    # PNet_train.summary()

    # Collect the weights of every layer in the training model.
    temp_weights_list = []
    for layer in PNet_train.layers:
        temp_weights_list.append(layer.get_weights())

    # Copy them layer by layer into the inference model (both graphs are
    # assumed to list their shared layers in the same order).
    for i in range(len(PNet.layers)):
        PNet.layers[i].set_weights(temp_weights_list[i])

    PNet.save(inference_model_path)

    return PNet_train, PNet
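
# A minimal usage sketch for the two converters above. The checkpoint paths are
# illustrative (they follow the file names used elsewhere in this listing) and
# may need to be adapted to your own training output.
if __name__ == '__main__':
    # 1) Strip the landmark branch from a landmark-trained PNet checkpoint.
    PNet_no_lm = convert_training_model_without_landmark(
        '../Models/PNet_train_with_lm.h5')
    PNet_no_lm.save_weights('../Models/PNet_train_without_lm.h5')

    # 2) Turn a training checkpoint into a deployable inference model.
    convert_model_for_inference(
        train_with_lm=True,
        train_model_path='../Models/PNet_train_with_lm.h5',
        inference_model_path='../Models/PNet.h5')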
Example #3
def main(args):

    IMG_SIZE = args.IMG_SIZE
    if args.USE_PRETRAINED_MODEL == 0:
        USE_PRETRAINED_MODEL = False
    else:
        USE_PRETRAINED_MODEL = True
    BATCH_SIZE = 1792
    EPOCHS = 100
    DATA_COMPOSE_RATIO = [1 / 7., 3 / 7., 1 / 7.,
                          2 / 7.]  # for pos, neg, part-face & landmark data
    SAMPLE_KEEP_RATIO = 0.7
    if args.OPTIMIZER == 'sgd':
        OPTIMIZER = SGD
    else:
        OPTIMIZER = Adam
    if IMG_SIZE == 12:
        print('Training PNet')
        loss_weights = [1., 0.5, 0.5]
        model = pnet(training=True,
                     train_with_landmark=not args.TRAIN_WITHOUT_LANDMARK)
        if USE_PRETRAINED_MODEL:
            model.load_weights('../Models/PNet_train.h5')
    elif IMG_SIZE == 24:
        print('Training RNet')
        loss_weights = [1., 0.5, 0.5]
        model = rnet(training=True,
                     train_with_landmark=not args.TRAIN_WITHOUT_LANDMARK)
        if USE_PRETRAINED_MODEL:
            model.load_weights('../Models/RNet_train.h5')
    elif IMG_SIZE == 48:
        print('Training ONet')
        loss_weights = [1., 0.5, 1.]
        model = onet(training=True,
                     train_with_landmark=not args.TRAIN_WITHOUT_LANDMARK)
        if USE_PRETRAINED_MODEL:
            model.load_weights('../Models/ONet_train.h5')
    else:
        raise Exception("IMG_SIZE must be one of 12, 24 and 48. ")

    TFRECORDS_DIR = os.path.join(r'../Data', str(IMG_SIZE))
    POS_TFRECORDS_PATH_LIST = []
    NEG_TFRECORDS_PATH_LIST = []
    PART_TFRECORDS_PATH_LIST = []
    LANDMARK_TFRECORDS_PATH_LIST = []
    for file_name in os.listdir(TFRECORDS_DIR):
        if file_name.endswith('.tfrecord'):
            if 'pos' in file_name:
                POS_TFRECORDS_PATH_LIST.append(
                    os.path.join(TFRECORDS_DIR, file_name))
            elif 'neg' in file_name:
                NEG_TFRECORDS_PATH_LIST.append(
                    os.path.join(TFRECORDS_DIR, file_name))
            elif 'part' in file_name:
                PART_TFRECORDS_PATH_LIST.append(
                    os.path.join(TFRECORDS_DIR, file_name))
            elif 'landmark' in file_name:
                LANDMARK_TFRECORDS_PATH_LIST.append(
                    os.path.join(TFRECORDS_DIR, file_name))

    raw_pos_dataset = tf.data.TFRecordDataset(POS_TFRECORDS_PATH_LIST)
    raw_neg_dataset = tf.data.TFRecordDataset(NEG_TFRECORDS_PATH_LIST)
    raw_part_dataset = tf.data.TFRecordDataset(PART_TFRECORDS_PATH_LIST)
    raw_landmark_dataset = tf.data.TFRecordDataset(
        LANDMARK_TFRECORDS_PATH_LIST)

    image_feature_description = {
        'height': tf.io.FixedLenFeature([], tf.int64),
        'width': tf.io.FixedLenFeature([], tf.int64),
        'depth': tf.io.FixedLenFeature([], tf.int64),
        'info': tf.io.FixedLenFeature([17], tf.float32),
        'image_raw': tf.io.FixedLenFeature([], tf.string),
    }

    def _read_tfrecord(serialized_example):

        example = tf.io.parse_single_example(serialized_example,
                                             image_feature_description)

        img = tf.image.decode_jpeg(example['image_raw'],
                                   channels=3)  # RGB rather than BGR!!!
        img = (tf.cast(img, tf.float32) - 127.5) / 128.
        img_shape = [example['height'], example['width'], example['depth']]
        img = tf.reshape(img, img_shape)

        info = example['info']

        return img, info

    parsed_pos_dataset = raw_pos_dataset.map(_read_tfrecord)
    parsed_neg_dataset = raw_neg_dataset.map(_read_tfrecord)
    parsed_part_dataset = raw_part_dataset.map(_read_tfrecord)
    parsed_landmark_dataset = raw_landmark_dataset.map(_read_tfrecord)

    parsed_image_dataset = tf.data.Dataset.zip(
        (parsed_pos_dataset.repeat().shuffle(16384).batch(
            int(BATCH_SIZE * DATA_COMPOSE_RATIO[0])),
         parsed_neg_dataset.repeat().shuffle(16384).batch(
             int(BATCH_SIZE * DATA_COMPOSE_RATIO[1])),
         parsed_part_dataset.repeat().shuffle(16384).batch(
             int(BATCH_SIZE * DATA_COMPOSE_RATIO[2])),
         parsed_landmark_dataset.repeat().shuffle(16384).batch(
             int(BATCH_SIZE * DATA_COMPOSE_RATIO[3]))))

    def concatenate(pos, neg, part, landmark):

        # Each argument is an (image_batch, info_batch) pair from one branch.
        pos_img, pos_info = pos
        neg_img, neg_info = neg
        part_img, part_info = part
        landmark_img, landmark_info = landmark

        img_tensor = tf.concat(
            [pos_img, neg_img, part_img, landmark_img], axis=0)
        info_tensor = tf.concat(
            [pos_info, neg_info, part_info, landmark_info], axis=0)

        return img_tensor, info_tensor

    ds = parsed_image_dataset.map(concatenate)
    ds = ds.repeat()
    ds = ds.shuffle(32)
    ds = ds.prefetch(32)
    '''Building custom loss/cost function'''

    def custom_loss(y_true, y_pred, loss_weights=loss_weights):  # Verified

        zero_index = K.zeros_like(y_true[:, 0])
        ones_index = K.ones_like(y_true[:, 0])

        # Classifier
        labels = y_true[:, 0]
        class_preds = y_pred[:, 0]
        bi_crossentropy_loss = -labels * K.log(class_preds) - (
            1 - labels) * K.log(1 - class_preds)

        classify_valid_index = tf.where(K.less(y_true[:, 0], 0), zero_index,
                                        ones_index)
        classify_keep_num = K.cast(
            tf.cast(tf.reduce_sum(classify_valid_index), tf.float32) *
            SAMPLE_KEEP_RATIO,
            dtype=tf.int32)
        # For the classification task, keep only the hardest 70% of the valid samples.

        classify_loss_sum = bi_crossentropy_loss * tf.cast(
            classify_valid_index, bi_crossentropy_loss.dtype)
        classify_loss_sum_filtered, _ = tf.nn.top_k(classify_loss_sum,
                                                    k=classify_keep_num)
        # classify_loss = K.mean(classify_loss_sum_filtered)
        if classify_loss_sum_filtered.shape == []:
            classify_loss = tf.constant(0, dtype=tf.float32)
        else:
            classify_loss = K.mean(classify_loss_sum_filtered)

        # Bounding box regressor
        rois = y_true[:, 1:5]
        roi_preds = y_pred[:, 1:5]
        roi_raw_mean_square_error = K.sum(K.square(rois - roi_preds),
                                          axis=1)  # mse
        # roi_raw_smooth_l1_loss = K.mean(tf.where(K.abs(rois - roi_preds) < 1, 0.5 * K.square(rois - roi_preds), K.abs(rois - roi_preds) - 0.5)) # L1 Smooth Loss

        roi_valid_index = tf.where(K.equal(K.abs(y_true[:, 0]), 1), ones_index,
                                   zero_index)
        roi_keep_num = K.cast(tf.reduce_sum(roi_valid_index), dtype=tf.int32)

        roi_valid_mean_square_error = roi_raw_mean_square_error * tf.cast(
            roi_valid_index, roi_raw_mean_square_error.dtype)
        roi_filtered_mean_square_error, _ = tf.nn.top_k(
            roi_valid_mean_square_error, k=roi_keep_num)
        # roi_loss = K.mean(roi_filtered_mean_square_error)
        if roi_filtered_mean_square_error.shape == []:
            roi_loss = tf.constant(0, dtype=tf.float32)
        else:
            roi_loss = K.mean(roi_filtered_mean_square_error)
        # roi_valid_smooth_l1_loss = roi_raw_smooth_l1_loss * roi_valid_index
        # roi_filtered_smooth_l1_loss, _ = tf.nn.top_k(roi_valid_smooth_l1_loss, k = roi_keep_num)
        # roi_loss = K.mean(roi_filtered_smooth_l1_loss)

        # Landmark regressor
        pts = y_true[:, 5:17]
        pt_preds = y_pred[:, 5:17]
        pts_raw_mean_square_error = K.sum(K.square(pts - pt_preds),
                                          axis=1)  # mse
        # pts_raw_smooth_l1_loss = K.mean(tf.where(K.abs(pts - pt_preds) < 1, 0.5 * K.square(pts - pt_preds), K.abs(pts - pt_preds) - 0.5)) # L1 Smooth Loss

        pts_valid_index = tf.where(K.equal(y_true[:, 0], -2), ones_index,
                                   zero_index)
        pts_keep_num = K.cast(tf.reduce_sum(pts_valid_index), dtype=tf.int32)

        pts_valid_mean_square_error = pts_raw_mean_square_error * tf.cast(
            pts_valid_index, tf.float32)
        pts_filtered_mean_square_error, _ = tf.nn.top_k(
            pts_valid_mean_square_error, k=pts_keep_num)
        # pts_loss = K.mean(pts_filtered_mean_square_error)
        if len(pts_filtered_mean_square_error.shape) == 0:
            pts_loss = tf.constant(0, dtype=tf.float32)
        else:
            pts_loss = K.mean(pts_filtered_mean_square_error)
        # pts_valid_smooth_l1_loss = pts_raw_smooth_l1_loss * pts_valid_index
        # pts_filtered_smooth_l1_loss, _ = tf.nn.top_k(pts_valid_smooth_l1_loss, k = pts_keep_num)
        # pts_loss = K.mean(pts_filtered_smooth_l1_loss)

        loss = classify_loss * loss_weights[0] + roi_loss * loss_weights[
            1] + pts_loss * loss_weights[2]

        return loss

    '''Training'''
    lr = args.LEARNING_RATE
    model.compile(optimizer=OPTIMIZER(lr=lr), loss=custom_loss)
    model.fit(ds,
              steps_per_epoch=1636,
              epochs=EPOCHS,
              validation_data=ds,
              validation_steps=1636)
    # Note: the save path is hard-coded; adjust it when training RNet/ONet or
    # when training with landmarks.
    model.save(r'../Models/PNet_trained_without_lm.h5')
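
# A small NumPy sketch of the sample-label encoding that custom_loss above
# relies on (inferred from its masks: 1 = positive, 0 = negative, -1 = part
# face, -2 = landmark sample). It only illustrates which loss terms each
# sample type contributes to and is not part of the training code.
import numpy as np

labels = np.array([1., 0., -1., -2.])
classify_mask = np.where(labels < 0, 0., 1.)      # pos & neg -> face classification
roi_mask = np.where(np.abs(labels) == 1, 1., 0.)  # pos & part -> bbox regression
pts_mask = np.where(labels == -2, 1., 0.)         # landmark -> landmark regression
print(classify_mask)  # [1. 1. 0. 0.]
print(roi_mask)       # [1. 0. 1. 0.]
print(pts_mask)       # [0. 0. 0. 1.]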
Example #4
def main(args):

    IMG_SIZE = args.IMG_SIZE

    if args.USE_PRETRAINED_MODEL == 0:
        USE_PRETRAINED_MODEL = False
    else:
        USE_PRETRAINED_MODEL = True

    if args.TRAIN_WITH_LANDMARK == 0:
        TRAIN_WITH_LANDMARK = False
        BATCH_SIZE = 1280
    else:
        TRAIN_WITH_LANDMARK = True
        BATCH_SIZE = 1792

    if args.TRAIN_WITH_HARD == 1:
        TRAIN_WITH_HARD = True
    else:
        TRAIN_WITH_HARD = False

    THREAD_NUM = 4
    EPOCHS = 100
    copy_num = 7 if TRAIN_WITH_LANDMARK else 5
    if IMG_SIZE == 12:
        MODEL = 'PNet'
        loss_weights = [1., 0.5, 0.5]
        model = pnet(training=True, train_with_landmark=TRAIN_WITH_LANDMARK)
        if USE_PRETRAINED_MODEL and TRAIN_WITH_LANDMARK:
            model.load_weights('../Models/PNet_train_with_lm.h5')
        elif USE_PRETRAINED_MODEL and not TRAIN_WITH_LANDMARK:
            model.load_weights('../Models/PNet_train_without_lm.h5')
    elif IMG_SIZE == 24:
        MODEL = 'RNet'
        loss_weights = [1., 0.5, 0.5]
        model = rnet(training=True, train_with_landmark=TRAIN_WITH_LANDMARK)
        if USE_PRETRAINED_MODEL and TRAIN_WITH_LANDMARK:
            model.load_weights('../Models/RNet_train_with_lm.h5')
        elif USE_PRETRAINED_MODEL and not TRAIN_WITH_LANDMARK:
            model.load_weights('../Models/RNet_train_without_lm.h5')
    elif IMG_SIZE == 48:
        MODEL = 'ONet'
        loss_weights = [1., 0.5, 1.]
        model = onet(training=True, train_with_landmark=TRAIN_WITH_LANDMARK)
        if USE_PRETRAINED_MODEL and TRAIN_WITH_LANDMARK:
            model.load_weights('../Models/ONet_train_with_lm.h5')
        elif USE_PRETRAINED_MODEL and not TRAIN_WITH_LANDMARK:
            model.load_weights('../Models/ONet_train_without_lm.h5')
    else:
        raise Exception("IMG_SIZE must be one of 12, 24 and 48. ")

    AUG_IMG_PATH = r'../Data/' + str(IMG_SIZE)
    SAMPLE_KEEP_RATIO = 0.7  # Online hard sample mining: keep only the hardest 70% of the valid samples in the face/non-face classification loss.
    '''Importing the dataset'''
    # Load the records
    POS_RECORD_PATH = r'../Data/' + str(IMG_SIZE) + '/pos_record.pkl'
    PART_RECORD_PATH = r'../Data/' + str(IMG_SIZE) + '/part_record.pkl'
    NEG_RECORD_PATH = r'../Data/' + str(IMG_SIZE) + '/neg_record.pkl'

    with open(POS_RECORD_PATH, 'rb') as f:
        pos_info = pkl.load(f)

    with open(PART_RECORD_PATH, 'rb') as f:
        part_info = pkl.load(f)

    with open(NEG_RECORD_PATH, 'rb') as f:
        neg_info = pkl.load(f)

    if TRAIN_WITH_LANDMARK:
        LDMK_RECORD_PATH = r'../Data/' + str(IMG_SIZE) + '/landmark_record.pkl'
        with open(LDMK_RECORD_PATH, 'rb') as f:
            ldmk_info = pkl.load(f)

    if TRAIN_WITH_HARD:
        NEG_HARD_RECORD_PATH = r'../Data/' + str(
            IMG_SIZE) + '/neg_hard_record.pkl'
        with open(NEG_HARD_RECORD_PATH, 'rb') as f:
            neg_hard_info = pkl.load(f)

    if not TRAIN_WITH_LANDMARK and not TRAIN_WITH_HARD:
        # neg: pos: part-face = 3: 1: 1
        lengths = np.array([len(neg_info) / 3, len(pos_info), len(part_info)])
        # Find the dataset that is smallest relative to its share of the batch
        min_index = lengths.argsort()[0]
        if min_index == 0:
            smallest_set = neg_info
            branch_batch_size = int(BATCH_SIZE / copy_num * 3)
        elif min_index == 1:
            smallest_set = pos_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 2:
            smallest_set = part_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        else:
            raise Exception("Getting lengths of datasets error")

    elif not TRAIN_WITH_LANDMARK and TRAIN_WITH_HARD:
        # neg: neg_hard: pos: part-face = 2: 1: 1: 1
        lengths = np.array([
            len(neg_info) / 2,
            len(neg_hard_info),
            len(pos_info),
            len(part_info)
        ])
        # Find the dataset that is smallest relative to its share of the batch
        min_index = lengths.argsort()[0]
        if min_index == 0:
            smallest_set = neg_info
            branch_batch_size = int(BATCH_SIZE / copy_num * 2)
        elif min_index == 1:
            smallest_set = neg_hard_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 2:
            smallest_set = pos_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 3:
            smallest_set = part_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        else:
            raise Exception("Getting lengths of datasets error")

    elif TRAIN_WITH_LANDMARK and not TRAIN_WITH_HARD:
        # neg: pos: part-face: landmark = 3: 1: 1: 2
        lengths = np.array([
            len(neg_info) / 3,
            len(pos_info),
            len(part_info),
            len(ldmk_info) / 2
        ])
        # Find the dataset that is smallest relative to its share of the batch
        min_index = lengths.argsort()[0]
        if min_index == 0:
            smallest_set = neg_info
            branch_batch_size = int(BATCH_SIZE / copy_num * 3)
        elif min_index == 1:
            smallest_set = pos_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 2:
            smallest_set = part_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 3:
            smallest_set = ldmk_info
            branch_batch_size = int(BATCH_SIZE / copy_num * 2)
        else:
            raise Exception("Getting lengths of datasets error")

    else:
        # neg: neg_hard: pos: part-face: landmark = 2: 1: 1: 1: 2
        lengths = np.array([
            len(neg_info) / 2,
            len(neg_hard_info),
            len(pos_info),
            len(part_info),
            len(ldmk_info) / 2
        ])
        # Find the dataset that is smallest relative to its share of the batch
        min_index = lengths.argsort()[0]
        if min_index == 0:
            smallest_set = neg_info
            branch_batch_size = int(BATCH_SIZE / copy_num * 2)
        elif min_index == 1:
            smallest_set = neg_hard_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 2:
            smallest_set = pos_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 3:
            smallest_set = part_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 4:
            smallest_set = ldmk_info
            branch_batch_size = int(BATCH_SIZE / copy_num * 2)
        else:
            raise Exception("Getting lengths of datasets error")
    '''Building custom loss/cost function'''
    if TRAIN_WITH_LANDMARK:

        def custom_loss(y_true, y_pred, loss_weights=loss_weights):  # Verified

            zero_index = K.zeros_like(y_true[:, 0])
            ones_index = K.ones_like(y_true[:, 0])

            # Classifier
            labels = y_true[:, 0]
            class_preds = y_pred[:, 0]
            bi_crossentropy_loss = -labels * K.log(class_preds) - (
                1 - labels) * K.log(1 - class_preds)

            classify_valid_index = tf.where(K.less(y_true[:, 0], 0),
                                            zero_index, ones_index)
            classify_keep_num = K.cast(tf.reduce_sum(classify_valid_index) *
                                       SAMPLE_KEEP_RATIO,
                                       dtype=tf.int32)
            # For the classification task, keep only the hardest 70% of the valid samples.

            classify_loss_sum = bi_crossentropy_loss * classify_valid_index
            classify_loss_sum_filtered, _ = tf.nn.top_k(classify_loss_sum,
                                                        k=classify_keep_num)
            classify_loss = K.mean(classify_loss_sum_filtered)

            # Bounding box regressor
            rois = y_true[:, 1:5]
            roi_preds = y_pred[:, 1:5]
            # roi_raw_mean_square_error = K.sum(K.square(rois - roi_preds), axis = 1) # mse
            roi_raw_smooth_l1_loss = K.mean(
                tf.where(
                    K.abs(rois - roi_preds) < 1,
                    0.5 * K.square(rois - roi_preds),
                    K.abs(rois - roi_preds) - 0.5),
                axis=1)  # per-sample L1 smooth loss, so the validity mask applies sample-wise

            roi_valid_index = tf.where(K.equal(K.abs(y_true[:, 0]), 1),
                                       ones_index, zero_index)
            roi_keep_num = K.cast(tf.reduce_sum(roi_valid_index),
                                  dtype=tf.int32)

            # roi_valid_mean_square_error = roi_raw_mean_square_error * roi_valid_index
            # roi_filtered_mean_square_error, _ = tf.nn.top_k(roi_valid_mean_square_error, k = roi_keep_num)
            # roi_loss = K.mean(roi_filtered_mean_square_error)
            roi_valid_smooth_l1_loss = roi_raw_smooth_l1_loss * roi_valid_index
            roi_filtered_smooth_l1_loss, _ = tf.nn.top_k(
                roi_valid_smooth_l1_loss, k=roi_keep_num)
            roi_loss = K.mean(roi_filtered_smooth_l1_loss)

            # Landmark regressor
            pts = y_true[:, 5:17]
            pt_preds = y_pred[:, 5:17]
            # pts_raw_mean_square_error  = K.sum(K.square(pts - pt_preds), axis = 1) # mse
            pts_raw_smooth_l1_loss = K.mean(
                tf.where(
                    K.abs(pts - pt_preds) < 1, 0.5 * K.square(pts - pt_preds),
                    K.abs(pts - pt_preds) - 0.5),
                axis=1)  # per-sample L1 smooth loss

            pts_valid_index = tf.where(K.equal(y_true[:, 0], -2), ones_index,
                                       zero_index)
            pts_keep_num = K.cast(tf.reduce_sum(pts_valid_index),
                                  dtype=tf.int32)

            # pts_valid_mean_square_error = pts_raw_mean_square_error * pts_valid_index
            # pts_filtered_mean_square_error, _ = tf.nn.top_k(pts_valid_mean_square_error, k = pts_keep_num)
            # pts_loss = K.mean(pts_filtered_mean_square_error)
            pts_valid_smooth_l1_loss = pts_raw_smooth_l1_loss * pts_valid_index
            pts_filtered_smooth_l1_loss, _ = tf.nn.top_k(
                pts_valid_smooth_l1_loss, k=pts_keep_num)
            pts_loss = K.mean(pts_filtered_smooth_l1_loss)

            loss = classify_loss * loss_weights[0] + roi_loss * loss_weights[
                1] + pts_loss * loss_weights[2]

            return loss

    else:

        def custom_loss(y_true, y_pred, loss_weights=loss_weights):  # Verified

            zero_index = K.zeros_like(y_true[:, 0])
            ones_index = K.ones_like(y_true[:, 0])

            # Classifier
            labels = y_true[:, 0]
            class_preds = y_pred[:, 0]
            bi_crossentropy_loss = -labels * K.log(class_preds) - (
                1 - labels) * K.log(1 - class_preds)

            classify_valid_index = tf.where(K.less(y_true[:, 0], 0),
                                            zero_index, ones_index)
            classify_keep_num = K.cast(tf.reduce_sum(classify_valid_index) *
                                       SAMPLE_KEEP_RATIO,
                                       dtype=tf.int32)
            # For the classification task, keep only the hardest 70% of the valid samples.

            classify_loss_sum = bi_crossentropy_loss * classify_valid_index
            classify_loss_sum_filtered, _ = tf.nn.top_k(classify_loss_sum,
                                                        k=classify_keep_num)
            classify_loss = K.mean(classify_loss_sum_filtered)

            # Bounding box regressor
            rois = y_true[:, 1:5]
            roi_preds = y_pred[:, 1:5]
            # roi_raw_mean_square_error = K.sum(K.square(rois - roi_preds), axis = 1) # mse
            roi_raw_smooth_l1_loss = K.mean(
                tf.where(
                    K.abs(rois - roi_preds) < 1,
                    0.5 * K.square(rois - roi_preds),
                    K.abs(rois - roi_preds) - 0.5),
                axis=1)  # per-sample L1 smooth loss, so the validity mask applies sample-wise

            roi_valid_index = tf.where(K.equal(K.abs(y_true[:, 0]), 1),
                                       ones_index, zero_index)
            roi_keep_num = K.cast(tf.reduce_sum(roi_valid_index),
                                  dtype=tf.int32)

            # roi_valid_mean_square_error = roi_raw_mean_square_error * roi_valid_index
            # roi_filtered_mean_square_error, _ = tf.nn.top_k(roi_valid_mean_square_error, k = roi_keep_num)
            # roi_loss = K.mean(roi_filtered_mean_square_error)
            roi_valid_smooth_l1_loss = roi_raw_smooth_l1_loss * roi_valid_index
            roi_filtered_smooth_l1_loss, _ = tf.nn.top_k(
                roi_valid_smooth_l1_loss, k=roi_keep_num)
            roi_loss = K.mean(roi_filtered_smooth_l1_loss)

            loss = classify_loss * loss_weights[0] + roi_loss * loss_weights[1]

            return loss

    '''Training'''
    # Save the model after every epoch
    # from tensorflow.keras.callbacks import ModelCheckpoint
    # check_pointer = ModelCheckpoint(filepath = 'trained_PNet.h5', verbose = 1, save_best_only = False)

    # Stream each epoch results into a .csv file
    from tensorflow.keras.callbacks import CSVLogger
    csv_logger = CSVLogger('training.csv', separator=',', append=True)
    # append = True append if file exists (useful for continuing training)
    # append = False overwrite existing file

    num_of_iters = len(smallest_set) // branch_batch_size
    if not USE_PRETRAINED_MODEL:
        lr = 0.001
        optimizer = Adam(lr=lr)
    else:
        lr = 0.0001
        optimizer = SGD(lr=lr)
    former_loss = 100000
    patience_count = 0
    loss_patience = 500
    for i in range(EPOCHS):

        print("Epoch: " + str(i))

        # Re-shuffle all the datasets each epoch
        random.shuffle(neg_info)
        random.shuffle(part_info)
        random.shuffle(pos_info)
        if TRAIN_WITH_LANDMARK:
            random.shuffle(ldmk_info)
        if TRAIN_WITH_HARD:
            random.shuffle(neg_hard_info)

        for j in range(num_of_iters):

            print("Iteration: " + str(j) + " in Epoch: " + str(i))

            pos_batch = pos_info[int(j *
                                     (BATCH_SIZE / copy_num)):int((j + 1) *
                                                                  (BATCH_SIZE /
                                                                   copy_num))]
            part_batch = part_info[int(j * (BATCH_SIZE / copy_num)):int(
                (j + 1) * (BATCH_SIZE / copy_num))]
            if TRAIN_WITH_LANDMARK:
                ldmk_batch = ldmk_info[int(j * (BATCH_SIZE / copy_num * 2)):int(
                    (j + 1) * (BATCH_SIZE / copy_num * 2))]
            if TRAIN_WITH_HARD:
                neg_batch = neg_info[int(j *
                                         (BATCH_SIZE / copy_num *
                                          2)):int((j + 1) *
                                                  (BATCH_SIZE / copy_num * 2))]
                neg_hard_batch = neg_hard_info[int(j * (
                    BATCH_SIZE / copy_num)):int((j + 1) *
                                                (BATCH_SIZE / copy_num))]
            else:
                neg_batch = neg_info[int(j *
                                         (BATCH_SIZE / copy_num *
                                          3)):int((j + 1) *
                                                  (BATCH_SIZE / copy_num * 3))]

            batch_list = []
            batch_list.extend(neg_batch)
            batch_list.extend(pos_batch)
            batch_list.extend(part_batch)
            if TRAIN_WITH_LANDMARK:
                batch_list.extend(ldmk_batch)
            if TRAIN_WITH_HARD:
                batch_list.extend(neg_hard_batch)
            random.shuffle(batch_list)

            # Multi-thread data processing
            num_of_batch_imgs = len(batch_list)
            thread_num = max(1, THREAD_NUM)
            num_per_thread = math.ceil(float(num_of_batch_imgs) / thread_num)
            threads = []
            X_batch_list = []
            Y_batch_list = []
            for t in range(thread_num):
                start_idx = int(num_per_thread * t)
                end_idx = int(min(num_per_thread * (t + 1), num_of_batch_imgs))
                cur_batch_list = batch_list[start_idx:end_idx]
                cur_thread = CustomThread(
                    minibatch_data_processing,
                    (cur_batch_list, AUG_IMG_PATH, TRAIN_WITH_LANDMARK))
                threads.append(cur_thread)
            for t in range(thread_num):
                threads[t].start()
            for t in range(thread_num):
                cur_processed_imgs, cur_processed_gts = threads[t].get_result()
                X_batch_list.extend(cur_processed_imgs)
                Y_batch_list.extend(cur_processed_gts)
            ''' 
            # Single-thread data processing 
            X_batch_list = []
            Y_batch_list = []
            for k in range(len(batch_list)):
                img_name = batch_list[k][0]
                # print(os.path.join(AUG_IMG_PATH, img_name))
                img = cv2.imread(os.path.join(AUG_IMG_PATH, img_name)) 
                distorted = image_random_distort(img) 
                X_batch_list.append(distorted / 255.)
                label = batch_list[k][1]
                roi = batch_list[k][2]
                landmark = batch_list[k][3]
                Y_batch_list.append([label, roi[0], roi[1], roi[2], roi[3], 
                                     landmark[0], landmark[1], landmark[2], landmark[3], 
                                     landmark[4], landmark[5], landmark[6], landmark[7], 
                                     landmark[8], landmark[9], landmark[10], landmark[11]])
            '''

            X_batch = np.array(X_batch_list)
            Y_batch = np.array(Y_batch_list)

            # Fit the data into the model and adjust the learning rate as
            # training proceeds (patience_count accumulates across iterations).
            model.compile(optimizer=optimizer, loss=custom_loss)
            hist = model.fit(x=X_batch,
                             y=Y_batch,
                             epochs=1,
                             shuffle=True,
                             callbacks=[csv_logger
                                        ])  # Consider .train_on_batch
            loss = hist.history['loss'][0]
            if loss >= former_loss:
                patience_count = patience_count + 1
            else:
                patience_count = 0
            former_loss = loss
            if patience_count > loss_patience:
                lr = lr * 0.1
                K.set_value(optimizer.lr, lr)  # apply the new learning rate
                print("Now the learning rate is changed to " + str(lr))
            if j != 0 and j % 1000 == 0:
                lr = lr * 0.5
                K.set_value(optimizer.lr, lr)  # apply the new learning rate
                print("Now the learning rate is changed to " + str(lr))

            if j % 200 == 0:
                if TRAIN_WITH_LANDMARK:
                    model_name = MODEL + '_trained_with_lm'
                else:
                    model_name = MODEL + '_trained_without_lm'
                model_name = model_name + '_epoch_' + str(i) + '_iter_' + str(
                    j) + '.h5'
                print("Saving model: " + model_name)
                model.save(os.path.join(r'../Models', model_name))
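
# A small arithmetic sketch of the per-branch batch sizes used above: each
# mini-batch is split into copy_num equal shares (7 with landmarks, 5 without),
# and every branch takes as many shares as its composition ratio specifies.
# branch_sizes() is a helper written for this illustration only.
def branch_sizes(batch_size, ratios):
    share = batch_size // sum(ratios.values())
    return {name: share * r for name, r in ratios.items()}

# With landmarks, no hard negatives: neg : pos : part : landmark = 3 : 1 : 1 : 2
print(branch_sizes(1792, {'neg': 3, 'pos': 1, 'part': 1, 'landmark': 2}))
# -> {'neg': 768, 'pos': 256, 'part': 256, 'landmark': 512}

# Without landmarks: neg : pos : part = 3 : 1 : 1
print(branch_sizes(1280, {'neg': 3, 'pos': 1, 'part': 1}))
# -> {'neg': 768, 'pos': 256, 'part': 256}

# With landmarks and hard negatives: neg : neg_hard : pos : part : landmark = 2 : 1 : 1 : 1 : 2
print(branch_sizes(1792, {'neg': 2, 'neg_hard': 1, 'pos': 1, 'part': 1, 'landmark': 2}))
# -> {'neg': 512, 'neg_hard': 256, 'pos': 256, 'part': 256, 'landmark': 512}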
Example #5
def main(args):
    
    IMG_SIZE = args.IMG_SIZE
    print("IMG_SIZE:", IMG_SIZE)
    if IMG_SIZE != 12 and IMG_SIZE != 24 and IMG_SIZE != 48: 
        raise Exception("Image size wrong!")
    IMG_ROOT_DIR = args.IMG_ROOT_DIR 
    if args.DEBUG == 1:
        DEBUG = True
    else:
        DEBUG = False

    thresholds = [0.6, 0.6, 0.7]
    
    PNet = pnet(training = False)
    PNet.load_weights('../Models/PNet.h5')
    PNet.summary() 
    
    RECORD_PATH = os.path.join(IMG_ROOT_DIR, 'wider_face_add_lm_10_10_info.pkl') 
    with open(RECORD_PATH, 'rb') as f:
        info = pkl.load(f)
    if DEBUG: 
        info = info[: 100] 
    
    neg_hard_save_dir = r'../Data/' + str(IMG_SIZE) + '/neg_hard' 
    if not os.path.exists(neg_hard_save_dir): 
        os.mkdir(neg_hard_save_dir)
    
    neg_hard_idx = 0 
    neg_hard_list = [] 
    
    for i in range(len(info)):
        
        pic_info = info[i]
        
        img_path = os.path.join(IMG_ROOT_DIR, 'JPEGImages', pic_info[0]) 
        img = cv2.imread(img_path) 
        height, width, channel = img.shape
        
        bboxes = np.array(pic_info[2]) # total bounding boxes in one picture 
        
        if (i + 1) % 1000 == 0: 
            print(str(i + 1) + " pics processed ") 
            print(str(neg_hard_idx) + " hard negative samples generated. ") 
    
        # Generate hard negative samples (only the PNet path is implemented here)
        if IMG_SIZE == 12:
            pred_rectangles_list = pnet_prediction(img, PNet, thresholds)
        else:
            raise NotImplementedError(
                "Hard negative mining is only implemented for IMG_SIZE == 12 (PNet). ")
        
        if args.AUGMENT_CONTROL > 0: 
            random.shuffle(pred_rectangles_list) 
            pred_rectangles_list = pred_rectangles_list[: args.AUGMENT_CONTROL] 
        
        pred_rectangles = np.array(pred_rectangles_list) 
        pred_boxes = utils.rect2square(pred_rectangles) 
        
        for j in range(len(pred_boxes)):
            
            pred_box = pred_boxes[j] 
            x1, y1, x2, y2 = np.array(pred_box[: 4]).astype(int)
            w = x2 - x1 + 1 
            h = y2 - y1 + 1 
            
            # Drop the box that exceeds the boundary or is too small 
            if w < 20 or h < 20 or x1 < 0 or y1 < 0 or x2 > width - 1 or y2 > height - 1: 
                continue
            
            crop_box = np.array([x1, y1, x2, y2])
            if bboxes.shape[0] == 0:
                iou = 0
            else:
                iou = utils.IoU(crop_box, bboxes)
            
            if np.max(iou) < 0.1: 
                
                cropped_img = img[y1: y2, x1: x2]
                resized_img = cv2.resize(cropped_img, (IMG_SIZE, IMG_SIZE), interpolation = cv2.INTER_LINEAR)
                saving_path = os.path.join(neg_hard_save_dir, str(neg_hard_idx) + '.jpg')
                success = cv2.imwrite(saving_path, resized_img)
                if not success: 
                    raise Exception("Neg picture " + str(neg_hard_idx) + " saving failed. ") 
                
                img_name = os.path.join('neg_hard', str(neg_hard_idx) + '.jpg')
                label = 0 
                roi = np.array([-1] * 4)
                landmark = np.array([-1] * 12)
                
                neg_hard_list.append([img_name, label, roi, landmark]) 
                
                neg_hard_idx = neg_hard_idx + 1
                    
    neg_hard_anno_path = r'../Data/' + str(IMG_SIZE) + '/neg_hard_record.pkl' 
    
    with open(neg_hard_anno_path, 'wb+') as f:
        pkl.dump(neg_hard_list, f)
    
    print("File saving done. ")