def convert_training_model_without_landmark(PNet_train_with_lm_path):
    PNet_train_without_lm = pnet(training=True, train_with_landmark=False)
    PNet_train_with_lm = pnet(training=True, train_with_landmark=True)
    PNet_train_with_lm.load_weights(PNet_train_with_lm_path)

    temp_weights_list = []
    for layer in PNet_train_with_lm.layers:
        temp_layer = PNet_train_with_lm.get_layer(layer.name)
        temp_weights = temp_layer.get_weights()
        temp_weights_list.append(temp_weights)

    for i in range(len(PNet_train_without_lm.layers) - 3):
        PNet_train_without_lm.get_layer(
            PNet_train_without_lm.layers[i].name).set_weights(
                temp_weights_list[i])
    return PNet_train_without_lm
def convert_model_for_inference(train_with_lm, train_model_path, inference_model_path):
    PNet_train = pnet(training=True, train_with_landmark=train_with_lm)
    PNet = pnet(training=False)
    PNet_train.load_weights(train_model_path)
    # PNet_train.summary()

    temp_weights_list = []
    for layer in PNet_train.layers:
        temp_layer = PNet_train.get_layer(layer.name)
        temp_weights = temp_layer.get_weights()
        temp_weights_list.append(temp_weights)

    for i in range(len(PNet.layers)):
        PNet.get_layer(PNet.layers[i].name).set_weights(temp_weights_list[i])

    PNet.save(inference_model_path)
    return PNet_train, PNet
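# Minimal usage sketch for the two converters above (not part of the original
# script). The checkpoint paths are assumptions for illustration only; they
# mirror the file names used elsewhere in this repo.
if __name__ == '__main__':
    # Drop the landmark branch from a landmark-trained PNet checkpoint.
    PNet_no_lm = convert_training_model_without_landmark(
        '../Models/PNet_train_with_lm.h5')
    PNet_no_lm.save_weights('../Models/PNet_train_without_lm.h5')

    # Turn a training-mode checkpoint into a deployable inference model.
    convert_model_for_inference(train_with_lm=True,
                                train_model_path='../Models/PNet_train_with_lm.h5',
                                inference_model_path='../Models/PNet.h5')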
def main(args):
    IMG_SIZE = args.IMG_SIZE
    USE_PRETRAINED_MODEL = args.USE_PRETRAINED_MODEL != 0

    BATCH_SIZE = 1792
    EPOCHS = 100
    DATA_COMPOSE_RATIO = [1 / 7., 3 / 7., 1 / 7., 2 / 7.]  # for pos, neg, part-face & landmark data
    SAMPLE_KEEP_RATIO = 0.7
    OPTIMIZER = SGD if args.OPTIMIZER == 'sgd' else Adam

    if IMG_SIZE == 12:
        print('Training PNet')
        loss_weights = [1., 0.5, 0.5]
        model = pnet(training=True,
                     train_with_landmark=not args.TRAIN_WITHOUT_LANDMARK)
        if USE_PRETRAINED_MODEL:
            model.load_weights('../Models/PNet_train.h5')
    elif IMG_SIZE == 24:
        print('Training RNet')
        loss_weights = [1., 0.5, 0.5]
        model = rnet(training=True,
                     train_with_landmark=not args.TRAIN_WITHOUT_LANDMARK)
        if USE_PRETRAINED_MODEL:
            model.load_weights('../Models/RNet_train.h5')
    elif IMG_SIZE == 48:
        print('Training ONet')
        loss_weights = [1., 0.5, 1.]
        model = onet(training=True,
                     train_with_landmark=not args.TRAIN_WITHOUT_LANDMARK)
        if USE_PRETRAINED_MODEL:
            model.load_weights('../Models/ONet_train.h5')
    else:
        raise Exception("IMG_SIZE must be one of 12, 24 and 48.")

    # Collect the TFRecord shards for each sample type.
    TFRECORDS_DIR = os.path.join(r'../Data', str(IMG_SIZE))
    POS_TFRECORDS_PATH_LIST = []
    NEG_TFRECORDS_PATH_LIST = []
    PART_TFRECORDS_PATH_LIST = []
    LANDMARK_TFRECORDS_PATH_LIST = []
    for file_name in os.listdir(TFRECORDS_DIR):
        if file_name.endswith('.tfrecord'):
            if 'pos' in file_name:
                POS_TFRECORDS_PATH_LIST.append(
                    os.path.join(TFRECORDS_DIR, file_name))
            elif 'neg' in file_name:
                NEG_TFRECORDS_PATH_LIST.append(
                    os.path.join(TFRECORDS_DIR, file_name))
            elif 'part' in file_name:
                PART_TFRECORDS_PATH_LIST.append(
                    os.path.join(TFRECORDS_DIR, file_name))
            elif 'landmark' in file_name:
                LANDMARK_TFRECORDS_PATH_LIST.append(
                    os.path.join(TFRECORDS_DIR, file_name))

    raw_pos_dataset = tf.data.TFRecordDataset(POS_TFRECORDS_PATH_LIST)
    raw_neg_dataset = tf.data.TFRecordDataset(NEG_TFRECORDS_PATH_LIST)
    raw_part_dataset = tf.data.TFRecordDataset(PART_TFRECORDS_PATH_LIST)
    raw_landmark_dataset = tf.data.TFRecordDataset(LANDMARK_TFRECORDS_PATH_LIST)

    image_feature_description = {
        'height': tf.io.FixedLenFeature([], tf.int64),
        'width': tf.io.FixedLenFeature([], tf.int64),
        'depth': tf.io.FixedLenFeature([], tf.int64),
        'info': tf.io.FixedLenFeature([17], tf.float32),
        'image_raw': tf.io.FixedLenFeature([], tf.string),
    }

    def _read_tfrecord(serialized_example):
        example = tf.io.parse_single_example(serialized_example,
                                             image_feature_description)
        img = tf.image.decode_jpeg(example['image_raw'], channels=3)  # RGB rather than BGR!
        img = (tf.cast(img, tf.float32) - 127.5) / 128.
        img_shape = [example['height'], example['width'], example['depth']]
        img = tf.reshape(img, img_shape)
        info = example['info']
        return img, info

    parsed_pos_dataset = raw_pos_dataset.map(_read_tfrecord)
    parsed_neg_dataset = raw_neg_dataset.map(_read_tfrecord)
    parsed_part_dataset = raw_part_dataset.map(_read_tfrecord)
    parsed_landmark_dataset = raw_landmark_dataset.map(_read_tfrecord)

    # Zip the four streams so every batch keeps the pos/neg/part/landmark
    # composition defined by DATA_COMPOSE_RATIO.
    parsed_image_dataset = tf.data.Dataset.zip(
        (parsed_pos_dataset.repeat().shuffle(16384).batch(
            int(BATCH_SIZE * DATA_COMPOSE_RATIO[0])),
         parsed_neg_dataset.repeat().shuffle(16384).batch(
             int(BATCH_SIZE * DATA_COMPOSE_RATIO[1])),
         parsed_part_dataset.repeat().shuffle(16384).batch(
             int(BATCH_SIZE * DATA_COMPOSE_RATIO[2])),
         parsed_landmark_dataset.repeat().shuffle(16384).batch(
             int(BATCH_SIZE * DATA_COMPOSE_RATIO[3]))))

    def concatenate(pos_info, neg_info, part_info, landmark_info):
        img_tensor = tf.zeros((0, IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32)
        label_tensor = tf.zeros((0, 17), dtype=tf.float32)
        pos_img = pos_info[0]
        neg_img = neg_info[0]
        part_img = part_info[0]
        landmark_img = landmark_info[0]
        pos_info = pos_info[1]
        neg_info = neg_info[1]
        part_info = part_info[1]
        landmark_info = landmark_info[1]
        img_tensor = tf.concat(
            [img_tensor, pos_img, neg_img, part_img, landmark_img], axis=0)
        info_tensor = tf.concat(
            [label_tensor, pos_info, neg_info, part_info, landmark_info], axis=0)
        return img_tensor, info_tensor

    ds = parsed_image_dataset.map(concatenate)
    ds = ds.repeat()
    ds = ds.shuffle(32)
    ds = ds.prefetch(32)

    '''Building custom loss/cost function'''
    def custom_loss(y_true, y_pred, loss_weights=loss_weights):  # Verified
        zero_index = K.zeros_like(y_true[:, 0])
        ones_index = K.ones_like(y_true[:, 0])

        # Classifier
        labels = y_true[:, 0]
        class_preds = y_pred[:, 0]
        bi_crossentropy_loss = -labels * K.log(class_preds) - (
            1 - labels) * K.log(1 - class_preds)
        classify_valid_index = tf.where(K.less(y_true[:, 0], 0), zero_index,
                                        ones_index)
        classify_keep_num = K.cast(
            tf.cast(tf.reduce_sum(classify_valid_index), tf.float32) *
            SAMPLE_KEEP_RATIO,
            dtype=tf.int32)  # For the classification task, only keep 70% of the valid samples (online hard sample mining).
        classify_loss_sum = bi_crossentropy_loss * tf.cast(
            classify_valid_index, bi_crossentropy_loss.dtype)
        classify_loss_sum_filtered, _ = tf.nn.top_k(classify_loss_sum,
                                                    k=classify_keep_num)
        # classify_loss = K.mean(classify_loss_sum_filtered)
        if classify_loss_sum_filtered.shape == []:
            classify_loss = tf.constant(0, dtype=tf.float32)
        else:
            classify_loss = K.mean(classify_loss_sum_filtered)

        # Bounding box regressor
        rois = y_true[:, 1:5]
        roi_preds = y_pred[:, 1:5]
        roi_raw_mean_square_error = K.sum(K.square(rois - roi_preds), axis=1)  # mse
        # roi_raw_smooth_l1_loss = K.mean(tf.where(K.abs(rois - roi_preds) < 1, 0.5 * K.square(rois - roi_preds), K.abs(rois - roi_preds) - 0.5))  # smooth L1 loss
        roi_valid_index = tf.where(K.equal(K.abs(y_true[:, 0]), 1), ones_index,
                                   zero_index)
        roi_keep_num = K.cast(tf.reduce_sum(roi_valid_index), dtype=tf.int32)
        roi_valid_mean_square_error = roi_raw_mean_square_error * tf.cast(
            roi_valid_index, roi_raw_mean_square_error.dtype)
        roi_filtered_mean_square_error, _ = tf.nn.top_k(
            roi_valid_mean_square_error, k=roi_keep_num)
        # roi_loss = K.mean(roi_filtered_mean_square_error)
        if roi_filtered_mean_square_error.shape == []:
            roi_loss = tf.constant(0, dtype=tf.float32)
        else:
            roi_loss = K.mean(roi_filtered_mean_square_error)
        # roi_valid_smooth_l1_loss = roi_raw_smooth_l1_loss * roi_valid_index
        # roi_filtered_smooth_l1_loss, _ = tf.nn.top_k(roi_valid_smooth_l1_loss, k=roi_keep_num)
        # roi_loss = K.mean(roi_filtered_smooth_l1_loss)

        # Landmark regressor
        pts = y_true[:, 5:17]
        pt_preds = y_pred[:, 5:17]
        pts_raw_mean_square_error = K.sum(K.square(pts - pt_preds), axis=1)  # mse
        # pts_raw_smooth_l1_loss = K.mean(tf.where(K.abs(pts - pt_preds) < 1, 0.5 * K.square(pts - pt_preds), K.abs(pts - pt_preds) - 0.5))  # smooth L1 loss
        pts_valid_index = tf.where(K.equal(y_true[:, 0], -2), ones_index,
                                   zero_index)
        pts_keep_num = K.cast(tf.reduce_sum(pts_valid_index), dtype=tf.int32)
        pts_valid_mean_square_error = pts_raw_mean_square_error * tf.cast(
            pts_valid_index, tf.float32)
        pts_filtered_mean_square_error, _ = tf.nn.top_k(
            pts_valid_mean_square_error, k=pts_keep_num)
        # pts_loss = K.mean(pts_filtered_mean_square_error)
        if len(pts_filtered_mean_square_error.shape) == 0:
            pts_loss = tf.constant(0, dtype=tf.float32)
        else:
            pts_loss = K.mean(pts_filtered_mean_square_error)
        # pts_valid_smooth_l1_loss = pts_raw_smooth_l1_loss * pts_valid_index
        # pts_filtered_smooth_l1_loss, _ = tf.nn.top_k(pts_valid_smooth_l1_loss, k=pts_keep_num)
        # pts_loss = K.mean(pts_filtered_smooth_l1_loss)

        loss = (classify_loss * loss_weights[0] +
                roi_loss * loss_weights[1] +
                pts_loss * loss_weights[2])
        return loss

    '''Training'''
    lr = args.LEARNING_RATE
    model.compile(optimizer=OPTIMIZER(lr=lr), loss=custom_loss)
    model.fit(ds,
              steps_per_epoch=1636,
              epochs=EPOCHS,
              validation_data=ds,
              validation_steps=1636)
    # The save path is fixed here; change it when training RNet/ONet or with
    # the landmark branch enabled.
    model.save(r'../Models/PNet_trained_without_lm.h5')
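# A possible command-line entry point for this TFRecord-based trainer (a
# sketch, not part of the original repo): it only wires up the arguments that
# main() actually reads (IMG_SIZE, USE_PRETRAINED_MODEL, OPTIMIZER,
# TRAIN_WITHOUT_LANDMARK, LEARNING_RATE); the defaults are assumptions.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description='Train PNet/RNet/ONet from TFRecords.')
    parser.add_argument('--IMG_SIZE', type=int, default=12,
                        help='12 for PNet, 24 for RNet, 48 for ONet')
    parser.add_argument('--USE_PRETRAINED_MODEL', type=int, default=0,
                        help='1 to resume from ../Models/*_train.h5')
    parser.add_argument('--OPTIMIZER', type=str, default='adam',
                        help="'sgd' for SGD, anything else for Adam")
    parser.add_argument('--TRAIN_WITHOUT_LANDMARK', type=int, default=0,
                        help='1 to drop the landmark regression branch')
    parser.add_argument('--LEARNING_RATE', type=float, default=0.001)
    main(parser.parse_args())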
def main(args):
    IMG_SIZE = args.IMG_SIZE
    USE_PRETRAINED_MODEL = args.USE_PRETRAINED_MODEL != 0

    if args.TRAIN_WITH_LANDMARK == 0:
        TRAIN_WITH_LANDMARK = False
        BATCH_SIZE = 1280
    else:
        TRAIN_WITH_LANDMARK = True
        BATCH_SIZE = 1792
    TRAIN_WITH_HARD = args.TRAIN_WITH_HARD == 1

    THREAD_NUM = 4
    EPOCHS = 100
    copy_num = 7 if TRAIN_WITH_LANDMARK else 5

    if IMG_SIZE == 12:
        MODEL = 'PNet'
        loss_weights = [1., 0.5, 0.5]
        model = pnet(training=True, train_with_landmark=TRAIN_WITH_LANDMARK)
        if USE_PRETRAINED_MODEL and TRAIN_WITH_LANDMARK:
            model.load_weights('../Models/PNet_train_with_lm.h5')
        elif USE_PRETRAINED_MODEL and not TRAIN_WITH_LANDMARK:
            model.load_weights('../Models/PNet_train_without_lm.h5')
    elif IMG_SIZE == 24:
        MODEL = 'RNet'
        loss_weights = [1., 0.5, 0.5]
        model = rnet(training=True, train_with_landmark=TRAIN_WITH_LANDMARK)
        if USE_PRETRAINED_MODEL and TRAIN_WITH_LANDMARK:
            model.load_weights('../Models/RNet_train_with_lm.h5')
        elif USE_PRETRAINED_MODEL and not TRAIN_WITH_LANDMARK:
            model.load_weights('../Models/RNet_train_without_lm.h5')
    elif IMG_SIZE == 48:
        MODEL = 'ONet'
        loss_weights = [1., 0.5, 1.]
        model = onet(training=True, train_with_landmark=TRAIN_WITH_LANDMARK)
        if USE_PRETRAINED_MODEL and TRAIN_WITH_LANDMARK:
            model.load_weights('../Models/ONet_train_with_lm.h5')
        elif USE_PRETRAINED_MODEL and not TRAIN_WITH_LANDMARK:
            model.load_weights('../Models/ONet_train_without_lm.h5')
    else:
        raise Exception("IMG_SIZE must be one of 12, 24 and 48.")

    AUG_IMG_PATH = r'../Data/' + str(IMG_SIZE)
    SAMPLE_KEEP_RATIO = 0.7  # Online hard sample mining for the face/non-face classification task, adaptive to the training process.

    '''Importing the dataset'''
    # Load the sample records
    POS_RECORD_PATH = r'../Data/' + str(IMG_SIZE) + '/pos_record.pkl'
    PART_RECORD_PATH = r'../Data/' + str(IMG_SIZE) + '/part_record.pkl'
    NEG_RECORD_PATH = r'../Data/' + str(IMG_SIZE) + '/neg_record.pkl'
    with open(POS_RECORD_PATH, 'rb') as f:
        pos_info = pkl.load(f)
    with open(PART_RECORD_PATH, 'rb') as f:
        part_info = pkl.load(f)
    with open(NEG_RECORD_PATH, 'rb') as f:
        neg_info = pkl.load(f)
    if TRAIN_WITH_LANDMARK:
        LDMK_RECORD_PATH = r'../Data/' + str(IMG_SIZE) + '/landmark_record.pkl'
        with open(LDMK_RECORD_PATH, 'rb') as f:
            ldmk_info = pkl.load(f)
    if TRAIN_WITH_HARD:
        NEG_HARD_RECORD_PATH = r'../Data/' + str(IMG_SIZE) + '/neg_hard_record.pkl'
        with open(NEG_HARD_RECORD_PATH, 'rb') as f:
            neg_hard_info = pkl.load(f)

    # Find the dataset with the smallest amount relative to its share of the
    # batch; it limits how many iterations fit in one epoch.
    if not TRAIN_WITH_LANDMARK and not TRAIN_WITH_HARD:
        # neg : pos : part-face = 3 : 1 : 1
        lengths = np.array([len(neg_info) / 3, len(pos_info), len(part_info)])
        min_index = lengths.argsort()[0]
        if min_index == 0:
            smallest_set = neg_info
            branch_batch_size = int(BATCH_SIZE / copy_num * 3)
        elif min_index == 1:
            smallest_set = pos_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 2:
            smallest_set = part_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        else:
            raise Exception("Getting lengths of datasets error")
    elif not TRAIN_WITH_LANDMARK and TRAIN_WITH_HARD:
        # neg : neg_hard : pos : part-face = 2 : 1 : 1 : 1
        lengths = np.array([
            len(neg_info) / 2,
            len(neg_hard_info),
            len(pos_info),
            len(part_info)
        ])
        min_index = lengths.argsort()[0]
        if min_index == 0:
            smallest_set = neg_info
            branch_batch_size = int(BATCH_SIZE / copy_num * 2)
        elif min_index == 1:
            smallest_set = neg_hard_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 2:
            smallest_set = pos_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 3:
            smallest_set = part_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        else:
            raise Exception("Getting lengths of datasets error")
    elif TRAIN_WITH_LANDMARK and not TRAIN_WITH_HARD:
        # neg : pos : part-face : landmark = 3 : 1 : 1 : 2
        lengths = np.array([
            len(neg_info) / 3,
            len(pos_info),
            len(part_info),
            len(ldmk_info) / 2
        ])
        min_index = lengths.argsort()[0]
        if min_index == 0:
            smallest_set = neg_info
            branch_batch_size = int(BATCH_SIZE / copy_num * 3)
        elif min_index == 1:
            smallest_set = pos_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 2:
            smallest_set = part_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 3:
            smallest_set = ldmk_info
            branch_batch_size = int(BATCH_SIZE / copy_num * 2)
        else:
            raise Exception("Getting lengths of datasets error")
    else:
        # neg : neg_hard : pos : part-face : landmark = 2 : 1 : 1 : 1 : 2
        lengths = np.array([
            len(neg_info) / 2,
            len(neg_hard_info),
            len(pos_info),
            len(part_info),
            len(ldmk_info) / 2
        ])
        min_index = lengths.argsort()[0]
        if min_index == 0:
            smallest_set = neg_info
            branch_batch_size = int(BATCH_SIZE / copy_num * 2)
        elif min_index == 1:
            smallest_set = neg_hard_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 2:
            smallest_set = pos_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 3:
            smallest_set = part_info
            branch_batch_size = int(BATCH_SIZE / copy_num)
        elif min_index == 4:
            smallest_set = ldmk_info
            branch_batch_size = int(BATCH_SIZE / copy_num * 2)
        else:
            raise Exception("Getting lengths of datasets error")

    '''Building custom loss/cost function'''
    if TRAIN_WITH_LANDMARK:

        def custom_loss(y_true, y_pred, loss_weights=loss_weights):  # Verified
            zero_index = K.zeros_like(y_true[:, 0])
            ones_index = K.ones_like(y_true[:, 0])

            # Classifier
            labels = y_true[:, 0]
            class_preds = y_pred[:, 0]
            bi_crossentropy_loss = -labels * K.log(class_preds) - (
                1 - labels) * K.log(1 - class_preds)
            classify_valid_index = tf.where(K.less(y_true[:, 0], 0),
                                            zero_index, ones_index)
            classify_keep_num = K.cast(
                tf.reduce_sum(classify_valid_index) * SAMPLE_KEEP_RATIO,
                dtype=tf.int32)  # For the classification task, only keep 70% of the valid samples.
            classify_loss_sum = bi_crossentropy_loss * classify_valid_index
            classify_loss_sum_filtered, _ = tf.nn.top_k(classify_loss_sum,
                                                        k=classify_keep_num)
            classify_loss = K.mean(classify_loss_sum_filtered)

            # Bounding box regressor
            rois = y_true[:, 1:5]
            roi_preds = y_pred[:, 1:5]
            # roi_raw_mean_square_error = K.sum(K.square(rois - roi_preds), axis=1)  # mse
            roi_raw_smooth_l1_loss = K.mean(
                tf.where(
                    K.abs(rois - roi_preds) < 1,
                    0.5 * K.square(rois - roi_preds),
                    K.abs(rois - roi_preds) - 0.5),
                axis=1)  # smooth L1 loss, per sample over the 4 box offsets
            roi_valid_index = tf.where(K.equal(K.abs(y_true[:, 0]), 1),
                                       ones_index, zero_index)
            roi_keep_num = K.cast(tf.reduce_sum(roi_valid_index),
                                  dtype=tf.int32)
            # roi_valid_mean_square_error = roi_raw_mean_square_error * roi_valid_index
            # roi_filtered_mean_square_error, _ = tf.nn.top_k(roi_valid_mean_square_error, k=roi_keep_num)
            # roi_loss = K.mean(roi_filtered_mean_square_error)
            roi_valid_smooth_l1_loss = roi_raw_smooth_l1_loss * roi_valid_index
            roi_filtered_smooth_l1_loss, _ = tf.nn.top_k(
                roi_valid_smooth_l1_loss, k=roi_keep_num)
            roi_loss = K.mean(roi_filtered_smooth_l1_loss)

            # Landmark regressor
            pts = y_true[:, 5:17]
            pt_preds = y_pred[:, 5:17]
            # pts_raw_mean_square_error = K.sum(K.square(pts - pt_preds), axis=1)  # mse
            pts_raw_smooth_l1_loss = K.mean(
                tf.where(
                    K.abs(pts - pt_preds) < 1,
                    0.5 * K.square(pts - pt_preds),
                    K.abs(pts - pt_preds) - 0.5),
                axis=1)  # smooth L1 loss, per sample over the 12 landmark offsets
            pts_valid_index = tf.where(K.equal(y_true[:, 0], -2), ones_index,
                                       zero_index)
            pts_keep_num = K.cast(tf.reduce_sum(pts_valid_index),
                                  dtype=tf.int32)
            # pts_valid_mean_square_error = pts_raw_mean_square_error * pts_valid_index
            # pts_filtered_mean_square_error, _ = tf.nn.top_k(pts_valid_mean_square_error, k=pts_keep_num)
            # pts_loss = K.mean(pts_filtered_mean_square_error)
            pts_valid_smooth_l1_loss = pts_raw_smooth_l1_loss * pts_valid_index
            pts_filtered_smooth_l1_loss, _ = tf.nn.top_k(
                pts_valid_smooth_l1_loss, k=pts_keep_num)
            pts_loss = K.mean(pts_filtered_smooth_l1_loss)

            loss = (classify_loss * loss_weights[0] +
                    roi_loss * loss_weights[1] +
                    pts_loss * loss_weights[2])
            return loss
    else:

        def custom_loss(y_true, y_pred, loss_weights=loss_weights):  # Verified
            zero_index = K.zeros_like(y_true[:, 0])
            ones_index = K.ones_like(y_true[:, 0])

            # Classifier
            labels = y_true[:, 0]
            class_preds = y_pred[:, 0]
            bi_crossentropy_loss = -labels * K.log(class_preds) - (
                1 - labels) * K.log(1 - class_preds)
            classify_valid_index = tf.where(K.less(y_true[:, 0], 0),
                                            zero_index, ones_index)
            classify_keep_num = K.cast(
                tf.reduce_sum(classify_valid_index) * SAMPLE_KEEP_RATIO,
                dtype=tf.int32)  # For the classification task, only keep 70% of the valid samples.
            classify_loss_sum = bi_crossentropy_loss * classify_valid_index
            classify_loss_sum_filtered, _ = tf.nn.top_k(classify_loss_sum,
                                                        k=classify_keep_num)
            classify_loss = K.mean(classify_loss_sum_filtered)

            # Bounding box regressor
            rois = y_true[:, 1:5]
            roi_preds = y_pred[:, 1:5]
            # roi_raw_mean_square_error = K.sum(K.square(rois - roi_preds), axis=1)  # mse
            roi_raw_smooth_l1_loss = K.mean(
                tf.where(
                    K.abs(rois - roi_preds) < 1,
                    0.5 * K.square(rois - roi_preds),
                    K.abs(rois - roi_preds) - 0.5),
                axis=1)  # smooth L1 loss, per sample over the 4 box offsets
            roi_valid_index = tf.where(K.equal(K.abs(y_true[:, 0]), 1),
                                       ones_index, zero_index)
            roi_keep_num = K.cast(tf.reduce_sum(roi_valid_index),
                                  dtype=tf.int32)
            # roi_valid_mean_square_error = roi_raw_mean_square_error * roi_valid_index
            # roi_filtered_mean_square_error, _ = tf.nn.top_k(roi_valid_mean_square_error, k=roi_keep_num)
            # roi_loss = K.mean(roi_filtered_mean_square_error)
            roi_valid_smooth_l1_loss = roi_raw_smooth_l1_loss * roi_valid_index
            roi_filtered_smooth_l1_loss, _ = tf.nn.top_k(
                roi_valid_smooth_l1_loss, k=roi_keep_num)
            roi_loss = K.mean(roi_filtered_smooth_l1_loss)

            loss = classify_loss * loss_weights[0] + roi_loss * loss_weights[1]
            return loss

    '''Training'''
    # Save the model after every epoch
    # from tensorflow.keras.callbacks import ModelCheckpoint
    # check_pointer = ModelCheckpoint(filepath='trained_PNet.h5', verbose=1, save_best_only=False)

    # Stream each epoch's results into a .csv file
    from tensorflow.keras.callbacks import CSVLogger
    csv_logger = CSVLogger('training.csv', separator=',', append=True)
    # append=True  -> append if the file exists (useful for continuing training)
    # append=False -> overwrite an existing file

    num_of_iters = len(smallest_set) // branch_batch_size

    if not USE_PRETRAINED_MODEL:
        lr = 0.001
        optimizer = Adam(lr=lr)
    else:
        lr = 0.0001
        optimizer = SGD(lr=lr)

    former_loss = 100000
    loss_patience = 500
    patience_count = 0  # counts consecutive iterations without loss improvement

    for i in range(EPOCHS):
        print("Epoch: " + str(i))
        # Re-shuffle all the datasets each epoch
        random.shuffle(neg_info)
        random.shuffle(part_info)
        random.shuffle(pos_info)
        if TRAIN_WITH_LANDMARK:
            random.shuffle(ldmk_info)
        if TRAIN_WITH_HARD:
            random.shuffle(neg_hard_info)

        for j in range(num_of_iters):
            print("Iteration: " + str(j) + " in Epoch: " + str(i))
            pos_batch = pos_info[int(j * (BATCH_SIZE / copy_num)):
                                 int((j + 1) * (BATCH_SIZE / copy_num))]
            part_batch = part_info[int(j * (BATCH_SIZE / copy_num)):
                                   int((j + 1) * (BATCH_SIZE / copy_num))]
            if TRAIN_WITH_LANDMARK:
                # Landmark data takes 2 shares of the batch.
                ldmk_batch = ldmk_info[int(j * (BATCH_SIZE / copy_num * 2)):
                                       int((j + 1) * (BATCH_SIZE / copy_num * 2))]
            if TRAIN_WITH_HARD:
                neg_batch = neg_info[int(j * (BATCH_SIZE / copy_num * 2)):
                                     int((j + 1) * (BATCH_SIZE / copy_num * 2))]
                neg_hard_batch = neg_hard_info[int(j * (BATCH_SIZE / copy_num)):
                                               int((j + 1) * (BATCH_SIZE / copy_num))]
            else:
                neg_batch = neg_info[int(j * (BATCH_SIZE / copy_num * 3)):
                                     int((j + 1) * (BATCH_SIZE / copy_num * 3))]

            batch_list = []
            batch_list.extend(neg_batch)
            batch_list.extend(pos_batch)
            batch_list.extend(part_batch)
            if TRAIN_WITH_LANDMARK:
                batch_list.extend(ldmk_batch)
            if TRAIN_WITH_HARD:
                batch_list.extend(neg_hard_batch)
            random.shuffle(batch_list)

            # Multi-thread data processing
            num_of_batch_imgs = len(batch_list)
            thread_num = max(1, THREAD_NUM)
            num_per_thread = math.ceil(float(num_of_batch_imgs) / thread_num)
            threads = []
            X_batch_list = []
            Y_batch_list = []
            for t in range(thread_num):
                start_idx = int(num_per_thread * t)
                end_idx = int(min(num_per_thread * (t + 1), num_of_batch_imgs))
                cur_batch_list = batch_list[start_idx:end_idx]
                cur_thread = CustomThread(
                    minibatch_data_processing,
                    (cur_batch_list, AUG_IMG_PATH, TRAIN_WITH_LANDMARK))
                threads.append(cur_thread)
            for t in range(thread_num):
                threads[t].start()
            for t in range(thread_num):
                cur_processed_imgs, cur_processed_gts = threads[t].get_result()
                X_batch_list.extend(cur_processed_imgs)
                Y_batch_list.extend(cur_processed_gts)
            '''
            # Single-thread data processing
            X_batch_list = []
            Y_batch_list = []
            for k in range(len(batch_list)):
                img_name = batch_list[k][0]
                # print(os.path.join(AUG_IMG_PATH, img_name))
                img = cv2.imread(os.path.join(AUG_IMG_PATH, img_name))
                distorted = image_random_distort(img)
                X_batch_list.append(distorted / 255.)
                label = batch_list[k][1]
                roi = batch_list[k][2]
                landmark = batch_list[k][3]
                Y_batch_list.append([label, roi[0], roi[1], roi[2], roi[3],
                                     landmark[0], landmark[1], landmark[2], landmark[3],
                                     landmark[4], landmark[5], landmark[6], landmark[7],
                                     landmark[8], landmark[9], landmark[10], landmark[11]])
            '''
            X_batch = np.array(X_batch_list)
            Y_batch = np.array(Y_batch_list)

            # Fit the data into the model for training & adjust the learning
            # rate according to the training progress
            model.compile(optimizer=optimizer, loss=custom_loss)
            hist = model.fit(x=X_batch,
                             y=Y_batch,
                             epochs=1,
                             shuffle=True,
                             callbacks=[csv_logger])  # Consider .train_on_batch
            loss = hist.history['loss'][0]
            if loss >= former_loss:
                patience_count = patience_count + 1
            else:
                patience_count = 0
            former_loss = loss
            if patience_count > loss_patience:
                lr = lr * 0.1
                K.set_value(optimizer.lr, lr)  # apply the new learning rate to the optimizer
                print("Now the learning rate is changed to " + str(lr))
            if j != 0 and j % 1000 == 0:
                lr = lr * 0.5
                K.set_value(optimizer.lr, lr)
                print("Now the learning rate is changed to " + str(lr))

            if j % 200 == 0:
                if TRAIN_WITH_LANDMARK:
                    model_name = MODEL + '_trained_with_lm'
                else:
                    model_name = MODEL + '_trained_without_lm'
                model_name = model_name + '_epoch_' + str(i) + '_iter_' + str(j) + '.h5'
                print("Saving model: " + model_name)
                model.save(os.path.join(r'../Models', model_name))
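# A possible command-line entry point for this record-based trainer (a sketch,
# not part of the original repo); only the flags read by main() above are
# wired up, and the defaults are assumptions.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description='Train PNet/RNet/ONet from pickled sample records.')
    parser.add_argument('--IMG_SIZE', type=int, default=12,
                        help='12 for PNet, 24 for RNet, 48 for ONet')
    parser.add_argument('--USE_PRETRAINED_MODEL', type=int, default=0)
    parser.add_argument('--TRAIN_WITH_LANDMARK', type=int, default=1)
    parser.add_argument('--TRAIN_WITH_HARD', type=int, default=0)
    main(parser.parse_args())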
def main(args):
    IMG_SIZE = args.IMG_SIZE
    print("IMG_SIZE:", IMG_SIZE)
    if IMG_SIZE != 12 and IMG_SIZE != 24 and IMG_SIZE != 48:
        raise Exception("Image size wrong!")
    IMG_ROOT_DIR = args.IMG_ROOT_DIR
    DEBUG = args.DEBUG == 1

    thresholds = [0.6, 0.6, 0.7]
    PNet = pnet(training=False)
    PNet.load_weights('../Models/PNet.h5')
    PNet.summary()

    RECORD_PATH = os.path.join(IMG_ROOT_DIR, 'wider_face_add_lm_10_10_info.pkl')
    with open(RECORD_PATH, 'rb') as f:
        info = pkl.load(f)
    if DEBUG:
        info = info[:100]

    neg_hard_save_dir = r'../Data/' + str(IMG_SIZE) + '/neg_hard'
    if not os.path.exists(neg_hard_save_dir):
        os.mkdir(neg_hard_save_dir)

    neg_hard_idx = 0
    neg_hard_list = []
    for i in range(len(info)):
        pic_info = info[i]
        img_path = os.path.join(IMG_ROOT_DIR, 'JPEGImages', pic_info[0])
        img = cv2.imread(img_path)
        height, width, channel = img.shape
        bboxes = np.array(pic_info[2])  # all ground-truth bounding boxes in one picture

        if (i + 1) % 1000 == 0:
            print(str(i + 1) + " pics processed ")
            print(str(neg_hard_idx) + " hard negative samples generated. ")

        # Generate hard negative samples
        if IMG_SIZE == 12:
            pred_rectangles_list = pnet_prediction(img, PNet, thresholds)
            if args.AUGMENT_CONTROL > 0:
                random.shuffle(pred_rectangles_list)
                pred_rectangles_list = pred_rectangles_list[:args.AUGMENT_CONTROL]
            pred_rectangles = np.array(pred_rectangles_list)
            pred_boxes = utils.rect2square(pred_rectangles)
            for j in range(len(pred_boxes)):
                pred_box = pred_boxes[j]
                x1, y1, x2, y2 = np.array(pred_box[:4]).astype(int)
                w = x2 - x1 + 1
                h = y2 - y1 + 1
                # Drop boxes that exceed the image boundary or are too small
                if w < 20 or h < 20 or x1 < 0 or y1 < 0 or x2 > width - 1 or y2 > height - 1:
                    continue
                crop_box = np.array([x1, y1, x2, y2])
                if bboxes.shape[0] == 0:
                    iou = 0
                else:
                    iou = utils.IoU(crop_box, bboxes)
                if np.max(iou) < 0.1:
                    cropped_img = img[y1:y2, x1:x2]
                    resized_img = cv2.resize(cropped_img, (IMG_SIZE, IMG_SIZE),
                                             interpolation=cv2.INTER_LINEAR)
                    saving_path = os.path.join(neg_hard_save_dir,
                                               str(neg_hard_idx) + '.jpg')
                    success = cv2.imwrite(saving_path, resized_img)
                    if not success:
                        raise Exception("Neg picture " + str(neg_hard_idx) +
                                        " saving failed. ")
                    img_name = os.path.join('neg_hard', str(neg_hard_idx) + '.jpg')
                    label = 0
                    roi = np.array([-1] * 4)
                    landmark = np.array([-1] * 12)
                    neg_hard_list.append([img_name, label, roi, landmark])
                    neg_hard_idx = neg_hard_idx + 1

    neg_hard_anno_path = r'../Data/' + str(IMG_SIZE) + '/neg_hard_record.pkl'
    with open(neg_hard_anno_path, 'wb+') as file:
        pkl.dump(neg_hard_list, file)
    print("File saving done. ")
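# A possible command-line entry point for the hard-negative generator (a
# sketch, not part of the original repo). Only the arguments read by main()
# above are wired up; the defaults, including the IMG_ROOT_DIR path, are
# assumptions for illustration.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description='Generate hard negative samples with a trained PNet.')
    parser.add_argument('--IMG_SIZE', type=int, default=12)
    parser.add_argument('--IMG_ROOT_DIR', type=str, default='../Data/WIDER_FACE',
                        help='directory containing JPEGImages/ and the info .pkl')
    parser.add_argument('--DEBUG', type=int, default=0,
                        help='1 to process only the first 100 images')
    parser.add_argument('--AUGMENT_CONTROL', type=int, default=-1,
                        help='>0 to cap the number of PNet proposals used per image')
    main(parser.parse_args())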