def get_random_sample(image, shape, rotation_stddev=10):
    """Draw one randomly augmented 112x112 training sample.

    The image is randomly mirrored, randomly rotated about the landmark
    centre, cropped around a jittered square box enclosing the landmarks
    and finally resized.

    Args:
        image: pixel array in [H, W, C] layout.
        shape: landmark coordinates, one (y, x) row per landmark.
        rotation_stddev: standard deviation passed to the random rotation.

    Returns:
        A ``(pixels, landmarks)`` tuple, both as float32 arrays; pixels are
        back in [H, W, C] layout.
    """
    out_size = 112

    # menpo stores pixels channel-first.
    sample = menpo.image.Image(image.transpose((2, 0, 1)), copy=False)
    sample.landmarks['PTS'] = PointCloud(shape)

    # Horizontal flip with probability 0.5.
    if np.random.rand() < .5:
        sample = utils.mirror_image(sample)

    # Rotation with probability 0.5.
    if np.random.rand() < .5:
        angle = np.random.normal(scale=rotation_stddev)
        rotation = menpo.transform.rotate_ccw_about_centre(
            sample.landmarks['PTS'], angle)
        sample = sample.warp_to_shape(sample.shape, rotation)

    # Square box around the landmarks, shifted by a random offset.
    corners = sample.landmarks['PTS'].bounding_box().points
    miny, minx = np.min(corners, 0)
    maxy, maxx = np.max(corners, 0)
    side = max(maxx - minx, maxy - miny)
    half = side * 0.5
    cy = (miny + maxy) / 2.
    cx = (minx + maxx) / 2.
    jitter = (np.random.rand(2) - 0.5) * 0.6 * side

    top_left = [cy - half + jitter[0], cx - half + jitter[1]]
    bottom_right = [cy + half + jitter[0], cx + half + jitter[1]]
    sample.landmarks['bb'] = PointCloud(
        [top_left, bottom_right]).bounding_box()

    # Random crop margin around the box.
    margin = 1.0 / 6.0 + float(np.random.rand() - 0.5) / 6.
    sample = sample.crop_to_landmarks_proportion(margin, group='bb')
    sample = sample.resize((out_size, out_size))

    pixels_out = sample.pixels.transpose(1, 2, 0).astype('float32')
    landmarks_out = sample.landmarks['PTS'].points.astype('float32')
    return pixels_out, landmarks_out
def get_random_sample(image, shape, rotation_stddev=10):
    """Draw one randomly augmented 112x112 sample with optional occlusion.

    The image is randomly mirrored and rotated, cropped around a jittered
    square landmark box, resized, and with probability 0.3 a rectangle of
    the result is overwritten with a patch taken from ``_negatives``.

    Args:
        image: pixel array in [H, W, C] layout.
        shape: landmark coordinates, one (y, x) row per landmark.
        rotation_stddev: standard deviation passed to the random rotation.

    Returns:
        A ``(pixels, landmarks)`` tuple, both as float32 arrays.
    """
    out_size = 112

    sample = menpo.image.Image(image.transpose((2, 0, 1)), copy=False)
    sample.landmarks['PTS'] = PointCloud(shape)

    # Horizontal flip with probability 0.5.
    if np.random.rand() < .5:
        sample = utils.mirror_image(sample)

    # Rotation with probability 0.5.
    if np.random.rand() < .5:
        angle = np.random.normal(scale=rotation_stddev)
        rotation = menpo.transform.rotate_ccw_about_centre(
            sample.landmarks['PTS'], angle)
        sample = sample.warp_to_shape(sample.shape, rotation)

    # Square box around the landmarks, shifted by a random offset.
    corners = sample.landmarks['PTS'].bounding_box().points
    miny, minx = np.min(corners, 0)
    maxy, maxx = np.max(corners, 0)
    side = max(maxx - minx, maxy - miny)
    half = side * 0.5
    cy = (miny + maxy) / 2.
    cx = (minx + maxx) / 2.
    jitter = (np.random.rand(2) - 0.5) / 6. * side

    top_left = [cy - half + jitter[0], cx - half + jitter[1]]
    bottom_right = [cy + half + jitter[0], cx + half + jitter[1]]
    sample.landmarks['bb'] = PointCloud(
        [top_left, bottom_right]).bounding_box()

    margin = 1.0 / 6.0 + float(np.random.rand() - 0.5) / 10.0
    sample = sample.crop_to_landmarks_proportion(margin, group='bb')
    sample = sample.resize((out_size, out_size))

    pixels_out = sample.pixels.transpose(1, 2, 0).astype('float32')
    landmarks_out = sample.landmarks['PTS'].points.astype('float32')

    # Occlusion: paste a rectangle covering a fixed fraction of the image
    # area, with a random height, copied from a random negative image.
    area_fraction = 0.15
    min_height = 0.15
    max_height = 1.0
    if np.random.rand() < .3:
        height_fraction = (
            np.random.rand() * (max_height - min_height) + min_height)
        occ_h = min(out_size, int(height_fraction * out_size))
        occ_w = min(
            out_size, int(out_size * out_size * area_fraction / occ_h))
        top = int(np.random.rand() * (out_size - occ_h))
        left = int(np.random.rand() * (out_size - occ_w))
        negative = _negatives[int(np.random.rand() * _num_negatives)]
        rows = slice(top, top + occ_h)
        cols = slice(left, left + occ_w)
        pixels_out[rows, cols] = np.minimum(1.0, negative[rows, cols])

    return pixels_out, landmarks_out
def crop_to_face_image(img, bb_dictionary=None, gt=True, margin=0.25,
                       image_size=256):
    """Crop an image to its (margin-extended) face box and resize it.

    Args:
        img: image object exposing ``path``, ``bounds()``, ``landmarks``,
            ``crop_to_pointcloud`` and ``resize``.
        bb_dictionary: optional mapping from image file name to a pair of
            boxes; when ``None`` the box is derived from the 'PTS' landmarks.
        gt: when a dictionary is given, pick entry 1 (ground truth) if true,
            otherwise entry 0 (face-detector initialisation).
        margin: margin forwarded to ``center_margin_bb``.
        image_size: side length of the square output.

    Returns:
        The cropped and resized image.
    """
    name = img.path.name
    upper_bounds = img.bounds()[1]

    if bb_dictionary is not None:
        # index 1 holds the ground-truth box, index 0 the detector box
        box = bb_dictionary[name][1] if gt else bb_dictionary[name][0]
    else:
        corners = img.landmarks['PTS'].bounding_box().points
        box = np.array([[corners[0, 1], corners[0, 0],
                         corners[2, 1], corners[2, 0]]])

    box = center_margin_bb(box, upper_bounds, margin=margin)

    # Box columns are read as [x_min, y_min, x_max, y_max]; the point cloud
    # rows are (y, x) pairs.
    x_min, y_min = box[0, 0], box[0, 1]
    x_max, y_max = box[0, 2], box[0, 3]
    crop_region = PointCloud(np.array([
        [y_min, x_min],
        [y_max, x_min],
        [y_max, x_max],
        [y_min, x_max],
    ]))

    cropped = img.crop_to_pointcloud(crop_region)
    return cropped.resize([image_size, image_size])
def load_image_test(path, reference_shape, frame_num):
    """Load one numbered test frame with its annotations and initial shape.

    Args:
        path: directory string; its last character is dropped before the
            frame, ``annot`` and ``bbs`` file names are appended.
        reference_shape: reference landmark array used for initialisation
            and rescaling.
        frame_num: frame number, formatted as a zero-padded 6-digit name.

    Returns:
        ``(1, pixels, gt_truth, estimate)`` where pixels is a float32
        [H, W, C] array and the two shapes are float32 landmark arrays.
    """
    root = path[:-1]
    frame = mio.import_image(root + "/%06d.jpg" % (frame_num))
    frame.landmarks['PTS'] = mio.import_landmark_file(
        root + "/annot/%06d.pts" % (frame_num))
    frame.landmarks['bb'] = mio.import_landmark_file(
        root + "/bbs/%06d.pts" % (frame_num))
    frame = frame.crop_to_landmarks_proportion(0.3, group='bb')

    # Place the reference shape inside the detected box and rescale the
    # image so that this initial shape matches the reference size.
    reference_shape = PointCloud(reference_shape)
    box = frame.landmarks['bb'].lms.bounding_box()
    frame.landmarks['__initial'] = align_shape_with_bounding_box(
        reference_shape, box)
    frame = frame.rescale_to_pointcloud(reference_shape, group='__initial')

    truth_lms = frame.landmarks['PTS'].lms
    initial_lms = frame.landmarks['__initial'].lms

    # Greyscale images are converted to RGB before going channel-last.
    pixels = grey_to_rgb(frame).pixels.transpose(1, 2, 0)
    gt_truth = truth_lms.points.astype(np.float32)
    estimate = initial_lms.points.astype(np.float32)
    return 1, pixels.astype(np.float32).copy(), gt_truth, estimate
def flip_predictions(predictions, shapes):
    """Mirror each predicted landmark set.

    Args:
        predictions: iterable of landmark arrays.
        shapes: iterable of image shapes, paired with ``predictions``;
            element 1 of each is passed to ``utils.mirror_landmarks``.

    Returns:
        A float32 array stacking the mirrored landmark arrays.
    """
    mirrored = [
        utils.mirror_landmarks(
            PointCloud(pred), shape[1]).points.astype(np.float32)
        for pred, shape in zip(predictions, shapes)
    ]
    return np.array(mirrored, np.float32)
def initial_shape_fromMap(image):
    """Build an initial shape from the arg-max of each response map.

    Reads ``image.rspmap_data`` (indexed as ``[0, point, ...]``) and takes,
    for every point, the coordinates of the largest response.

    Args:
        image: object carrying a ``rspmap_data`` array.

    Returns:
        A PointCloud of the peak coordinates (float32) whose
        ``project_weight`` attribute is set to ``None``.
    """
    maps = image.rspmap_data
    map_shape = maps[0, 0, ...].shape

    peaks = []
    for idx in range(maps.shape[1]):
        flat_peak = maps[0, idx, ...].argmax()
        peaks.append(np.unravel_index(flat_peak, map_shape))

    cloud = PointCloud(np.array(peaks, dtype=np.float32))
    cloud.project_weight = None
    return cloud
def augment_menpo_img_geom(img, p_geom=0):
    """Return a copy of ``img``, geometrically deformed when p_geom > 0.5.

    Args:
        img: image carrying a 'PTS' landmark group.
        p_geom: value compared against 0.5 to decide whether to augment;
            also forwarded as both ``p_scale`` and ``p_shift``.

    Returns:
        A copy of the image, warped to the deformed landmarks if augmented.
    """
    augmented = img.copy()
    if not (p_geom > 0.5):
        return augmented

    source_points = augmented.landmarks['PTS'].points.copy()
    deformed_points = deform_face_geometric_style(
        source_points, p_scale=p_geom, p_shift=p_geom)
    return warp_face_image_tps(augmented, PointCloud(deformed_points))
def augment_menpo_img_geom(img, p_geom=0.):
    """Geometric-style augmentation using a random face deformation.

    The first landmark group of the image is deformed and the image is
    warped to it, but only when ``p_geom`` exceeds 0.5.

    Args:
        img: image with at least one landmark group.
        p_geom: value compared against 0.5 to decide whether to augment;
            also forwarded as both ``p_scale`` and ``p_shift``.

    Returns:
        A copy of the image, warped to the deformed landmarks if augmented.
    """
    augmented = img.copy()
    if not (p_geom > 0.5):
        return augmented

    group_label = augmented.landmarks.group_labels[0]
    source_points = augmented.landmarks[group_label].points.copy()
    deformed_points = deform_face_geometric_style(
        source_points, p_scale=p_geom, p_shift=p_geom)
    return warp_face_image_tps(
        augmented, PointCloud(deformed_points), group_label)
def get_mirrored_image(image, shape, init):
    """Produce a mirrored copy of an image and of its initial shape.

    Args:
        image: pixel array in [H, W, C] layout.
        shape: ground-truth landmarks; returned untouched.
        init: initial landmark estimate, mirrored together with the image.

    Returns:
        ``(image, init, mirrored_image, mirrored_init, shape)``; the
        mirrored entries are float32 arrays.
    """
    # menpo stores pixels channel-first.
    flipped = menpo.image.Image(image.transpose((2, 0, 1)))
    flipped.landmarks['init'] = PointCloud(init)
    flipped = utils.mirror_image(flipped)

    flipped_pixels = flipped.pixels.transpose(1, 2, 0).astype('float32')
    flipped_init = flipped.landmarks['init'].points.astype('float32')
    return image, init, flipped_pixels, flipped_init, shape
def load_image(path, proportion, size):
    """Import an image, pad it with noise, crop around the face and resize.

    The image is first embedded in a larger random-noise canvas so that the
    later crop (taken around a square landmark box enlarged by
    ``proportion``) never leaves the image.

    Args:
        path: image file to import; it must carry a 'PTS' landmark group.
        proportion: crop margin as a fraction of the landmark box size.
        size: side length of the square output image.

    Returns:
        The cropped, resized, RGB menpo image.
    """
    def _landmark_extent(img):
        """Return (miny, minx, maxy, maxx, side) of the 'PTS' landmarks."""
        corners = img.landmarks['PTS'].bounding_box().points
        lo_y, lo_x = np.min(corners, 0)
        hi_y, hi_x = np.max(corners, 0)
        return lo_y, lo_x, hi_y, hi_x, max(hi_x - lo_x, hi_y - lo_y)

    def _pad_amount(low, high, limit, reach):
        """Padding needed on one axis, plus a fixed safety border of 100."""
        return int(max(max(reach - low, 0),
                       max(high + reach - limit, 0))) + 100

    mp_image = mio.import_image(path)
    assert isinstance(mp_image, menpo.image.Image)

    miny, minx, maxy, maxx, bbsize = _landmark_extent(mp_image)
    reach = bbsize * proportion
    pady = _pad_amount(miny, maxy, mp_image.height, reach)
    padx = _pad_amount(minx, maxx, mp_image.width, reach)

    # Embed the pixels in a noise canvas and move the landmarks with them.
    channels, height, width = mp_image.pixels.shape
    canvas = np.random.rand(channels, height + pady + pady,
                            width + padx + padx)
    canvas[:, pady:pady + height, padx:padx + width] = mp_image.pixels
    moved_points = mp_image.landmarks['PTS'].points + np.array([pady, padx])

    mp_image = menpo.image.Image(canvas)
    mp_image.landmarks['PTS'] = PointCloud(moved_points)
    assert isinstance(mp_image, menpo.image.Image)

    # Square box centred on the (moved) landmarks.
    miny, minx, maxy, maxx, bbsize = _landmark_extent(mp_image)
    half = bbsize * 0.5
    cy = (miny + maxy) / 2.
    cx = (minx + maxx) / 2.
    mp_image.landmarks['bb'] = PointCloud([
        [cy - half, cx - half],
        [cy + half, cx + half],
    ]).bounding_box()

    mp_image = mp_image.crop_to_landmarks_proportion(
        proportion, group='bb', constrain_to_boundary=False)
    assert isinstance(mp_image, menpo.image.Image)

    mp_image = mp_image.resize((size, size))
    assert isinstance(mp_image, menpo.image.Image)

    mp_image = grey_to_rgb(mp_image)
    assert isinstance(mp_image, menpo.image.Image)
    return mp_image
def align_shapes(im, reference_shape, init=True, bb_hat=None):
    """Align the reference shape to an image's bounding box.

    With ``init`` true the image is rescaled to the reference shape, centred
    on a 395x467 random-noise canvas, and the ground truth, initial shape
    and box are shifted accordingly. Otherwise only the reference shape
    aligned to ``bb_hat`` is returned.

    Args:
        im: image with 'bb' (and, when ``init`` is true, 'PTS') landmarks.
        reference_shape: reference landmark array.
        init: selects the full preparation path (true) or the
            re-initialisation path (false).
        bb_hat: bounding box used when ``init`` is false.

    Returns:
        When ``init`` is true: ``(padded_image, init_shape, gt_shape, box)``
        where the first three carry a leading batch axis of size 1 and
        ``box`` is the bounding box of the shifted 'bb' landmarks.
        Otherwise: the aligned initial shape with a leading batch axis.
    """
    reference_shape = PointCloud(reference_shape)

    if not init:
        im.landmarks['a'] = align_shape_with_bounding_box(
            reference_shape, bb_hat)
        aligned = im.landmarks['a'].lms.points.astype(np.float32)
        return np.expand_dims(aligned, 0)

    canvas_h, canvas_w = 395, 467

    box = im.landmarks['bb'].lms.bounding_box()
    im.landmarks['__initial'] = align_shape_with_bounding_box(
        reference_shape, box)
    im = im.rescale_to_pointcloud(reference_shape, group='__initial')

    truth_lms = im.landmarks['PTS'].lms
    initial_lms = im.landmarks['__initial'].lms
    box_lms = im.landmarks['bb'].lms

    pixels = grey_to_rgb(im).pixels.transpose(1, 2, 0).copy()
    height, width = pixels.shape[:2]
    canvas = np.random.rand(canvas_h, canvas_w, 3).astype(np.float32)
    dy = max(int((canvas_h - height - 1) / 2), 0)
    dx = max(int((canvas_w - width - 1) / 2), 0)

    # Shift every landmark set in place by the padding offset ...
    clouds = (truth_lms, initial_lms, box_lms)
    for cloud in clouds:
        coords = cloud.points
        coords[:, 0] += dy
        coords[:, 1] += dx
    # ... then rebuild each point cloud from its shifted coordinates.
    truth_lms, initial_lms, box_lms = [
        cloud.from_vector(cloud.points) for cloud in clouds
    ]

    canvas[dy:(height + dy), dx:(width + dx), :] = pixels

    gt_points = truth_lms.points.astype(np.float32)
    init_points = initial_lms.points.astype(np.float32)
    return (np.expand_dims(canvas, 0), np.expand_dims(init_points, 0),
            np.expand_dims(gt_points, 0), box_lms.bounding_box())
def load_images_test(paths, reference_shape, group=None, verbose=True,
                     PLOT=False):
    """Import test images and crop each one around its bounding box.

    For every image found under ``paths`` the matching bounding-box file is
    read from ``bbs/<relative folder>/<stem without spaces>.pts``, attached
    as the 'bb' landmark group, and the image is cropped to that box with a
    0.3 margin.

    Args:
        paths: a list of strings containing the data directories.
        reference_shape: accepted for interface compatibility; not used.
        group: accepted for interface compatibility; not used.
        verbose: boolean, print progress information.
        PLOT: accepted for interface compatibility; not used.

    Returns:
        A list of cropped images, in import order.
    """
    # The previous implementation resolved a default landmark group with
    # ``im.landmarks.keys()[0]``; the result was never used and the
    # subscript fails on Python 3, so that lookup (and other unused
    # bookkeeping) has been dropped.
    images = []
    for path in paths:
        if verbose:
            print('Importing data from {}'.format(path))

        for im in mio.import_images(path, verbose=verbose,
                                    as_generator=True):
            folder = im.path.parent
            # Prefer the two-level relative folder; fall back to the
            # one-level folder when it does not contain 'set'.
            bb_root = folder.relative_to(folder.parent.parent)
            if 'set' not in str(bb_root):
                bb_root = folder.relative_to(folder.parent)

            bb_file = (Path('bbs') / bb_root /
                       (im.path.stem.replace(' ', '') + '.pts'))
            im.landmarks['bb'] = mio.import_landmark_file(str(bb_file))

            images.append(im.crop_to_landmarks_proportion(0.3, group='bb'))
    return images
def flip_predictions(predictions, shapes):
    """Mirror predicted landmark sets of 68 or 73 points.

    Predictions with any other number of points are passed through
    unchanged.

    Args:
        predictions: iterable of landmark arrays.
        shapes: iterable of image shapes, paired with ``predictions``.

    Returns:
        A float32 array stacking the (possibly mirrored) landmark arrays.
    """
    # Helper names are resolved lazily so that only the one actually needed
    # is looked up on ``utils``.
    mirror_by_count = {68: 'mirror_landmarks_68', 73: 'mirror_landmarks_73'}

    flipped = []
    for raw_pred, shape in zip(predictions, shapes):
        cloud = PointCloud(raw_pred)
        mirror_name = mirror_by_count.get(cloud.points.shape[0])
        if mirror_name is not None:
            cloud = getattr(utils, mirror_name)(cloud, shape)
        flipped.append(cloud.points)
    return np.array(flipped, np.float32)
def get_noisy_init_from_bb(reference_shape, bb, noise_percentage=.02):
    """Roughly align a reference shape to a noise-perturbed bounding box.

    A noisy shape is generated from the box (``noise_percentage`` is used
    for the first and third noise components, 0 for the second) and the
    reference shape is then aligned to the bounding box of that noisy shape.

    Args:
        reference_shape: a numpy array [num_landmarks, 2].
        bb: bounding box points.
        noise_percentage: amount of noise to add.

    Returns:
        The aligned shape, as a numpy array [num_landmarks, 2].
    """
    box_cloud = PointCloud(bb)
    reference_cloud = PointCloud(reference_shape)

    noise = [noise_percentage, 0, noise_percentage]
    noisy_shape = noisy_shape_from_bounding_box(
        reference_cloud, box_cloud, noise_percentage=noise)
    noisy_box = noisy_shape.bounding_box()

    aligned = align_shape_with_bounding_box(reference_cloud, noisy_box)
    return aligned.points
def scale_image(image, shape):
    """Crop an image around its landmarks and resize it to 112x112.

    Args:
        image: pixel array in [H, W, C] layout.
        shape: landmark coordinates, one (y, x) row per landmark.

    Returns:
        ``(pixels, landmarks, init)`` as float32 arrays, where ``init`` is
        the module-level ``_mean_shape``.
    """
    out_size = 112

    # menpo stores pixels channel-first.
    face = menpo.image.Image(image.transpose((2, 0, 1)))
    face.landmarks['PTS'] = PointCloud(shape)

    # Square box centred on the landmarks.
    corners = face.landmarks['PTS'].bounding_box().points
    miny, minx = np.min(corners, 0)
    maxy, maxx = np.max(corners, 0)
    side = max(maxx - minx, maxy - miny)
    half = side * 0.5
    cy = (miny + maxy) / 2.
    cx = (minx + maxx) / 2.
    face.landmarks['bb'] = PointCloud([
        [cy - half, cx - half],
        [cy + half, cx + half],
    ]).bounding_box()

    face = face.crop_to_landmarks_proportion(1. / 6., group='bb')
    face = face.resize((out_size, out_size))

    pixels_out = face.pixels.transpose((1, 2, 0))
    landmarks_out = face.landmarks['PTS'].points
    return (pixels_out.astype(np.float32),
            landmarks_out.astype(np.float32),
            _mean_shape.astype(np.float32))
def load_image(path, reference_shape, is_training=False, group='PTS',
               mirror_image=False):
    """Load an annotated image together with an initial shape estimate.

    The bounding box is read from
    ``bbs/<relative folder>/<image stem>.pts``; the image is cropped to it,
    the reference shape is aligned to the box, and the image is rescaled so
    that this initial shape matches the reference.

    Args:
        path: a path containing an image file.
        reference_shape: a numpy array [num_landmarks, 2].
        is_training: not used by this function.
        group: landmark group containing the ground truth landmarks.
        mirror_image: flips horizontally the image's pixels and landmarks.

    Returns:
        pixels: a float32 numpy array in [H, W, C] layout.
        gt_truth: the ground truth landmarks, a float32 numpy array.
        estimate: the initial estimate, a float32 numpy array.
    """
    im = mio.import_image(path)

    folder = im.path.parent
    # Prefer the two-level relative folder; fall back to the one-level
    # folder when it does not contain 'set'.
    bb_root = folder.relative_to(folder.parent.parent)
    if 'set' not in str(bb_root):
        bb_root = folder.relative_to(folder.parent)
    bb_file = Path('bbs') / bb_root / (im.path.stem + '.pts')
    im.landmarks['bb'] = mio.import_landmark_file(str(bb_file))

    im = im.crop_to_landmarks_proportion(0.3, group='bb')

    reference_shape = PointCloud(reference_shape)
    box = im.landmarks['bb'].lms.bounding_box()
    im.landmarks['__initial'] = align_shape_with_bounding_box(
        reference_shape, box)
    im = im.rescale_to_pointcloud(reference_shape, group='__initial')

    if mirror_image:
        im = utils.mirror_image(im)

    truth_lms = im.landmarks[group].lms
    initial_lms = im.landmarks['__initial'].lms

    # Greyscale images are converted to RGB before going channel-last.
    pixels = grey_to_rgb(im).pixels.transpose(1, 2, 0)
    gt_truth = truth_lms.points.astype(np.float32)
    estimate = initial_lms.points.astype(np.float32)
    return pixels.astype(np.float32).copy(), gt_truth, estimate
def crop_img_facial(img, margin=0.5):
    """Crop an image to its margin-extended landmark box, resized to 256x256.

    The box is taken from the first landmark group of the image.

    Args:
        img: image with at least one landmark group.
        margin: margin forwarded to ``center_margin_bb``.

    Returns:
        The cropped image resized to 256x256.
    """
    upper_bounds = img.bounds()[1]
    group_label = img.landmarks.group_labels[0]

    corners = img.landmarks[group_label].bounding_box().points
    box = np.array([[corners[0, 1], corners[0, 0],
                     corners[2, 1], corners[2, 0]]])
    box = center_margin_bb(box, upper_bounds, margin=margin)

    # Box columns are read as [x_min, y_min, x_max, y_max]; the point cloud
    # rows are (y, x) pairs.
    x_min, y_min = box[0, 0], box[0, 1]
    x_max, y_max = box[0, 2], box[0, 3]
    crop_region = PointCloud(np.array([
        [y_min, x_min],
        [y_max, x_min],
        [y_max, x_max],
        [y_min, x_max],
    ]))

    cropped = img.crop_to_pointcloud(crop_region)
    return cropped.resize([256, 256])
def detect_landmark(img):
    """Detect 68 facial landmarks with dlib and export image + landmarks.

    The input image is written to ``input/input.png`` and the landmarks of
    the last detected face to ``input/input.pts`` (as (y, x) points) under
    the hard-coded face_warp directory.

    If no face is detected a warning is emitted and nothing is written;
    previously the landmark list was never bound in that case.

    Args:
        img: BGR image array as used by OpenCV.

    Returns:
        None.
    """
    import warnings

    work_dir = '/home/KLTN_TheFaceOfArtFaceParsing/Updates/face_warp'

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(
        work_dir + '/shape_predictor_68_face_landmarks.dat')

    faces = list(detector(gray))
    if not faces:
        warnings.warn('detect_landmark: no face detected; nothing written')
        return

    # Only the last detection ever reached the output files, so the
    # predictor is run once, on that face.
    landmarks = predictor(gray, faces[-1])
    landmarks_points = [
        (landmarks.part(n).y, landmarks.part(n).x) for n in range(68)
    ]

    cv2.imwrite(work_dir + '/input/input.png', img)
    mio.export_landmark_file(
        PointCloud(landmarks_points),
        work_dir + '/input/input.pts',
        overwrite=True)
def load_images(paths, group=None, verbose=True):
    """Load training images, rescale them and pad them to a common size.

    Each image is cropped to its bounding box (read from
    ``bbs/<relative folder>/<stem>.pts``), rescaled to the reference shape
    built from ``paths``, converted to RGB, and finally centred on a
    random-noise canvas as large as the largest image. The reference shape
    is also exported to ``reference_shape.pkl`` in ``FLAGS.train_dir``.

    Args:
        paths: a list of strings containing the data directories.
        group: landmark group containing the ground truth landmarks; when
            falsy it is resolved from the first image.
        verbose: boolean, print debugging info.

    Returns:
        images: a float32 numpy array [num_images, H, W, C].
        shapes: a list of the ground truth landmarks, shifted in place by
            the padding offset of their image.
        reference_shape: a numpy array [num_landmarks, 2].
        shape_gen: the generator returned by ``detect.create_generator``.
    """
    pixel_arrays = []
    shapes = []
    boxes = []

    reference_shape = PointCloud(build_reference_shape(paths))

    for path in paths:
        if verbose:
            print('Importing data from {}'.format(path))

        for im in mio.import_images(path, verbose=verbose,
                                    as_generator=True):
            group = group or im.landmarks[group]._group_label

            folder = im.path.parent
            bb_root = folder.relative_to(folder.parent.parent)
            if 'set' not in str(bb_root):
                bb_root = folder.relative_to(folder.parent)
            bb_file = Path('bbs') / bb_root / (im.path.stem + '.pts')
            im.landmarks['bb'] = mio.import_landmark_file(str(bb_file))

            im = im.crop_to_landmarks_proportion(0.3, group='bb')
            im = im.rescale_to_pointcloud(reference_shape, group=group)
            im = grey_to_rgb(im)

            pixel_arrays.append(im.pixels.transpose(1, 2, 0))
            shapes.append(im.landmarks[group].lms)
            boxes.append(im.landmarks['bb'].lms)

    train_dir = Path(FLAGS.train_dir)
    mio.export_pickle(reference_shape.points,
                      train_dir / 'reference_shape.pkl',
                      overwrite=True)
    print('created reference_shape.pkl using the {} group'.format(group))

    pca_model = detect.create_generator(shapes, boxes)

    # Pad every image to the largest height/width found.
    largest = np.max([arr.shape for arr in pixel_arrays], axis=0)
    batch_shape = [len(pixel_arrays)] + list(largest)
    padded_images = np.random.rand(*batch_shape).astype(np.float32)
    print(padded_images.shape)

    for index, (arr, lms) in enumerate(zip(pixel_arrays, shapes)):
        height, width = arr.shape[:2]
        dy = max(int((batch_shape[1] - height - 1) / 2), 0)
        dx = max(int((batch_shape[2] - width - 1) / 2), 0)

        # The landmark coordinates are shifted in place, so the entries of
        # ``shapes`` follow their image onto the canvas.
        coords = lms.points
        coords[:, 0] += dy
        coords[:, 1] += dx

        padded_images[index, dy:(height + dy), dx:(width + dx)] = arr

    return padded_images, shapes, reference_shape.points, pca_model
def train(scope=''):
    """Train on dataset for a number of steps.

    Builds a TensorFlow 1.x graph (input pipeline, MDM model, loss,
    optimizer, moving averages, summaries), restores the latest checkpoint
    from ``g_config['train_dir']`` if one exists, and then runs the training
    loop up to ``g_config['max_steps']``, periodically logging, writing
    summaries and saving checkpoints.

    Args:
        scope: scope name passed to ``tf.get_collection`` when gathering
            the ``UPDATE_OPS`` collection.

    Returns:
        None.

    Raises:
        AssertionError: if the loaded mean shape is not a numpy array, if
            its number of rows differs from ``g_config['num_patches']``, or
            if the loss becomes NaN during training.
    """
    with tf.Graph().as_default(), tf.device('/gpu:0'):
        # Global steps
        tf_global_step = tf.get_variable(
            'GlobalStep', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        # Learning rate
        tf_lr = tf.train.exponential_decay(g_config['learning_rate'],
                                           tf_global_step,
                                           g_config['learning_rate_step'],
                                           g_config['learning_rate_decay'],
                                           staircase=True,
                                           name='LearningRate')
        tf.summary.scalar('learning_rate', tf_lr)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.AdamOptimizer(tf_lr)

        # Make sure the dataset files exist before reading them back below.
        data_provider.prepare_images(g_config['train_dataset'].split(':'),
                                     num_patches=g_config['num_patches'],
                                     verbose=True)
        # Dataset root: two levels above the first dataset path/pattern.
        path_base = Path(g_config['train_dataset'].split(':')[0]).parent.parent
        _mean_shape = mio.import_pickle(path_base / 'reference_shape.pkl')
        # meta.txt holds space-separated integers used as the image shape.
        with Path(path_base / 'meta.txt').open('r') as ifs:
            _image_shape = [int(x) for x in ifs.read().split(' ')]
        assert (isinstance(_mean_shape, np.ndarray))
        _pca_shapes = []
        _pca_bbs = []
        # Read the stored shapes and boxes back as [N, 2] point clouds.
        for item in tf.io.tf_record_iterator(str(path_base / 'pca.bin')):
            example = tf.train.Example()
            example.ParseFromString(item)
            _pca_shape = np.array(example.features.feature['pca/shape'].
                                  float_list.value).reshape((-1, 2))
            _pca_bb = np.array(
                example.features.feature['pca/bb'].float_list.value).reshape(
                    (-1, 2))
            _pca_shapes.append(PointCloud(_pca_shape))
            _pca_bbs.append(PointCloud(_pca_bb))
        # NOTE(review): _pca_model is not referenced again in this function.
        _pca_model = detect.create_generator(_pca_shapes, _pca_bbs)
        assert (_mean_shape.shape[0] == g_config['num_patches'])
        tf_mean_shape = tf.constant(_mean_shape,
                                    dtype=tf.float32,
                                    name='MeanShape')

        def decode_feature(serialized):
            """Parse one serialized example into (image, shape) tensors."""
            feature = {
                'train/image': tf.FixedLenFeature([], tf.string),
                'train/shape': tf.VarLenFeature(tf.float32),
            }
            features = tf.parse_single_example(serialized, features=feature)
            # The image is stored as raw float32 bytes.
            decoded_image = tf.decode_raw(features['train/image'], tf.float32)
            decoded_image = tf.reshape(decoded_image, _image_shape)
            decoded_shape = tf.sparse.to_dense(features['train/shape'])
            decoded_shape = tf.reshape(decoded_shape,
                                       (g_config['num_patches'], 2))
            return decoded_image, decoded_shape

        def get_random_sample(image, shape, rotation_stddev=10):
            """Randomly mirror/rotate/crop a sample and resize to 112x112.

            Runs as plain numpy/menpo code (wrapped by tf.py_func below).
            """
            # Read a random image with landmarks and bb
            image = menpo.image.Image(image.transpose((2, 0, 1)), copy=False)
            image.landmarks['PTS'] = PointCloud(shape)

            # Mirror with probability 0.5.
            if np.random.rand() < .5:
                image = utils.mirror_image(image)

            # Rotate with probability 0.5.
            if np.random.rand() < .5:
                theta = np.random.normal(scale=rotation_stddev)
                rot = menpo.transform.rotate_ccw_about_centre(
                    image.landmarks['PTS'], theta)
                image = image.warp_to_shape(image.shape, rot)

            # Square box centred on the landmarks.
            bb = image.landmarks['PTS'].bounding_box().points
            miny, minx = np.min(bb, 0)
            maxy, maxx = np.max(bb, 0)
            bbsize = max(maxx - minx, maxy - miny)
            center = [(miny + maxy) / 2., (minx + maxx) / 2.]
            image.landmarks['bb'] = PointCloud([
                [center[0] - bbsize * 0.5, center[1] - bbsize * 0.5],
                [center[0] + bbsize * 0.5, center[1] + bbsize * 0.5],
            ]).bounding_box()

            # Random crop margin in [0, 1/3).
            proportion = float(np.random.rand() / 3)
            image = image.crop_to_landmarks_proportion(proportion, group='bb')
            image = image.resize((112, 112))

            random_image = image.pixels.transpose(1, 2, 0).astype('float32')
            random_shape = image.landmarks['PTS'].points.astype('float32')
            return random_image, random_shape

        def get_init_shape(image, shape, mean_shape):
            """Scale and centre the mean shape onto the 112x112 image box."""
            def norm(x):
                # Root of the summed squared deviations from the mean point.
                return tf.sqrt(
                    tf.reduce_sum(tf.square(x - tf.reduce_mean(x, 0))))

            with tf.name_scope('align_shape_to_bb', values=[mean_shape]):
                min_xy = tf.reduce_min(mean_shape, 0)
                max_xy = tf.reduce_max(mean_shape, 0)
                min_x, min_y = min_xy[0], min_xy[1]
                max_x, max_y = max_xy[0], max_xy[1]
                # Four corners of the mean shape's axis-aligned box.
                mean_shape_bb = tf.stack([[min_x, min_y], [max_x, min_y],
                                          [max_x, max_y], [min_x, max_y]])
                # Four corners of the full 112x112 image.
                bb = tf.stack([[0.0, 0.0], [112.0, 0.0], [112.0, 112.0],
                               [0.0, 112.0]])
                ratio = norm(bb) / norm(mean_shape_bb)
                initial_shape = tf.add(
                    (mean_shape - tf.reduce_mean(mean_shape_bb, 0)) * ratio,
                    tf.reduce_mean(bb, 0),
                    name='initial_shape')
            initial_shape.set_shape(tf_mean_shape.get_shape())
            return image, shape, initial_shape

        def distort_color(image, shape, init_shape):
            """Apply data_provider.distort_color to the image only."""
            return data_provider.distort_color(image), shape, init_shape

        with tf.name_scope('DataProvider', values=[tf_mean_shape]):
            tf_dataset = tf.data.TFRecordDataset(
                [str(path_base / 'train.bin')])
            tf_dataset = tf_dataset.repeat()
            tf_dataset = tf_dataset.map(decode_feature)
            # Augmentation runs in Python via py_func.
            tf_dataset = tf_dataset.map(lambda x, y: tf.py_func(
                get_random_sample, [x, y], [tf.float32, tf.float32],
                stateful=True,
                name='RandomSample'))
            tf_dataset = tf_dataset.map(
                partial(get_init_shape, mean_shape=tf_mean_shape))
            tf_dataset = tf_dataset.map(distort_color)
            tf_dataset = tf_dataset.batch(g_config['batch_size'], True)
            tf_dataset = tf_dataset.prefetch(7500)
            tf_iterator = tf_dataset.make_one_shot_iterator()
            tf_images, tf_shapes, tf_initial_shapes = tf_iterator.get_next(
                name='Batch')
            tf_images.set_shape([g_config['batch_size'], 112, 112, 3])
            # NOTE(review): 73 is hard-coded here while the landmark count
            # elsewhere comes from g_config['num_patches'] — confirm they
            # are meant to always agree.
            tf_shapes.set_shape([g_config['batch_size'], 73, 2])
            tf_initial_shapes.set_shape([g_config['batch_size'], 73, 2])

        print('Defining model...')
        with tf.device(g_config['train_device']):
            tf_model = mdm_model.MDMModel(
                tf_images,
                tf_shapes,
                tf_initial_shapes,
                batch_size=g_config['batch_size'],
                num_iterations=g_config['num_iterations'],
                num_patches=g_config['num_patches'],
                patch_shape=(g_config['patch_size'], g_config['patch_size']),
                num_channels=3)
            with tf.name_scope('Losses',
                               values=[tf_model.prediction, tf_shapes]):
                tf_norm_error = tf_model.normalized_rmse(
                    tf_model.prediction, tf_shapes)
                tf_loss = tf.reduce_mean(tf_norm_error)
            tf.summary.scalar('losses/total', tf_loss)
            # Calculate the gradients for the batch of data
            tf_grads = opt.compute_gradients(tf_loss)
            tf.summary.histogram('dx', tf_model.prediction - tf_shapes)

        bn_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)

        # Add histograms for gradients.
        for grad, var in tf_grads:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/gradients', grad)

        # Apply the gradients to adjust the shared variables.
        with tf.name_scope('Optimizer', values=[tf_grads, tf_global_step]):
            apply_gradient_op = opt.apply_gradients(tf_grads,
                                                    global_step=tf_global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

        # Track the moving averages of all trainable variables.
        # Note that we maintain a "double-average" of the BatchNormalization
        # global statistics. This is more complicated then need be but we employ
        # this for backward-compatibility with our previous models.
        with tf.name_scope('MovingAverage', values=[tf_global_step]):
            variable_averages = tf.train.ExponentialMovingAverage(
                g_config['MOVING_AVERAGE_DECAY'], tf_global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
            variables_averages_op = variable_averages.apply(
                variables_to_average)

        # Group all updates to into a single train op.
        bn_updates_op = tf.group(*bn_updates, name='BNGroup')
        train_op = tf.group(apply_gradient_op,
                            variables_averages_op,
                            bn_updates_op,
                            name='TrainGroup')

        # Create a saver.
        saver = tf.train.Saver()

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph. allow_soft_placement must be
        # set to True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()
        print('Initializing variables...')
        sess.run(init)
        print('Initialized variables.')

        start_step = 0
        ckpt = tf.train.get_checkpoint_state(g_config['train_dir'])
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            # Assuming model_checkpoint_path looks something like:
            # /ckpt/train/model.ckpt-0,
            # extract global_step from it.
            start_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) + 1
            print('%s: Pre-trained model restored from %s' %
                  (datetime.now(), g_config['train_dir']))

        summary_writer = tf.summary.FileWriter(g_config['train_dir'],
                                               sess.graph)

        print('Starting training...')
        for step in range(start_step, g_config['max_steps']):
            start_time = time.time()
            _, loss_value = sess.run([train_op, tf_loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            # Console log every 100 steps.
            if step % 100 == 0:
                examples_per_sec = g_config['batch_size'] / float(duration)
                format_str = (
                    '%s: step %d, loss = %.4f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, duration))

            # Summaries every 200 steps.
            # NOTE(review): this is a separate sess.run, so it presumably
            # evaluates the summaries on a freshly drawn batch — confirm.
            if step % 200 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == g_config['max_steps']:
                checkpoint_path = os.path.join(g_config['train_dir'],
                                               'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
def __init__(self, points, polylist):
    """Initialise the point cloud and remember its polygon list.

    Args:
        points: point data forwarded unchanged to ``PointCloud.__init__``.
        polylist: stored as-is on ``self.polylist``.
            NOTE(review): presumably a list of index groups describing
            polygons over ``points`` — confirm against callers.
    """
    # The base initialiser is called explicitly on PointCloud.
    PointCloud.__init__(self, points)
    self.polylist = polylist
def _prepare_images_progress(image_paths, verbose):
    """Yield each path, printing a refreshed 10-cell progress bar if verbose."""
    total = len(image_paths)
    for counter, path in enumerate(image_paths, 1):
        if verbose:
            status = 10.0 * counter / total
            filled = int(status)
            bar = '=' * filled + ' ' * (10 - filled)
            print('\rPreparing {:2.2f}%[{}] {} '.format(
                status * 10, bar, path), end='')
        yield path
    if verbose:
        # Terminate the carriage-return-refreshed progress line.
        print('')


def _prepare_images_read_list(list_path):
    """Read one path per line from a split file, ignoring blank lines."""
    with list_path.open('rb') as ifs:
        return [
            Path(line.decode('utf-8').rstrip('\r\n'))
            for line in ifs if line.strip()
        ]


def _prepare_images_write_list(list_path, image_paths):
    """Write one UTF-8 encoded path per line to a split file."""
    with list_path.open('wb') as ofs:
        ofs.writelines(
            str(item).encode('utf-8') + b'\n' for item in image_paths)


def _prepare_images_load_cropped(path):
    """Import an image, attach its 'bb' landmarks and crop around them."""
    mp_image = mio.import_image(path)
    bb_file = (mp_image.path.parent.parent / 'BoundingBoxes' /
               (mp_image.path.stem + '.pts'))
    mp_image.landmarks['bb'] = mio.import_landmark_file(str(bb_file))
    return mp_image.crop_to_landmarks_proportion(0.3, group='bb')


def _prepare_images_pad(mp_image, canvas_shape):
    """Centre the image on a float32 noise canvas; return (canvas, dy, dx)."""
    height, width = mp_image.pixels.shape[1:]  # pixels are [C, H, W]
    dy = max(int((canvas_shape[0] - height - 1) / 2), 0)
    dx = max(int((canvas_shape[1] - width - 1) / 2), 0)
    canvas = np.random.rand(*canvas_shape).astype(np.float32)
    canvas[dy:(height + dy), dx:(width + dx), :] = \
        mp_image.pixels.transpose(1, 2, 0)
    return canvas, dy, dx


def prepare_images(paths, num_patches=73, verbose=True):
    """Save Train Images to TFRecord

    Every artefact is written next to the dataset (two levels above the
    first entry of ``paths``) and is only generated when it does not exist
    yet: the train/test/validate split lists, ``reference_shape.pkl``,
    ``pca.bin`` with ``meta.txt`` (the common padded image shape),
    ``train.bin`` and ``test.bin``.

    Args:
        paths: a list of strings containing the data directories
            (glob patterns relative to the current directory).
        num_patches: number of landmarks
        verbose: boolean, print debugging info.
    Returns:
        None
    """
    if not paths:
        return

    # .../<Dataset>/Images/*.png -> .../<Dataset>
    path_base = Path(paths[0]).parent.parent

    # First: get all image paths that have a readable bounding-box file.
    image_paths = []
    for path in paths:
        for image_file in Path('.').glob(path):
            bb_file = (image_file.parent.parent / 'BoundingBoxes' /
                       (image_file.stem + '.pts'))
            try:
                mio.import_landmark_file(str(bb_file))
            except ValueError:
                continue
            image_paths.append(image_file)
    print('Got all image paths...')

    # Second: split to train, test and validate. 7:2:1
    train_list = path_base / 'train_img.txt'
    test_list = path_base / 'test_img.txt'
    val_list = path_base / 'val_img.txt'
    if train_list.exists():
        train_paths = _prepare_images_read_list(train_list)
        test_paths = _prepare_images_read_list(test_list)
        val_paths = _prepare_images_read_list(val_list)
    else:
        random.shuffle(image_paths)
        num_train = int(len(image_paths) * 0.7)
        num_test = int(len(image_paths) * 0.2)
        train_paths = sorted(image_paths[:num_train])
        test_paths = sorted(image_paths[num_train:num_train + num_test])
        val_paths = sorted(image_paths[num_train + num_test:])
        _prepare_images_write_list(train_list, train_paths)
        _prepare_images_write_list(test_list, test_paths)
        _prepare_images_write_list(val_list, val_paths)
    print('Found Train/Test/Validate {}/{}/{}'.format(
        len(train_paths), len(test_paths), len(val_paths)))

    # Third: export reference shape on train
    reference_file = path_base / 'reference_shape.pkl'
    if reference_file.exists():
        reference_shape = PointCloud(mio.import_pickle(reference_file))
    else:
        reference_shape = PointCloud(
            build_reference_shape(train_paths, num_patches))
        mio.export_pickle(reference_shape.points, reference_file,
                          overwrite=True)
        print('Created reference_shape.pkl')

    # Fourth: image shape & pca
    image_shape = [0, 0, 3]  # [H, W, C]
    pca_file = path_base / 'pca.bin'
    meta_file = path_base / 'meta.txt'
    if pca_file.exists() and meta_file.exists():
        with meta_file.open('r') as ifs:
            image_shape = [int(x) for x in ifs.read().split(' ')]
    else:
        with tf.io.TFRecordWriter(str(pca_file)) as ofs:
            for path in _prepare_images_progress(train_paths, verbose):
                mp_image = _prepare_images_load_cropped(path)
                mp_image = mp_image.rescale_to_pointcloud(
                    reference_shape, group='PTS')
                mp_image = grey_to_rgb(mp_image)
                assert(mp_image.pixels.shape[0] == image_shape[2])
                # Track the largest height/width seen so far.
                image_shape[0] = max(mp_image.pixels.shape[1], image_shape[0])
                image_shape[1] = max(mp_image.pixels.shape[2], image_shape[1])
                features = tf.train.Features(
                    feature={
                        'pca/shape': tf.train.Feature(
                            float_list=tf.train.FloatList(
                                value=mp_image.landmarks['PTS']
                                .points.flatten())
                        ),
                        'pca/bb': tf.train.Feature(
                            float_list=tf.train.FloatList(
                                value=mp_image.landmarks['bb']
                                .points.flatten())
                        ),
                    }
                )
                ofs.write(
                    tf.train.Example(features=features).SerializeToString())
        # meta.txt: "H W C" separated by single spaces.
        with meta_file.open('w') as ofs:
            ofs.write(' '.join('{}'.format(s) for s in image_shape))
    print('Image shape', image_shape)

    # Fifth: train data
    train_file = path_base / 'train.bin'
    if not train_file.exists():
        random.shuffle(train_paths)
        with tf.io.TFRecordWriter(str(train_file)) as ofs:
            print('Preparing train data...')
            for path in _prepare_images_progress(train_paths, verbose):
                mp_image = _prepare_images_load_cropped(path)
                mp_image = mp_image.rescale_to_pointcloud(
                    reference_shape, group='PTS')
                mp_image = grey_to_rgb(mp_image)

                # Padding to the same size
                padded_image, dy, dx = _prepare_images_pad(
                    mp_image, image_shape)
                padded_landmark = mp_image.landmarks['PTS'].points
                padded_landmark[:, 0] += dy
                padded_landmark[:, 1] += dx

                features = tf.train.Features(
                    feature={
                        'train/image': tf.train.Feature(
                            # tobytes() replaces the deprecated tostring()
                            bytes_list=tf.train.BytesList(
                                value=[tf.compat.as_bytes(
                                    padded_image.tobytes())])
                        ),
                        'train/shape': tf.train.Feature(
                            float_list=tf.train.FloatList(
                                value=padded_landmark.flatten())
                        ),
                    }
                )
                ofs.write(
                    tf.train.Example(features=features).SerializeToString())

    # Sixth: test data
    test_file = path_base / 'test.bin'
    if not test_file.exists():
        with tf.io.TFRecordWriter(str(test_file)) as ofs:
            print('Preparing test data...')
            for path in _prepare_images_progress(test_paths, verbose):
                mp_image = _prepare_images_load_cropped(path)
                # Test images are rescaled via the reference shape aligned
                # to the bounding box, not via the ground truth.
                mp_bb = mp_image.landmarks['bb'].bounding_box()
                mp_image.landmarks['init'] = align_shape_with_bounding_box(
                    reference_shape, mp_bb)
                mp_image = mp_image.rescale_to_pointcloud(
                    reference_shape, group='init')
                mp_image = grey_to_rgb(mp_image)

                # Padding to the same size
                # 200*(1+0.3*2)/sqrt(2) == 226.7
                padded_image, dy, dx = _prepare_images_pad(
                    mp_image, (256, 256, 3))
                padded_landmark = mp_image.landmarks['PTS'].points
                padded_landmark[:, 0] += dy
                padded_landmark[:, 1] += dx
                padded_init_landmark = mp_image.landmarks['init'].points
                padded_init_landmark[:, 0] += dy
                padded_init_landmark[:, 1] += dx

                features = tf.train.Features(
                    feature={
                        'test/image': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[tf.compat.as_bytes(
                                    padded_image.tobytes())])
                        ),
                        'test/shape': tf.train.Feature(
                            float_list=tf.train.FloatList(
                                value=padded_landmark.flatten())
                        ),
                        'test/init': tf.train.Feature(
                            float_list=tf.train.FloatList(
                                value=padded_init_landmark.flatten())
                        ),
                    }
                )
                ofs.write(
                    tf.train.Example(features=features).SerializeToString())
def load_images_test_300VW(paths, reference_shape, group=None, verbose=True, PLOT=False):
    """Load 300VW test frames, attach their landmark files and crop them.

    For every image imported from each entry of ``paths`` the bounding box is
    read from ``<bb_root>/bbs/<stem>.pts`` and the ground-truth landmarks from
    ``<bb_root>/annot/<stem>.pts``.  They are stored in the ``'bb'`` and
    ``'PTS'`` landmark groups, and the image is then cropped around ``'bb'``
    with a proportional margin of 0.3.  No rescaling or padding is applied.

    Args:
      paths: a list of locations handed to ``mio.import_images``.
      reference_shape: unused; kept so existing callers keep working.
      group: unused; kept so existing callers keep working.
      verbose: boolean, print progress information.
      PLOT: unused; kept so existing callers keep working.
    Returns:
      A list of the cropped images, in import order.
    """
    images = []
    for path in paths:
        if verbose:
            print('Importing data from {}'.format(path))

        for im in mio.import_images(path, verbose=verbose, as_generator=True):
            # Annotation folders are looked up relative to the frame folder.
            # When that folder's path does not contain 'set', only its last
            # component is used (i.e. a path relative to the working dir).
            bb_root = im.path.parent
            if 'set' not in str(bb_root):
                bb_root = bb_root.relative_to(bb_root.parent)

            pts_name = im.path.stem + '.pts'
            im.landmarks['bb'] = mio.import_landmark_file(
                bb_root / 'bbs' / pts_name)
            im.landmarks['PTS'] = mio.import_landmark_file(
                bb_root / 'annot' / pts_name)
            images.append(im.crop_to_landmarks_proportion(0.3, group='bb'))
    return images
def load_images_aflw(paths, group=None, verbose=True, PLOT=True, AFLW=False, PLOT_shape=False):
    """Collect landmark shapes from ``paths`` and write shape-space files.

    Every imported image gets a ``'bb'`` landmark group, is cropped around it
    with a 0.3 proportional margin, rescaled to the reference shape and passed
    through ``grey_to_rgb``.  The landmark arrays of all images are stacked and
    saved to ``<FLAGS.train_dir>/shape_space_all.npy``; the reference shape is
    exported to ``<FLAGS.train_dir>/reference_shape.pkl``.

    NOTE(review): the function calls ``exit(0)`` both in the ``PLOT`` branch
    and immediately before its ``return`` statement, so as written the return
    value is never delivered to a caller -- confirm this is intended.

    Args:
      paths: a list of strings containing the data directories.
      group: landmark group containing the ground truth landmarks; when falsy
        the first key of the first image's landmarks is used.
      verbose: boolean, print debugging info.
      PLOT: boolean, render k-means centres to ``plot_shape_space_aflw/`` and
        exit.
      AFLW: boolean, load the reference shape from a pickle at a hard-coded
        path and derive ``'bb'`` from the ``'PTS'`` group instead of reading
        bounding-box files.
      PLOT_shape: boolean, gather offset statistics for (currently
        commented-out) scatter plots.
    Returns:
      (unreachable, see note above) padded_images, shapes,
      reference_shape.points, pca_model, centers.
    """
    images = []
    shapes = []
    bbs = []
    shape_space = []
    plot_shape_x = []
    plot_shape_y = []
    # compute mean shape
    if AFLW:
        # NOTE(review): machine-specific absolute path.
        # reference_shape = PointCloud(mio.import_pickle(Path('/home/hliu/gmh/RL_FA/mdm_aflw/ckpt/train_aflw') / 'reference_shape.pkl'))
        reference_shape = mio.import_pickle(
            Path('/home/hliu/gmh/RL_FA/mdm_aflw/ckpt/train_aflw') / 'reference_shape.pkl')
    else:
        reference_shape = PointCloud(build_reference_shape(paths))

    for path in paths:
        if verbose:
            print('Importing data from {}'.format(path))

        for im in mio.import_images(path, verbose=verbose, as_generator=True):
            # group = group or im.landmarks[group]._group_label
            # NOTE(review): indexing ``keys()`` presumably needs a list-like
            # return value (Python 2 style) -- confirm against the menpo
            # version in use.
            group = group or im.landmarks.keys()[0]
            # Bounding-box files are looked up under 'bbs/<bb_root>/'.
            bb_root = im.path.parent.relative_to(im.path.parent.parent.parent)
            if 'set' not in str(bb_root):
                bb_root = im.path.parent.relative_to(im.path.parent.parent)
            if AFLW:
                im.landmarks['bb'] = im.landmarks['PTS'].lms.bounding_box()
            else:
                im.landmarks['bb'] = mio.import_landmark_file(
                    str(Path('bbs') / bb_root / (im.path.stem + '.pts')))
            im = im.crop_to_landmarks_proportion(0.3, group='bb')
            im = im.rescale_to_pointcloud(reference_shape, group=group)
            im = grey_to_rgb(im)
            # NOTE(review): the image itself is never stored; ``images`` stays
            # empty, so the padding loop further down does not iterate.
            # knn_2D.append(im.pixels.transpose(1, 2, 0))
            shapes.append(im.landmarks[group].lms)
            shape_space.append(im.landmarks[group].lms.points)
            bbs.append(im.landmarks['bb'].lms)
            if PLOT_shape:
                # Summed per-axis offset of this shape from the reference.
                x_tmp = np.sum((im.landmarks[group].lms.points[:, 0] -
                                reference_shape.points[:, 0]))
                y_tmp = np.sum((im.landmarks[group].lms.points[:, 1] -
                                reference_shape.points[:, 1]))
                if x_tmp < 0 and y_tmp < 0:
                    plot_shape_x.append(x_tmp)
                    plot_shape_y.append(y_tmp)
    shape_space = np.array(shape_space)
    print('shape_space:', shape_space.shape)
    train_dir = Path(FLAGS.train_dir)
    if PLOT_shape:
        k_nn_plot_x = []
        k_nn_plot_y = []
        # The reshape to [-1, 19, 2] hard-codes 19 landmarks per centre.
        centers = utils.k_means(shape_space, 500, num_patches=19)
        centers = np.reshape(centers, [-1, 19, 2])
        for i in range(centers.shape[0]):
            x_tmp = np.sum((centers[i, :, 0] - reference_shape.points[:, 0]))
            y_tmp = np.sum((centers[i, :, 1] - reference_shape.points[:, 1]))
            if x_tmp < 0 and y_tmp < 0:
                k_nn_plot_x.append(x_tmp)
                k_nn_plot_y.append(y_tmp)
        # plt.scatter(plot_shape_x, plot_shape_y, s=20)
        # plt.scatter(k_nn_plot_x, k_nn_plot_y, s=40)
        # plt.xticks(())
        # plt.yticks(())
        # plt.show()
        # pdb.set_trace()
    np.save(train_dir / 'shape_space_all.npy', shape_space)
    # centers = utils.k_means(shape_space, 100)
    # centers = np.reshape(centers, [-1, 68, 2])
    # np.save(train_dir/'shape_space_origin.npy', centers)
    # print('created shape_space.npy using the {} group'.format(group))
    # exit(0)
    mio.export_pickle(reference_shape.points, train_dir / 'reference_shape.pkl', overwrite=True)
    print('created reference_shape.pkl using the {} group'.format(group))

    pca_model = detect.create_generator(shapes, bbs)

    # Pad knn_2D to max length
    # NOTE(review): ``padded_images`` is a single [272, 261, 3] array, yet the
    # loop below indexes it with a leading image index -- confirm the intended
    # shape before re-enabling image collection.
    max_shape = [272, 261, 3]
    padded_images = np.random.rand(*max_shape).astype(np.float32)
    print(padded_images.shape)

    if PLOT:
        # plot without padding
        centers = utils.k_means(shape_space, 500, num_patches=19)
        centers = np.reshape(centers, [-1, 19, 2])
        plot_img = cv2.imread('a.png').transpose(2, 0, 1)
        centers_tmp = np.zeros(centers.shape)
        # menpo_img = mio.import_image('a.png')
        menpo_img = menpo.image.Image(plot_img)
        for i in range(centers.shape[0]):
            menpo_img.view()
            # Shift each centre so its minimum coordinate sits at 20.
            min_y = np.min(centers[i, :, 0])
            min_x = np.min(centers[i, :, 1])
            centers_tmp[i, :, 0] = centers[i, :, 0] - min_y + 20
            centers_tmp[i, :, 1] = centers[i, :, 1] - min_x + 20
            print(centers_tmp[i, :, :])
            menpo_img.landmarks['center'] = PointCloud(centers_tmp[i, :, :])
            menpo_img.view_landmarks(group='center', marker_face_colour='b', marker_size='16')
            # menpo_img.landmarks['center'].view(render_legend=True)
            plt.savefig('plot_shape_space_aflw/' + str(i) + '.png')
            plt.close()
        # Plotting mode terminates the whole process.
        exit(0)

    # !!!shape_space without delta, which means shape_space has already been padded!
    # delta = np.zeros(shape_space.shape)

    for i, im in enumerate(images):
        height, width = im.shape[:2]
        # Offsets that centre the image inside ``max_shape``.
        dy = max(int((max_shape[0] - height - 1) / 2), 0)
        dx = max(int((max_shape[1] - width - 1) / 2), 0)
        lms = shapes[i]
        pts = lms.points
        pts[:, 0] += dy
        pts[:, 1] += dx
        shape_space[i, :, 0] += dy
        shape_space[i, :, 1] += dx
        # delta[i][:, 0] = dy
        # delta[i][:, 1] = dx
        lms = lms.from_vector(pts)
        padded_images[i, dy:(height + dy), dx:(width + dx)] = im

    # shape_space = np.concatenate((shape_space, delta), 2)
    centers = utils.k_means(shape_space, 1000, num_patches=19)
    centers = np.reshape(centers, [-1, 19, 2])
    # pdb.set_trace()
    np.save(train_dir / 'shape_space.npy', centers)
    print('created shape_space.npy using the {} group'.format(group))
    # NOTE(review): this exit makes the return below unreachable.
    exit(0)
    return padded_images, shapes, reference_shape.points, pca_model, centers
def mirror_landmarks_68(lms, im_size):
    """Return the horizontally mirrored version of a landmark set.

    The second coordinate of every point is reflected about ``im_size[1]``
    (absolute difference), the first coordinate is kept as its absolute
    value, and the points are then re-ordered with ``mirrored_parts_68``.
    """
    points = lms.as_vector().reshape(-1, 2)
    reflection_origin = np.array([0, im_size[1]])
    reflected = abs(reflection_origin - points)
    return PointCloud(reflected[mirrored_parts_68])
def influence():
    """Run the frozen graph over ``FLAGS.dataset`` and report landmark errors.

    Each image matched by the glob is averaged to a single channel when it has
    three, cropped to a square box built around its ``BoundingBoxes``
    annotation, resized to 112x112 and fed to the imported graph.  Per-landmark
    and mean errors are written to ``errors.txt`` and a summary is printed.
    """
    dataset_images = sorted(Path('.').glob(FLAGS.dataset))
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        with open(MDM_MODEL_PATH, 'rb') as model_file:
            frozen_graph = tf.GraphDef.FromString(model_file.read())
            tf.import_graph_def(frozen_graph)

        session_config = tf.ConfigProto(allow_soft_placement=True)
        session_config.gpu_options.allow_growth = True
        with tf.Session(config=session_config) as sess:
            errors = []
            mean_errors = []
            step = 0
            start_time = time.time()
            for path in dataset_images:
                mp_image = mio.import_image(path)
                assert isinstance(mp_image, menpo.image.Image)
                if mp_image.n_channels == 3:
                    # Collapse the three channels into one by averaging.
                    mp_image.pixels = np.mean(mp_image.pixels, 0, keepdims=True)

                bb_file = Path(path.parent.parent / 'BoundingBoxes' /
                               (path.stem + '.pts'))
                mp_image.landmarks['bb'] = mio.import_landmark_file(str(bb_file))

                # Square box centred on the annotated box; its half side is
                # 4/6 of the longer edge, rounded up.
                y_lo, x_lo = mp_image.landmarks['bb'].points[0]
                y_hi, x_hi = mp_image.landmarks['bb'].points[2]
                centre_x = (x_lo + x_hi) / 2
                centre_y = (y_lo + y_hi) / 2
                half_side = int(math.ceil(max(x_hi - x_lo, y_hi - y_lo) * 4. / 6.))
                mp_image.landmarks['square_bb'] = PointCloud(np.array([
                    [centre_y - half_side, centre_x - half_side],
                    [centre_y + half_side, centre_x + half_side],
                ]))
                mp_image = mp_image.crop_to_landmarks_proportion(
                    0.0, group='square_bb')
                mp_image = mp_image.resize((112, 112))

                # Channels-last layout with a leading batch axis of one.
                np_image = np.expand_dims(
                    mp_image.pixels.transpose((1, 2, 0)), 0)
                np_shape = mp_image.landmarks['PTS'].points

                prediction, = sess.run('import/add:0',
                                       feed_dict={'import/input:0': np_image})
                assert isinstance(prediction, np.ndarray)
                # Swap the two coordinate columns of the network output.
                prediction = prediction.reshape((68, 2))[:, [1, 0]]

                error = normalized_batch_nme(prediction, np_shape)
                mean_error = normalized_nme(error)
                error_level = min(9, int(mean_error * 100))
                concat_image = utils.draw_landmarks_discrete(
                    np_image[0], np_shape, prediction)
                # plt.imsave('err{}/step{}.png'.format(error_level, step), concat_image)

                errors.append(error)
                mean_errors.append(mean_error)
                step += 1
                if step % 20 != 0:
                    continue

                # Throughput report every 20 processed images.
                sec_per_batch = (time.time() - start_time) / 20.0
                examples_per_sec = 1. / sec_per_batch
                log_str = '{}: [{:d} batches done] ({:.1f} examples/sec; {:.3f} sec/batch)'
                print(log_str.format(datetime.now(), step, examples_per_sec,
                                     sec_per_batch))
                start_time = time.time()

            errors = np.array(errors)
            print(errors.shape)
            mean_errors = np.vstack(mean_errors).ravel()
            errors_mean = np.mean(errors, 0)
            mean_errors_mean = mean_errors.mean()

            # One row per image (per-landmark errors, then the image mean)
            # followed by a final row of column means.
            with open('errors.txt', 'w') as ofs:
                for row, avg in zip(errors, mean_errors):
                    ofs.write(''.join('%.4f, ' % col for col in row))
                    ofs.write('%.4f' % avg)
                    ofs.write('\n')
                ofs.write(''.join('%.4f, ' % col for col in errors_mean))
                ofs.write('%.4f' % mean_errors_mean)
                ofs.write('\n')

            # Fraction of images whose mean error is below each threshold.
            auc_at_08 = (mean_errors < .08).mean()
            auc_at_05 = (mean_errors < .05).mean()
            print('Errors', mean_errors.shape)
            print('%s: mean_rmse = %.4f, auc @ 0.05 = %.4f, auc @ 0.08 = %.4f' %
                  (datetime.now(), mean_errors.mean(), auc_at_05, auc_at_08))
def crop_to_face_image(img, bb_dictionary=None, gt=True, margin=0.25, image_size=256, normalize=True,
                       return_transform=False):
    """Crop a face from ``img``, pad the crop to a square and resize it.

    The crop box is taken from ``bb_dictionary`` when one is supplied,
    otherwise from the bounding box of the image's first landmark group.
    When neither is available the whole image is used.

    Args:
        img: image object exposing the menpo-style API used below.
            ``img.path`` is only read when ``bb_dictionary`` is given, so
            images without a file path can still be cropped from their
            landmarks.
        bb_dictionary: optional mapping from image file name to an indexable
            pair whose item 0 is the detector box and item 1 the ground-truth
            box.
        gt: use the ground-truth box (item 1) when true, else the detector
            box (item 0).  Ignored without ``bb_dictionary``.
        margin: margin forwarded to ``center_margin_bb``.
        image_size: side length of the square output image.
        normalize: when true, rescale pixel values to the range [0, 1].
        return_transform: when true, also return the transform produced by
            the crop and resize steps.

    Returns:
        The processed image, or ``(image, transform)`` when
        ``return_transform`` is true.
    """
    if bb_dictionary is not None:
        # The file name is needed only for the dictionary lookup; reading it
        # unconditionally would fail for images that have no ``path``.
        boxes = bb_dictionary[img.path.name]
        bb = boxes[1] if gt else boxes[0]  # ground truth / face detector
    elif img.has_landmarks:
        # No dictionary: derive the box from the first landmark group.
        grp_name = img.landmarks.group_labels[0]
        bb_menpo = img.landmarks[grp_name].bounding_box().points
        bb = np.array([[bb_menpo[0, 1], bb_menpo[0, 0],
                        bb_menpo[2, 1], bb_menpo[2, 0]]])
    else:
        bb = None

    bb_transform = None
    if bb is not None:
        # add margin to bounding box
        bb = center_margin_bb(bb, img.bounds()[1], margin=margin)
        bb_pointcloud = PointCloud(np.array([[bb[0, 1], bb[0, 0]],
                                             [bb[0, 3], bb[0, 0]],
                                             [bb[0, 3], bb[0, 2]],
                                             [bb[0, 1], bb[0, 2]]]))
        if return_transform:
            face_crop, bb_transform = img.crop_to_pointcloud(
                bb_pointcloud, return_transform=True)
        else:
            face_crop = img.crop_to_pointcloud(bb_pointcloud)
    else:
        # if there is no bounding box/gt landmarks, use entire image
        face_crop = img.copy()

    # If the crop is not square, extend the shorter spatial axis at its far
    # end using numpy's 'mean' padding mode.
    h, w = face_crop.shape
    diff = h - w
    if diff < 0:
        face_crop.pixels = np.pad(face_crop.pixels,
                                  ((0, 0), (0, -diff), (0, 0)), 'mean')
    elif diff > 0:
        face_crop.pixels = np.pad(face_crop.pixels,
                                  ((0, 0), (0, 0), (0, diff)), 'mean')

    transform_chain = None
    if return_transform:
        face_crop, rescale_transform = face_crop.resize(
            [image_size, image_size], return_transform=True)
        if bb_transform is None:
            transform_chain = rescale_transform
        else:
            transform_chain = mt.TransformChain(
                transforms=(rescale_transform, bb_transform))
    else:
        face_crop = face_crop.resize([image_size, image_size])

    if face_crop.n_channels == 4:
        # Keep only the first three channels of a four-channel image.
        face_crop.pixels = face_crop.pixels[:3, :, :]

    if normalize:
        face_crop.pixels = face_crop.rescale_pixels(0., 1.).pixels

    if return_transform:
        return face_crop, transform_chain
    return face_crop
def _read_path_lines(list_file):
    """Return the paths stored one per line in the UTF-8 file ``list_file``."""
    with Path(list_file).open('rb') as ifs:
        # Strip only the line terminator, so a final line without a trailing
        # newline keeps its last character; blank lines are skipped.
        return [
            Path(line.rstrip(b'\r\n').decode('utf-8'))
            for line in ifs
            if line.strip()
        ]


def _format_progress_bar(counter, total, path):
    """Build the carriage-return progress line printed while writing records."""
    status = 10.0 * counter / total
    filled = int(status)
    return '\rPreparing {:2.2f}%[{}{}] {} '.format(
        status * 10, '=' * filled, ' ' * (10 - filled), path)


def prepare_images(paths, num_patches=73, verbose=True):
    """Save Train Images to TFRecord, for ShuffleNet

    The dataset root is taken as two levels above ``paths[0]``
    (``.../<Dataset>/Images/*.png -> .../<Dataset>``).  Every artefact is
    created only if it does not exist yet:

    * ``train_img.txt`` / ``test_img.txt`` / ``val_img.txt``: a shuffled
      7:2:1 split of the images that have a readable ``BoundingBoxes/*.pts``
      file;
    * ``reference_shape.pkl``: the reference shape built from the train split;
    * ``train.bin`` / ``test.bin``: TFRecord files with the image bytes and
      landmark coordinates (plus an initial shape for the test set).

    Args:
      paths: a list of strings containing the data directories (glob
        patterns relative to the working directory).
      num_patches: number of landmarks, forwarded to
        ``build_reference_shape``.
      verbose: boolean, print debugging info.
    Returns:
      None
    """
    if not paths:
        return

    # .../<Dataset>/Images/*.png -> .../<Dataset>
    path_base = Path(paths[0]).parent.parent
    train_list = path_base / 'train_img.txt'
    test_list = path_base / 'test_img.txt'
    val_list = path_base / 'val_img.txt'

    # First & Second: get all image paths; split to train, test and validate. 7:2:1
    if train_list.exists():
        train_paths = _read_path_lines(train_list)
        test_paths = _read_path_lines(test_list)
        val_paths = _read_path_lines(val_list)
        print('Found Train/Test/Validate {}/{}/{}'.format(
            len(train_paths), len(test_paths), len(val_paths)))
    else:
        image_paths = []
        for path in paths:
            for file in Path('.').glob(path):
                # Keep only images whose bounding-box file can be imported.
                try:
                    mio.import_landmark_file(
                        str(file.parent.parent / 'BoundingBoxes' /
                            (file.stem + '.pts')))
                except ValueError:
                    continue
                image_paths.append(file)
        print('Got all image paths...')
        random.shuffle(image_paths)
        num_train = int(len(image_paths) * 0.7)
        num_test = int(len(image_paths) * 0.2)
        train_paths = sorted(image_paths[:num_train])
        test_paths = sorted(image_paths[num_train:num_train + num_test])
        val_paths = sorted(image_paths[num_train + num_test:])
        with train_list.open('wb') as train_ofs, \
                test_list.open('wb') as test_ofs, \
                val_list.open('wb') as val_ofs:
            train_ofs.writelines(
                [str(line).encode('utf-8') + b'\n' for line in train_paths])
            test_ofs.writelines(
                [str(line).encode('utf-8') + b'\n' for line in test_paths])
            val_ofs.writelines(
                [str(line).encode('utf-8') + b'\n' for line in val_paths])
        print('Write Train/Test/Validate {}/{}/{}'.format(
            len(train_paths), len(test_paths), len(val_paths)))

    # Third: export reference shape on train
    reference_file = path_base / 'reference_shape.pkl'
    if reference_file.exists():
        reference_shape = PointCloud(mio.import_pickle(reference_file))
    else:
        reference_shape = PointCloud(
            build_reference_shape(train_paths, num_patches))
        mio.export_pickle(reference_shape.points, reference_file,
                          overwrite=True)
        print('Created reference_shape.pkl')

    # Fourth: image shape & pca
    # No need for ShuffleNet

    # Fifth: train data
    train_bin = path_base / 'train.bin'
    if not train_bin.exists():
        random.shuffle(train_paths)
        with tf.io.TFRecordWriter(str(train_bin)) as ofs:
            print('Preparing train data...')
            for counter, path in enumerate(train_paths, 1):
                if verbose:
                    print(_format_progress_bar(counter, len(train_paths), path),
                          end='')
                mp_image = load_image(path, 0.7, 336)
                # Channels-last float32 pixels, serialised as raw bytes
                # (``tobytes`` replaces the removed ``ndarray.tostring``).
                image = mp_image.pixels.transpose(1, 2, 0).astype(np.float32)
                shape = mp_image.landmarks['PTS'].points
                features = tf.train.Features(
                    feature={
                        'train/image':
                            tf.train.Feature(bytes_list=tf.train.BytesList(
                                value=[tf.compat.as_bytes(image.tobytes())])),
                        'train/shape':
                            tf.train.Feature(float_list=tf.train.FloatList(
                                value=shape.flatten()))
                    })
                ofs.write(
                    tf.train.Example(features=features).SerializeToString())
            if verbose:
                print('')

    # Sixth: test data
    test_bin = path_base / 'test.bin'
    if not test_bin.exists():
        with tf.io.TFRecordWriter(str(test_bin)) as ofs:
            print('Preparing test data...')
            for counter, path in enumerate(test_paths, 1):
                if verbose:
                    print(_format_progress_bar(counter, len(test_paths), path),
                          end='')
                mp_image = load_image(path, 1. / 6., 112)
                mp_image.landmarks['init'] = PointCloud(
                    align_reference_shape_to_112(reference_shape.points))
                image = mp_image.pixels.transpose(1, 2, 0).astype(np.float32)
                shape = mp_image.landmarks['PTS'].points
                init = mp_image.landmarks['init'].points
                features = tf.train.Features(
                    feature={
                        'test/image':
                            tf.train.Feature(bytes_list=tf.train.BytesList(
                                value=[tf.compat.as_bytes(image.tobytes())])),
                        'test/shape':
                            tf.train.Feature(float_list=tf.train.FloatList(
                                value=shape.flatten())),
                        'test/init':
                            tf.train.Feature(float_list=tf.train.FloatList(
                                value=init.flatten()))
                    })
                ofs.write(
                    tf.train.Example(features=features).SerializeToString())
            if verbose:
                print('')
PointCloud( np.array([[150.9737801, 1.85331141], [191.20452708, 1.86714624], [237.5088486, 7.16836457], [280.68439528, 19.1356864], [319.00988383, 36.18921029], [351.31395982, 61.11002727], [375.83681819, 86.68264647], [401.50706656, 117.12858347], [408.46977018, 156.72258055], [398.49810436, 197.95690492], [375.44584527, 234.437902], [342.35427495, 267.96920594], [299.04149064, 309.66693535], [250.84207113, 331.07734674], [198.46150259, 339.47188196], [144.62222804, 337.84178783], [89.92321435, 327.81734317], [101.22474793, 26.90269773], [89.23456877, 44.52571118], [84.04683242, 66.6369272], [86.36993557, 88.61559027], [94.88123162, 108.04971327], [88.08448274, 152.88439191], [68.71150917, 176.94681489], [55.7165906, 204.86028035], [53.9169657, 232.87050281], [69.08534014, 259.8486207], [121.82883888, 130.79001073], [152.30894887, 128.91266055], [183.36381228, 128.04534764], [216.59234031, 125.86784329], [235.18182671, 93.18819461], [242.46006172, 117.24575711], [246.52987701, 142.46262589], [240.51603561, 160.38006297], [232.61083444, 175.36132625], [137.35714406, 56.53012228], [124.42060774, 67.0342585], [121.98869265, 87.71006061], [130.4421354, 105.16741493], [139.32511836, 89.65144616], [144.17935107, 69.97931719], [125.04221953, 174.72789706], [103.0127825, 188.96555839], [97.38196408, 210.70911033], [107.31622619, 232.4487582], [119.12835959, 215.57040617], [124.80355957, 193.64317941], [304.3174261, 101.83559243], [293.08249678, 116.76961123], [287.11523488, 132.55435452], [289.39839945, 148.49971074], [283.59574087, 162.33458018], [286.76478391, 187.30470094], [292.65033117, 211.98694428], [310.75841097, 187.33036207], [319.06250309, 165.27131484], [321.3339324, 148.86793045], [321.82844973, 133.03866904], [316.60228316, 115.15885333], [303.45716953, 109.59946563], [301.58563675, 135.32572565], [298.16531481, 148.240518], [295.39615418, 162.35992687], [293.63384823, 201.35617245], [301.95207707, 163.05299135], [305.27555828, 148.48478086], [306.41382116, 
133.02994058]])))
def evaluate(dataset_path):
    """Evaluate the restored actor/critic pair on the images of a dataset.

    Loads ``reference_shape.pkl`` and ``shape_space.npy`` from
    ``FLAGS.checkpoint_dir``, restores the latest checkpoint found there, and
    for every test image runs up to ``MAX_EP_STEPS`` refinement steps.  The
    candidate with the lowest critic value seen so far is kept as the
    prediction, and its error against the ground truth is recorded.  Finally
    the mean error and the fractions of images below 0.05 and 0.08 are
    printed.

    Args:
      dataset_path: forwarded to ``data_provider.load_images_test``.
    Returns:
      None.  Returns early when no checkpoint is found.
    """
    train_dir = Path(FLAGS.checkpoint_dir)
    reference_shape = mio.import_pickle(
        Path(FLAGS.checkpoint_dir) / 'reference_shape.pkl')
    print(train_dir)
    shape_space = np.load(FLAGS.checkpoint_dir + '/shape_space.npy')
    images = data_provider.load_images_test(dataset_path, reference_shape)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    with tf.device('/cpu:0'):
        actor = DDPG.Actor(sess, shape_space, k_nearest, 0, REPLACEMENT)
        critic = DDPG.Critic(sess, 0, GAMMA, REPLACEMENT, k_nearest)
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    saver = tf.train.Saver(tf.all_variables())
    if ckpt and ckpt.model_checkpoint_path:
        if os.path.isabs(ckpt.model_checkpoint_path):
            # Restores from checkpoint with absolute path.
            print('ok')
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Succesfully loaded model from %s' %
                  (ckpt.model_checkpoint_path))
        else:
            # Restores from checkpoint with relative path.
            saver.restore(
                sess,
                os.path.join(FLAGS.checkpoint_dir, ckpt.model_checkpoint_path))
        # The step is the text after the last '-' of the checkpoint file name.
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
            '-')[-1]
        print('Succesfully loaded model from %s at step=%s.' %
              (ckpt.model_checkpoint_path, global_step))
    else:
        print('No checkpoint file found')
        return
    errors = []
    errors_show = []
    # NOTE(review): ``pred_2D`` is initialised once, outside the image loop, so
    # an image for which no step satisfies ``q <= q_2D`` would reuse the
    # previous image's prediction -- confirm this is intended.
    pred_2D = np.zeros((68, 2))
    for i in range(len(images)):
        print(i, '+++++++++++++++++++++++++++++++++++++++++')
        image_test = images[i]
        image_test, init, gt_shape_test, bb_hat = align_shapes(
            image_test, reference_shape)
        # s: current shape estimate; a: update applied at the next step.
        s = init
        a = np.zeros(s.shape)
        # Lowest critic value seen so far for this image (starts at 100).
        q_2D = 100
        for j in range(MAX_EP_STEPS):
            s = s + a
            a_hat = actor.choose_action_hat(s.reshape(1, PATCHES_2D, 2),
                                            image_test)
            b_hat_k_nn = np.squeeze(a_hat)
            # The result is not used below; the call is kept as in the
            # original.
            k_nn_b_a_3_1 = (actor.choose_action(s.reshape(1, PATCHES_2D, 2),
                                                b_hat_k_nn, image_test))
            q = critic.q_value(s, a_hat, image_test)
            # Next update: re-align inside a bounding box blended (weight
            # ``alpha``) between the box of ``s + a_hat`` and ``bb_hat``, then
            # express the result relative to the current state.
            a = align_shapes(
                images[i], np.squeeze(s + a), False,
                PointCloud(
                    PointCloud(np.squeeze(s + a_hat)).bounding_box().points *
                    alpha + bb_hat.points * (1 - alpha)).bounding_box()) - s
            error = rdn_model.normalized_rmse(s + a_hat, gt_shape_test)
            print('===========', q[0][0], error)
            if q <= q_2D:
                # Keep the candidate with the lowest critic value.
                q_2D = q
                pred_2D = s + a_hat
        pred = pred_2D
        error = rdn_model.normalized_rmse(pred, gt_shape_test)
        print(error)
        errors.append(error)
    errors_nn = np.vstack(errors).ravel()
    # ``errors_show`` is never filled, so this loop prints nothing.
    for i, e in enumerate(errors_show):
        print(i, e)
    # errors = np.vstack(errors).ravel()
    errors_ = np.vstack(errors).ravel()
    print(errors_)
    mean_rmse = errors_.mean()
    # Fraction of images whose error is below each threshold.
    auc_at_08 = (errors_ < .08).mean()
    auc_at_05 = (errors_ < .05).mean()
    print('mean_rmse = %.4f, auc @ 0.05 = %.4f, auc @ 0.08 = %.4f' %
          (mean_rmse, auc_at_05, auc_at_08))