def convert(inputs):
    imname = inputs['original_filename']
    image = inputs['image']
    labels = inputs['labels']
    label_vis = inputs['label_vis']
    results = []
    segmentation = labels[:, :, 0]
    norm_factor = float(crop) / max(image.shape[:2])
    image = scipy.misc.imresize(image, norm_factor, interp='bilinear')
    segmentation = scipy.misc.imresize(segmentation, norm_factor, interp='nearest')
    if image.shape[0] < crop:
        # Pad height.
        image = pad_height(image, crop)
        segmentation = pad_height(segmentation, crop)
    if image.shape[1] < crop:
        image = pad_width(image, crop)
        segmentation = pad_width(segmentation, crop)
    labels = np.dstack([segmentation] * 3)
    label_vis = apply_colormap(segmentation, vmax=21, vmin=0, cmap=CMAP)[:, :, :3]
    results.append([imname, image * (labels != 0), labels, label_vis])
    # Swapped version.
    imname = path.splitext(imname)[0] + '_swapped' + path.splitext(imname)[1]
    image = image[:, ::-1]
    segmentation = segmentation[:, ::-1]
    segmentation = lrswap_regions(segmentation)
    labels = np.dstack([segmentation] * 3)
    label_vis = apply_colormap(segmentation, vmax=21, vmin=0, cmap=CMAP)[:, :, :3]
    results.append([imname, image * (labels != 0), labels, label_vis])
    return results
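# `convert` relies on `pad_height` and `pad_width`, which are defined elsewhere
# in this code base. A minimal sketch of the assumed behavior (zero-padding to
# the target side length, split evenly on both sides; the real helpers may pad
# differently):
def pad_height(arr, target):
    """Zero-pad axis 0 of `arr` to `target`, split evenly top/bottom."""
    missing = target - arr.shape[0]
    spec = ((missing // 2, missing - missing // 2),) + ((0, 0),) * (arr.ndim - 1)
    return np.pad(arr, spec, mode='constant')


def pad_width(arr, target):
    """Zero-pad axis 1 of `arr` to `target`, split evenly left/right."""
    missing = target - arr.shape[1]
    spec = ((0, 0), (missing // 2, missing - missing // 2)) + ((0, 0),) * (arr.ndim - 2)
    return np.pad(arr, spec, mode='constant')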
def process_image(im_fp, dset_part, out_folder):
    bn = path.basename(im_fp)
    dn = path.dirname(im_fp)
    img_idx = int(bn[:bn.find("_")])
    body_fp = path.join(dn, bn + '_body.pkl')
    im = sm.imread(im_fp)
    if not path.exists(body_fp):
        raise Exception("Body fit not found for `%s` (`%s`)!" % (im_fp, body_fp))
    rendering = upr.render_body_impl(body_fp,
                                     resolution=(im.shape[0], im.shape[1]),
                                     quiet=True,
                                     use_light=False)[0]
    annotation = upm.regions_to_classes(rendering, upm.six_region_groups,
                                        warn_id=str(img_idx))
    out_fp = path.join(out_folder, dset_part,
                       "{:0{width}d}_bodysegments.png".format(img_idx, width=bn.find("_")))
    sm.imsave(out_fp, annotation)
    out_fp = path.join(out_folder, dset_part,
                       "{:0{width}d}_bodysegments_vis.png".format(img_idx, width=bn.find("_")))
    sm.imsave(out_fp,
              vs.apply_colormap(annotation, vmin=0, vmax=6, cmap=config.CMAP)[:, :, 0:3])
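# A hypothetical driver for `process_image`; the folder layout and split name
# below are placeholders, not part of the original module:
if __name__ == '__main__':
    import glob
    for fp in sorted(glob.glob('/data/up3d/train/*_image.png')):
        process_image(fp, 'train', '/data/up3d_segments')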
def postprocess_colormap(cls, postprocess=True):
    """Create a colormap out of the classes and postprocess the face."""
    batch = vs.apply_colormap(cls, vmin=0, vmax=21, cmap=CMAP)
    cmap = vs.apply_colormap(np.array(range(22), dtype='uint8'),
                             vmin=0, vmax=21, cmap=CMAP)
    COLSET = cmap[18:22]
    FCOL = cmap[11]
    if postprocess:
        kernel = np.ones((2, 2), dtype=np.uint8)
        for im in batch:
            for col in COLSET:
                # Extract the map of the matching color.
                colmap = np.all(im == col, axis=2).astype(np.uint8)
                # Erode.
                while np.sum(colmap) > 10:
                    colmap = cv2.erode(colmap, kernel)
                # Prepare the original map for remapping.
                im[np.all(im == col, axis=2)] = FCOL
                # Backproject.
                im[colmap == 1] = col
    return batch[:, :, :, :3]
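# Hedged usage sketch for `postprocess_colormap`: `cls` is assumed to be a
# batch of 2-D uint8 class maps with labels in [0, 21]. The face-detail
# classes (18-21: lips/nose/eyes) are eroded to small blobs and the remainder
# of their pixels is remapped to the face color (class 11):
demo_cls = np.zeros((1, 64, 64), dtype='uint8')
demo_cls[0, 20:44, 22:42] = 11  # face region
demo_cls[0, 28:32, 26:31] = 20  # left eye; shrunk by the postprocessing
vis_batch = postprocess_colormap(demo_cls)  # -> (1, 64, 64, 3) RGB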
def prepare(im_idx):
    # Load.
    image = sm.imread(
        path.join(CHICTOPIA_DATA_FP, 'JPEGImages', '%s.jpg' % (im_idx)))
    if image.ndim != 3:
        return []
    resize_factor = 513. / max(image.shape[:2])
    im_resized = sm.imresize(image, resize_factor)
    annotation = sm.imread(
        path.join(CHICTOPIA_DATA_FP, 'SegmentationClassAug', '%s.png' % (im_idx)))
    resannot = sm.imresize(annotation, resize_factor, interp='nearest')
    # Holes.
    kernel = np.ones((7, 7), np.uint8)
    closed_annot = cv2.morphologyEx(resannot, cv2.MORPH_CLOSE, kernel)
    grad = cv2.morphologyEx(resannot, cv2.MORPH_BLACKHAT, kernel)
    to_fill = np.logical_and(resannot == 0, grad > 0)
    resannot[to_fill] = closed_annot[to_fill]
    # Face detection.
    FDEBUG = False  # For debugging.
    if FDEBUG:
        win = dlib.image_window()
        win.clear_overlay()
        win.set_image(im_resized)
    face_box = getface(resannot)
    max_IOU = 0.
    if face_box is not None:
        most_likely_det = None
        dets, _, _ = fdetector.run(im_resized, 1, -1)
        for k, d in enumerate(dets):
            # Calculate IOU with ~ground truth.
            ar_pred = (d.right() - d.left()) * (d.bottom() - d.top())
            face_points = (resannot == 11)
            face_pos = np.where(face_points)
            inters_x = np.logical_and(face_pos[1] >= d.left(), face_pos[1] < d.right())
            inters_y = np.logical_and(face_pos[0] >= d.top(), face_pos[0] < d.bottom())
            inters_p = np.sum(np.logical_and(inters_x, inters_y))
            outs_p = np.sum(face_points) - inters_p
            IOU = float(inters_p) / (outs_p + ar_pred)
            if IOU > 1.:
                import ipdb
                ipdb.set_trace()
            if IOU > 0.3 and IOU > max_IOU:
                most_likely_det = d
                max_IOU = IOU
        if most_likely_det is not None:
            shape = spredictor(im_resized, most_likely_det)
            # Save hat, hair and sunglasses (likely to cover eyes or nose).
            hat = (resannot == 1)
            hair = (resannot == 2)
            sungl = (resannot == 3)
            # Add annotations:
            an_lm = {
                (48, 67): 18,  # lips
                (27, 35): 19,  # nose
                (36, 41): 20,  # leye
                (42, 47): 21,  # reye
            }
            for rng, ann_id in an_lm.items():
                poly = np.empty((2, rng[1] - rng[0]), dtype=np.int64)
                for point_idx, point_id in enumerate(range(*rng)):
                    poly[0, point_idx] = shape.part(point_id).x
                    poly[1, point_idx] = shape.part(point_id).y
                # Draw additional annotations.
                poly = poly.T.copy()
                cv2.fillPoly(resannot, [poly], (ann_id, ))
            # Write back hat, hair and sungl.
            resannot[hat] = 1
            resannot[hair] = 2
            resannot[sungl] = 3
            if FDEBUG:
                win.add_overlay(shape)
                win.add_overlay(most_likely_det)
                dlib.hit_enter_to_continue()
        else:
            # No reliable face found.
            return []
    return [('%s.jpg' % (im_idx),
             im_resized,
             np.dstack([resannot] * 3),
             apply_colormap(resannot, vmin=0, vmax=21, cmap=CMAP)[:, :, :3])]
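# `prepare` calls `getface`, which is defined elsewhere in this code base.
# A plausible minimal sketch under the assumption that it returns a
# (left, top, right, bottom) box around the face-class pixels (class 11),
# or None if the annotation contains no face:
def getface(annot, face_class=11):
    ys, xs = np.where(annot == face_class)
    if ys.size == 0:
        return None
    return (int(xs.min()), int(ys.min()), int(xs.max()) + 1, int(ys.max()) + 1)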
def main(caffe_prototxt,  # pylint: disable=too-many-arguments, too-many-locals, too-many-statements
         caffe_model,
         image_folder,
         image_list_file,
         output_folder,
         caffe_install_path,
         n_labels):
    """Store and visualize the segmentation results for a model."""
    LOGGER.info("Storing segmentation results to folder `%s`.", output_folder)
    LOGGER.info("Using caffe from `%s`.", caffe_install_path)
    sys.path.insert(0, path.join(caffe_install_path))
    import caffe  # pylint: disable=import-error
    mean_red = 122.675
    mean_green = 116.669
    mean_blue = 104.008
    # Configure preprocessing.
    caffe.set_mode_gpu()
    net_full_conv = caffe.Net(caffe_prototxt, caffe_model, caffe.TEST)
    net_input_blob = net_full_conv.inputs[0]
    transformer = caffe.io.Transformer({
        net_full_conv.inputs[0]:
        net_full_conv.blobs[net_full_conv.inputs[0]].data.shape
    })
    transformer.set_transpose(net_input_blob, (2, 0, 1))
    transformer.set_channel_swap(net_input_blob, (2, 1, 0))
    transformer.set_raw_scale(net_input_blob, 255.0)
    net_inp_height, net_inp_width = net_full_conv.blobs[net_input_blob].data.shape[2:4]
    # Create and configure the mean image. The transformer applies channel-swap
    # first, so we have BGR order for the mean image.
    mean_image = np.zeros((3, net_inp_height, net_inp_width), dtype='float32')
    mean_image[0, :, :] = mean_blue
    mean_image[1, :, :] = mean_green
    mean_image[2, :, :] = mean_red
    transformer.set_mean(net_input_blob, mean_image)
    with open(image_list_file, 'r') as inf:
        image_list = inf.readlines()
    for imgnames in tqdm.tqdm(image_list):
        imgname = imgnames.split(" ")[0][1:].strip()
        LOGGER.debug("Processing `%s`...", imgname)
        image_filename = path.join(image_folder, imgname)
        # caffe.io loads as RGB, and in range [0., 1.].
        im = caffe.io.load_image(image_filename)  # pylint: disable=invalid-name
        height, width = im.shape[:2]
        # Pad values.
        pad_width = net_inp_width - width
        pad_height = net_inp_height - height
        im = np.lib.pad(  # pylint: disable=invalid-name
            im,
            ((0, pad_height), (0, pad_width), (0, 0)),
            'constant',
            constant_values=-5)
        assert im.shape[0] == net_inp_height
        assert im.shape[1] == net_inp_width
        R = im[:, :, 0]  # pylint: disable=invalid-name
        G = im[:, :, 1]  # pylint: disable=invalid-name
        B = im[:, :, 2]  # pylint: disable=invalid-name
        # Will be multiplied by 255 by the transformer.
        R[R == -5] = mean_red / 255.
        G[G == -5] = mean_green / 255.
        B[B == -5] = mean_blue / 255.
        im[:, :, 0] = R
        im[:, :, 1] = G
        im[:, :, 2] = B
        out = net_full_conv.forward_all(
            data=np.asarray([transformer.preprocess(net_input_blob, im)]))
        pmap = out['prob'][0]
        assert pmap.min() >= 0. and pmap.max() <= 1., (
            "Invalid probability value in result map!")
        prob_map = pmap[:, :height, :width]
        np.save(path.join(output_folder, path.basename(imgname) + '.npy'),
                prob_map)
        maxed_map = np.argmax(prob_map, axis=0)
        vis_image = Image.fromarray(apply_colormap(maxed_map, vmax=n_labels - 1))
        vis_image.save(
            path.join(output_folder, path.basename(imgname) + '.npy.vis.png'))
        raw_image = Image.fromarray(maxed_map.astype('uint8'))
        raw_image.save(
            path.join(output_folder, path.basename(imgname) + '_segmentation.png'))
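# Sanity check of the padding trick used in `main` above: padded pixels are
# marked with the sentinel -5, then overwritten with mean/255 per channel.
# The transformer later multiplies by 255 and subtracts the mean image, so
# padded pixels arrive at the network as exact zeros:
_mean_red = 122.675
_padded = _mean_red / 255.                       # value written into padded R pixels
assert abs(_padded * 255. - _mean_red) < 1e-6    # zero after mean subtraction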
def add_dataset(dset_fp, dset_rel_fp, up3d_fp,  # pylint: disable=too-many-locals, too-many-arguments, too-many-statements, too-many-branches
                train_list_f, val_list_f, test_list_f,
                train_spec, val_spec, test_spec,
                target_person_size, landmarks, partspec, crop, running_idx,
                only_missing=False):
    """Add a dataset to the collection."""
    test_ids = [int(id_[1:6]) for id_ in test_spec]
    train_ids = [int(id_[1:6]) for id_ in train_spec]
    val_ids = [int(id_[1:6]) for id_ in val_spec]
    ids_list = sorted(train_ids + val_ids + test_ids)
    LOGGER.info("Split: %d train, %d val, %d test.",
                len(train_ids), len(val_ids), len(test_ids))
    LOGGER.info("Writing dataset...")
    for im_idx in tqdm.tqdm(ids_list):
        image = scipy.misc.imread(path.join(up3d_fp, '%05d_image.png' % (im_idx)))
        with open(path.join(up3d_fp, '%05d_fit_crop_info.txt' % (im_idx)), 'r') as inf:
            cropinfo = [int(val) for val in inf.readline().strip().split()]
        assert image.ndim == 3
        out_exists = (path.exists(path.join(dset_fp, '%05d_image.png' % (running_idx))) and
                      path.exists(path.join(dset_fp, '%05d_ann.png' % (running_idx))) and
                      path.exists(path.join(dset_fp, '%05d_ann_vis.png' % (running_idx))) and
                      path.exists(path.join(dset_fp, '%05d_render.png' % (running_idx))) and
                      path.exists(path.join(dset_fp, '%05d_render_light.png' % (running_idx))))
        if not (only_missing and out_exists):
            rendering = uncrop(render_body_impl(path.join(up3d_fp, '%05d_body.pkl' % (im_idx)),
                                                resolution=(cropinfo[1], cropinfo[0]),
                                                quiet=True,
                                                use_light=False)[0],
                               image.shape[:2],
                               cropinfo)
            rendering_l = uncrop(render_body_impl(path.join(up3d_fp, '%05d_body.pkl' % (im_idx)),
                                                  resolution=(cropinfo[1], cropinfo[0]),
                                                  quiet=True,
                                                  use_light=True)[0],
                                 image.shape[:2],
                                 cropinfo)
        joints = np.load(path.join(up3d_fp, '%05d_joints.npy' % (im_idx)))
        joints = np.vstack((joints, np.all(joints > 0, axis=0)[None, :]))
        person_size = robust_person_size(joints)
        norm_factor = float(target_person_size) / person_size
        landmark_pos = get_landmark_positions(path.join(up3d_fp, '%05d_body.pkl' % (im_idx)),
                                              (cropinfo[1], cropinfo[0]),
                                              landmarks)
        fac_y = cropinfo[0] / float(cropinfo[3] - cropinfo[2])
        fac_x = cropinfo[1] / float(cropinfo[5] - cropinfo[4])
        landmark_pos[:2, :] /= np.mean([fac_x, fac_y])
        landmark_pos[0, :] += cropinfo[4]
        landmark_pos[1, :] += cropinfo[2]
        landmark_pos[:2, :] *= norm_factor
        if not (only_missing and out_exists):
            image = scipy.misc.imresize(image, norm_factor, interp='bilinear')
            rendering = scipy.misc.imresize(rendering, norm_factor, interp='nearest')
            rendering_l = scipy.misc.imresize(rendering_l, norm_factor, interp='bilinear')
            if image.shape[0] > crop or image.shape[1] > crop:
                LOGGER.debug("Image (original %d, here %d) too large (%s)! Cropping...",
                             im_idx, running_idx, str(image.shape[:2]))
                person_center = np.mean(joints[:2, joints[2, :] == 1], axis=1) * norm_factor
                crop_y, crop_x = get_crop(image, person_center, crop)
                image = image[crop_y[0]:crop_y[1], crop_x[0]:crop_x[1], :]
                rendering = rendering[crop_y[0]:crop_y[1], crop_x[0]:crop_x[1], :]
                rendering_l = rendering_l[crop_y[0]:crop_y[1], crop_x[0]:crop_x[1], :]
                landmark_pos[0, :] -= crop_x[0]
                landmark_pos[1, :] -= crop_y[0]
                assert image.shape[0] == crop or image.shape[1] == crop, (
                    "Error cropping image (original %d, here %d)!" % (im_idx, running_idx))
            assert image.shape[0] <= crop and image.shape[1] <= crop and image.shape[2] == 3, (
                "Wrong image shape (original %d, here %d)!" % (im_idx, running_idx))
            class_groups = six_region_groups if partspec == '6' else None
            annotation = regions_to_classes(rendering, class_groups, warn_id=str(im_idx))
            if partspec == '1':
                annotation = (annotation > 0).astype('uint8')
            assert np.max(annotation) <= int(partspec), (
                "Wrong annotation value (original %d, here %d): %s!" % (
                    im_idx, running_idx, str(np.unique(annotation))))
            if running_idx == 0:
                assert np.max(annotation) == int(partspec), (
                    "Probably an error in the number of parts!")
            pose_vis_im = vs.visualize_pose(cv2.cvtColor(annotation * 8, cv2.COLOR_GRAY2RGB),
                                            landmark_pos,
                                            scale=1.)
            scipy.misc.imsave(path.join(dset_fp, '%05d_image.png' % (running_idx)), image)
            scipy.misc.imsave(path.join(dset_fp, '%05d_ann.png' % (running_idx)), annotation)
            scipy.misc.imsave(path.join(dset_fp, '%05d_seg_ann_vis.png' % (running_idx)),
                              apply_colormap(annotation, vmax=int(partspec)))
            # scipy.misc.imsave(path.join(dset_fp, '%05d_render.png' % (running_idx)), rendering)
            scipy.misc.imsave(path.join(dset_fp, '%05d_render_light.png' % (running_idx)), rendering_l)  # pylint: disable=line-too-long
            scipy.misc.imsave(path.join(dset_fp, '%05d_pose_ann_vis.png' % (running_idx)), pose_vis_im)
            landmark_pos = np.concatenate((landmark_pos, joints[2][None, :]))
            np.save(str(path.join(dset_fp, '%05d_joints.npy' % (running_idx))),
                    landmark_pos,
                    allow_pickle=False)
        if im_idx in train_ids:
            list_f = train_list_f
        elif im_idx in val_ids:
            list_f = val_list_f
        elif im_idx in test_ids:
            list_f = test_list_f
        list_f.write("/%s/%05d_image.png /%s/%05d_ann.png %f\n" % (
            dset_rel_fp, running_idx, dset_rel_fp, running_idx, norm_factor))
        list_f.flush()
        running_idx += 1
    return running_idx
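# `add_dataset` uses `get_crop`, defined elsewhere in the repo. A minimal
# sketch of the assumed behavior: per axis, a window of side `crop` centered
# on the person and clamped to the image bounds:
def get_crop(image, person_center, crop):
    def window(center, size):
        start = int(round(center)) - crop // 2
        start = min(max(start, 0), max(size - crop, 0))
        return (start, min(start + crop, size))
    crop_x = window(person_center[0], image.shape[1])
    crop_y = window(person_center[1], image.shape[0])
    return crop_y, crop_x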
def add_dataset(dset_fp,
                landmarks,
                partspec,
                resolution_wh=256,
                num_zfill_in_name=5,
                start=0,
                num=0,
                num_augment_per_sample=0,
                global_pose_augment=False,
                global_pose_max_angle=2 * np.pi,
                flip_init_glob_pose=True,
                shape_augment=False,
                shape_range=[-2, 2],
                fat_height_range=[-4, 4],
                pose_augment=False,
                pose_axis_deviation_scale=0.1,
                pose_angle_deviation_range=[-np.pi / 6, np.pi / 6]):
    """Add a dataset to the collection."""
    ids_list = [
        str(f[:num_zfill_in_name]) for f in sorted(os.listdir(dset_fp))
        if f.endswith("_body.pkl") and '-' not in f
    ]
    to_render_ids = ids_list[start:start + num]
    LOGGER.info("Writing dataset. Shape augment: {}, "
                "Glob pose augment: {}, "
                "Pose augment: {}".format(str(shape_augment),
                                          str(global_pose_augment),
                                          str(pose_augment)))
    for im_idx in to_render_ids:
        print('Index', im_idx)
        smpl_path = path.join(dset_fp, '{}_body.pkl'.format(im_idx))
        with open(smpl_path, 'rb') as f:
            smpl_data = pickle.load(f)
        pose = smpl_data['pose']
        if 'betas' in smpl_data.keys():
            betas = smpl_data['betas']
        elif 'shape' in smpl_data.keys():
            betas = smpl_data['shape']
        rt = np.array([0.0, 0.0, 0.0])
        f = np.array(5000.0)
        trans = np.array([0.0, 0.0, 0.0])
        t = np.array([0.0, 0.0, 40.0])

        # ------- First render original, un-augmented data (if doing UP3D augmentation) -------
        camera = {
            'rt': rt,
            'f': f,
            'trans': trans,
            't': t,
            'betas': betas,
            'pose': pose
        }
        resolution = (resolution_wh, resolution_wh)
        factor = 1.0
        renderings = render(MODEL_NEUTRAL,
                            (np.asarray(resolution) * 1. / factor).astype('int'),
                            camera,
                            1,
                            False,
                            use_light=False)
        renderings = [
            scipy.misc.imresize(renderim, (resolution[1], resolution[0]),
                                interp='nearest') for renderim in renderings
        ]
        # Only rendering one rotated view - single element in list.
        rendering = renderings[0]
        landmark_pos = get_landmark_positions(betas, pose, trans, resolution,
                                              landmarks, rt, t, f)
        vis = check_landmark_visibility(landmark_pos, resolution_wh)
        class_groups = six_region_groups if partspec == '6' else None
        annotation = regions_to_classes(rendering, class_groups, warn_id=im_idx)
        if partspec == '1':
            annotation = (annotation > 0).astype('uint8')
        # assert np.max(annotation) <= int(partspec), (
        #     "Wrong annotation value (%s): %s!" % (im_idx, str(np.unique(annotation))))
        # if int(im_idx) == 0:
        #     assert np.max(annotation) == int(partspec), ("Probably an error in the number of parts!")
        pose_vis_im = vs.visualize_pose(cv2.cvtColor(annotation * 8, cv2.COLOR_GRAY2RGB),
                                        landmark_pos,
                                        scale=1.)
        scipy.misc.imsave(path.join(dset_fp, '{}_ann.png'.format(im_idx)), annotation)
        scipy.misc.imsave(path.join(dset_fp, '{}_seg_ann_vis.png'.format(im_idx)),
                          apply_colormap(annotation, vmax=int(partspec)))
        scipy.misc.imsave(path.join(dset_fp, '{}_pose_ann_vis.png'.format(im_idx)),
                          pose_vis_im)
        landmark_pos_with_vis = np.concatenate([landmark_pos, vis[None, :]], axis=0)
        np.save(str(path.join(dset_fp, '{}_joints.npy'.format(im_idx))),
                landmark_pos_with_vis,
                allow_pickle=False)

        # --------------- Render augmented data (if doing UP3D augmentation) ---------------
        # UP3D augmentation by random sampling.
        if num_augment_per_sample > 0:
            if global_pose_augment:
                assert 'global_pose' in dset_fp, "Dataset path is probably wrong!"
                # Random sampling of global rotations.
                new_r_globals = uniform_rotation_sample_global_pose(
                    global_pose_max_angle=global_pose_max_angle,
                    flip_init_global_pose=flip_init_glob_pose,
                    num=num_augment_per_sample - 1)
                # The first global rotation augmentation is set to be a backwards-facing
                # one, since this is the second most common global pose modality after
                # front-facing.
                R_global_backface = np.matmul(
                    cv2.Rodrigues(np.array([0, np.pi, 0]))[0],
                    cv2.Rodrigues(np.array([np.pi, 0, 0]))[0])
                r_global_backface = np.squeeze(cv2.Rodrigues(R_global_backface)[0])
                r_global_backface += 0.1 * np.random.randn(3)  # Add some random noise.
                new_r_globals.insert(0, r_global_backface)
            if shape_augment:
                assert 'shape' in dset_fp, "Dataset path is probably wrong!"
                # Random sampling of shape deviations from the original.
                betas_delta = uniform_sample_smpl_shape_deviation(
                    range=shape_range,
                    fat_and_height_range=fat_height_range,
                    num=num_augment_per_sample)
            if pose_augment:
                assert 'pose' in dset_fp, "Dataset path is probably wrong!"
                # Random sampling of axis and angle deviations from the original pose.
                new_poses = uniform_axis_angle_sample_pose_deviation(
                    pose[3:], pose_axis_deviation_scale,
                    pose_angle_deviation_range, num)

            for aug_idx in range(num_augment_per_sample):
                print('Aug', aug_idx)
                aug_pose = np.copy(pose)
                aug_betas = np.copy(betas)
                if global_pose_augment:
                    aug_pose[:3] = new_r_globals[aug_idx]
                if shape_augment:
                    aug_betas = aug_betas + betas_delta[aug_idx]
                if pose_augment:
                    aug_pose[3:] = new_poses[aug_idx]
                aug_camera = {
                    'rt': rt,
                    'f': f,
                    'trans': trans,
                    't': t,
                    'betas': aug_betas,
                    'pose': aug_pose
                }
                resolution = (resolution_wh, resolution_wh)
                factor = 1.0
                aug_renderings = render(MODEL_NEUTRAL,
                                        (np.asarray(resolution) * 1. / factor).astype('int'),
                                        aug_camera,
                                        1,
                                        False,
                                        use_light=False)
                aug_renderings = [
                    scipy.misc.imresize(renderim, (resolution[1], resolution[0]),
                                        interp='nearest')
                    for renderim in aug_renderings
                ]
                # Rendering one rotated view - single element in list.
                aug_rendering = aug_renderings[0]
                aug_landmark_pos = get_landmark_positions(aug_betas, aug_pose, trans,
                                                          resolution, landmarks, rt, t, f)
                aug_vis = check_landmark_visibility(aug_landmark_pos, resolution_wh)
                class_groups = six_region_groups if partspec == '6' else None
                aug_annotation = regions_to_classes(aug_rendering, class_groups,
                                                    warn_id=im_idx)
                if partspec == '1':
                    aug_annotation = (aug_annotation > 0).astype('uint8')
                # assert np.max(annotation) <= int(partspec), (
                #     "Wrong annotation value (%s): %s!" % (im_idx, str(np.unique(annotation))))
                # if int(im_idx) == 0:
                #     assert np.max(annotation) == int(partspec), ("Probably an error in the number of parts!")
                aug_pose_vis_im = vs.visualize_pose(
                    cv2.cvtColor(aug_annotation * 8, cv2.COLOR_GRAY2RGB),
                    aug_landmark_pos,
                    scale=1.)
                scipy.misc.imsave(
                    path.join(dset_fp, '{}-{}_ann.png'.format(im_idx, aug_idx)),
                    aug_annotation)
                scipy.misc.imsave(
                    path.join(dset_fp, '{}-{}_seg_ann_vis.png'.format(im_idx, aug_idx)),
                    apply_colormap(aug_annotation, vmax=int(partspec)))
                scipy.misc.imsave(
                    path.join(dset_fp, '{}-{}_pose_ann_vis.png'.format(im_idx, aug_idx)),
                    aug_pose_vis_im)
                aug_landmark_pos_with_vis = np.concatenate(
                    [aug_landmark_pos, aug_vis[None, :]], axis=0)
                np.save(str(path.join(dset_fp, '{}-{}_joints.npy'.format(im_idx, aug_idx))),
                        aug_landmark_pos_with_vis,
                        allow_pickle=False)
                aug_smpl_save_path = path.join(dset_fp,
                                               '{}-{}_body.pkl'.format(im_idx, aug_idx))
                with open(aug_smpl_save_path, 'wb') as aug_f:
                    pickle.dump({'betas': aug_betas, 'pose': aug_pose},
                                aug_f, protocol=2)
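# Hypothetical invocation of the augmented `add_dataset`; the path and the
# `LANDMARKS` spec are placeholders, not part of the original module. Note the
# asserts above require the augmentation type ('global_pose', 'shape' or
# 'pose') to appear in the dataset folder name:
add_dataset('/data/up3d_global_pose', landmarks=LANDMARKS, partspec='6',
            num=100, num_augment_per_sample=4,
            global_pose_augment=True, global_pose_max_angle=np.pi)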