def convert_tless_gt(gt_path, images_path, image_extension, object_models_path, output_path):
    """Convert a T-Less ground-truth file into a JSON file keyed by image name.

    The ground truth is loaded with ``tless_inout.load_gt``. The images found
    in ``images_path`` (with ``image_extension``) and the object models found
    in ``object_models_path`` (``ply``/``obj``) are both ordered with
    ``util.sort_list_by_num_in_string_entries``. The i-th image is then paired
    with ``yml_gt[i]``, and every ground-truth entry is written out as a dict
    with the keys ``R``, ``t``, ``bb`` and ``obj``.

    NOTE(review): the pairing is purely positional, i.e. it presumably relies
    on the ground truth being keyed 0..n-1 in the same order as the sorted
    image list -- confirm against the dataset layout.

    Args:
        gt_path: Path of the ground-truth file to load.
        images_path: Folder containing the images.
        image_extension: One of 'png', 'jpg', 'jpeg', 'tiff'.
        object_models_path: Folder containing the object models.
        output_path: Path of the JSON file to write (opened before loading).

    Raises:
        AssertionError: If a path does not exist or the extension is not
            supported.
    """
    import os
    import tless_inout as inout
    import util
    import json
    from collections import OrderedDict

    assert os.path.exists(gt_path), "Ground truth file does not exist."
    assert os.path.exists(images_path), "Images path does not exist."
    assert os.path.exists(object_models_path), "Object models path does not exist."
    assert image_extension in ['png', 'jpg', 'jpeg', 'tiff'], "Unkown image file format."

    with open(output_path, "w") as json_gt:
        yml_gt = inout.load_gt(gt_path)
        converted = OrderedDict()

        image_filenames = util.get_files_at_path_of_extensions(
            images_path, [image_extension])
        util.sort_list_by_num_in_string_entries(image_filenames)

        object_model_filenames = util.get_files_at_path_of_extensions(
            object_models_path, ['ply', 'obj'])
        util.sort_list_by_num_in_string_entries(object_model_filenames)

        for index, filename in enumerate(image_filenames):
            entries = []
            converted[filename] = entries
            for gt in yml_gt[index]:
                # IDs of object models in T-Less are 1 based, the list of
                # model filenames is 0 based. (An ID of 0 would wrap around to
                # the last model because of Python's negative indexing.)
                model_filename = object_model_filenames[gt['obj_id'] - 1]
                entries.append({
                    'R': gt['cam_R_m2c'].flatten().tolist(),
                    't': gt['cam_t_m2c'].flatten().tolist(),
                    'bb': gt['obj_bb'],
                    'obj': model_filename
                })

        json.dump(converted, json_gt)
def _color_mask(image, color, shape):
    """Return a boolean mask of size `shape` that is True where `image` equals `color`."""
    # We do not need three dimensions, only a 2D mask of the pixels whose
    # channels all match the desired color.
    mask = np.all(image == color, axis=2)
    # Pad at the bottom/right with False so both masks share the largest size.
    padding = ((0, shape[0] - image.shape[0]),
               (0, shape[1] - image.shape[1]))
    return np.pad(mask, padding, 'constant', constant_values=False)


def _mask_iou(gt_mask, pred_mask):
    """Return the intersection over union of two boolean masks (0.0 if both are empty)."""
    union = np.logical_or(gt_mask, pred_mask).sum()
    if union == 0:
        # Neither mask contains the color: avoid 0/0, which would end up as
        # NaN (not valid JSON) in the output file.
        return 0.0
    intersection = np.logical_and(gt_mask, pred_mask).sum()
    return float(intersection) / float(union)


def segmentation_iou_and_reprojection_error(gt_seg, pred_seg, color, output_file):
    """Compute the segmentation IoU per predicted mask and store it as JSON.

    For every PNG in `pred_seg` that has a PNG of the same name in `gt_seg`,
    both images are read, reduced to boolean masks of the pixels equal to
    `color`, padded to a common size and compared. The result is written to
    `output_file` as ``{image_filename: {"iou": value}}``, where
    ``image_filename`` is the mask name with ``_segmentation.png`` replaced
    by ``.jpg``.

    Only the IoU is computed here; despite the function name no reprojection
    error is calculated.

    Args:
        gt_seg: Folder with the ground-truth segmentation images.
        pred_seg: Folder with the predicted segmentation images.
        color: Color that marks the object in the segmentation images; it is
            compared against the pixels as loaded by ``cv2.imread``.
        output_file: Path of the JSON file to write.
    """
    result = OrderedDict()

    # Only used for membership tests, hence a set and no sorting.
    gt_segmentation_images = set(
        util.get_files_at_path_of_extensions(gt_seg, ['png']))
    pred_segmentation_images = util.get_files_at_path_of_extensions(pred_seg, ['png'])
    util.sort_list_by_num_in_string_entries(pred_segmentation_images)

    for segmentation_image_name in pred_segmentation_images:
        if segmentation_image_name not in gt_segmentation_images:
            print('No corresponding ground-truth segmentation found for {}'.format(
                segmentation_image_name))
            continue

        image_filename = segmentation_image_name.split("_segmentation.png")[0] + ".jpg"

        gt_segmentation_image = cv2.imread(os.path.join(gt_seg, segmentation_image_name))
        pred_segmentation_image = cv2.imread(os.path.join(pred_seg, segmentation_image_name))

        # cv2.imread returns None if an image could not be read, e.g. because
        # the rendered segmentation mask is empty for some reason.
        if gt_segmentation_image is None or pred_segmentation_image is None:
            print('Could not read segmentation images for {}, skipping.'.format(
                image_filename))
            continue

        print('Processing {}'.format(image_filename))

        max_shape = [max(gt_segmentation_image.shape[0], pred_segmentation_image.shape[0]),
                     max(gt_segmentation_image.shape[1], pred_segmentation_image.shape[1])]

        gt_mask = _color_mask(gt_segmentation_image, color, max_shape)
        pred_mask = _color_mask(pred_segmentation_image, color, max_shape)

        result[image_filename] = {"iou": _mask_iou(gt_mask, pred_mask)}

    with open(output_file, 'w') as out_file:
        json.dump(result, out_file)
def convert_tless_gt(images_path, image_extension, fovx, fovy, output_path):
    """Write a JSON file with one camera matrix per image, derived from the FOV.

    Despite its name this function does not touch ground-truth poses: for
    every image in `images_path` it reads the image size and stores a
    flattened 3x3 matrix under the key ``K``.

    The field-of-view values are passed to ``math.tan`` and are therefore
    interpreted as radians.

    NOTE(review): the focal entries are computed as ``size / tan(fov / 2)``;
    the usual pinhole relation is ``(size / 2) / tan(fov / 2)`` -- confirm
    which one is intended before relying on the values.

    Args:
        images_path: Folder containing the images.
        image_extension: One of 'png', 'jpg', 'jpeg', 'tiff'.
        fovx: Horizontal field of view.
        fovy: Vertical field of view.
        output_path: Path of the JSON file to write (opened before any image
            is read).

    Raises:
        AssertionError: If `images_path` does not exist or the extension is
            not supported.
    """
    import os
    import cv2
    import json
    import util
    import math
    from collections import OrderedDict

    assert os.path.exists(images_path), "Images path does not exist."
    assert image_extension in ['png', 'jpg', 'jpeg', 'tiff'], "Unkown image file format."

    with open(output_path, "w") as json_info:
        cam_info = OrderedDict()

        image_filenames = util.get_files_at_path_of_extensions(
            images_path, [image_extension])
        util.sort_list_by_num_in_string_entries(image_filenames)

        for filename in image_filenames:
            image = cv2.imread(os.path.join(images_path, filename))
            # shape is (rows, columns, ...), i.e. height comes first
            height, width = image.shape[0], image.shape[1]

            focal_x = width / math.tan(fovx / 2)
            focal_y = height / math.tan(fovy / 2)
            # The principal point is placed at the image center
            center_x = width / float(2)
            center_y = height / float(2)

            cam_info[filename] = {'K': [focal_x, 0, center_x,
                                        0, focal_y, center_y,
                                        0, 0, 1]}

        json.dump(cam_info, json_info)
def convert_tless_cam_info(cam_info_path, images_path, image_extension, output_path):
    """Convert a T-Less camera info file into a JSON file keyed by image name.

    The info is loaded with ``tless_inout.load_info``. The images found in
    `images_path` are ordered with ``util.sort_list_by_num_in_string_entries``
    and the i-th image is paired with ``yaml_info[i]``. Each output entry
    holds the flattened camera matrix ``K`` plus ``mode`` and ``elev``; if
    the info contains ``cam_R_w2c`` the flattened ``R`` and ``t`` of the
    camera are stored as well.

    NOTE(review): the pairing is positional and presumably relies on the
    info being keyed 0..n-1 in sorted image order -- confirm.

    Args:
        cam_info_path: Path of the camera info file to load.
        images_path: Folder containing the images.
        image_extension: One of 'png', 'jpg', 'jpeg', 'tiff'.
        output_path: Path of the JSON file to write (opened before loading).

    Raises:
        AssertionError: If a path does not exist or the extension is not
            supported.
    """
    import os
    import tless_inout as inout
    import util
    import json

    assert os.path.exists(cam_info_path), "Cam info file does not exist."
    assert os.path.exists(images_path), "Images path does not exist."
    assert image_extension in ['png', 'jpg', 'jpeg', 'tiff'], "Unkown image file format."

    with open(output_path, "w") as json_cam_info:
        yaml_info = inout.load_info(cam_info_path)
        converted = {}

        image_filenames = util.get_files_at_path_of_extensions(
            images_path, [image_extension])
        util.sort_list_by_num_in_string_entries(image_filenames)

        for index, filename in enumerate(image_filenames):
            info = yaml_info[index]

            entry = {
                'K': info['cam_K'].flatten().tolist(),
                'mode': info['mode'],
                'elev': info['elev'],
            }
            if 'cam_R_w2c' in info:
                # The camera pose is only available for some images
                # (according to the original author: the test images).
                entry['R'] = info['cam_R_w2c'].flatten().tolist()
                entry['t'] = info['cam_t_w2c'].flatten().tolist()

            converted[filename] = entry

        json.dump(converted, json_cam_info)
def visualize_errors(gt_images_path, prediction_images_path, output_path, output_path_float=None):
    """Write per-pixel absolute error images between ground truth and prediction.

    For every TIFF in `prediction_images_path` that has a TIFF of the same
    name in `gt_images_path`, the ground truth is shrunk to the prediction's
    shape, the absolute difference is computed and stored in `output_path`
    as ``<name>_error.png``. The PNG is scaled so that an error of
    ``coord_threshold`` (5) or more maps to 255.

    `output_path` is deleted and recreated at the start.

    NOTE(review): `output_path_float` is only evaluated for truthiness; when
    set, the unscaled error is additionally written as ``<name>_error.tiff``
    into `output_path` (not into `output_path_float`). Confirm whether the
    float images were meant to go to a separate folder.

    NOTE(review): the extension is passed as ".tiff" (with a leading dot) to
    ``util.get_files_at_path_of_extensions`` -- verify that the helper
    expects that form.

    Args:
        gt_images_path: Folder with the ground-truth TIFF images.
        prediction_images_path: Folder with the predicted TIFF images.
        output_path: Folder that receives the error images.
        output_path_float: If truthy, also store the unscaled error as TIFF.
    """
    gt_images = util.get_files_at_path_of_extensions(gt_images_path, [".tiff"])
    util.sort_list_by_num_in_string_entries(gt_images)
    prediction_images = util.get_files_at_path_of_extensions(
        prediction_images_path, [".tiff"])
    util.sort_list_by_num_in_string_entries(prediction_images)

    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)

    # The threshold to use as maximum (i.e. 255 in RGB) because using the
    # maximum in the prediction itself might make smaller errors vanish
    # if the maximum is a lot larger than the rest of the coordinates.
    coord_threshold = 5

    # Matching images share the exact same filename, so a set lookup suffices.
    gt_image_names = set(gt_images)

    for image_name in prediction_images:
        if image_name not in gt_image_names:
            print("Could not find corresponding ground truth image.")
            continue

        name_without_extension = os.path.splitext(image_name)[0]
        output_filename_float = os.path.join(
            output_path, name_without_extension + "_error.tiff")
        output_filename = os.path.join(
            output_path, name_without_extension + "_error.png")

        gt_image_loaded = tiff.imread(os.path.join(gt_images_path, image_name))
        prediction_image_loaded = tiff.imread(
            os.path.join(prediction_images_path, image_name))

        # Bring the ground truth to the same size by taking every other (or
        # i-th) pixel, because that's essentially what the network does when
        # the output size is smaller than the input size.
        gt_image_loaded = util.shrink_image_with_step_size(
            gt_image_loaded, prediction_image_loaded.shape)

        diff = np.absolute(gt_image_loaded - prediction_image_loaded)

        if output_path_float:
            tiff.imwrite(output_filename_float, diff)

        # Scale so that coord_threshold maps to 255 and clip everything above.
        # The values are within [0, 255] afterwards, so store them as 8-bit,
        # which is the depth cv2.imwrite expects for PNG files.
        scaled = np.minimum((diff / coord_threshold) * 255, 255).astype(np.uint8)
        cv2.imwrite(output_filename, scaled)
def generate_data(images_path, image_extension, object_models_path, object_model_name,
                  ground_truth_path, cam_info_path, segmentation_color,
                  crop_on_segmentation_mask, output_path):
    """Render object coordinates and segmentation masks as training data.

    For every image listed in the ground truth that also exists in
    `images_path`, each ground-truth entry whose ``obj`` equals
    `object_model_name` is rendered with ``renderer.render``. The results are
    stored below `output_path`:

    * ``images/<image>``: the (optionally cropped) original image
    * ``segmentations/<name>_segmentation.png``: the segmentation rendering
    * ``obj_coords/<name>_obj_coords.tiff``: the object coordinates (float16)
    * ``info.json``: camera info per processed image, with the principal
      point shifted by the crop offset

    The three output folders are deleted and recreated at the start. If an
    image contains several entries for the object, later entries overwrite
    the files of earlier ones.

    Args:
        images_path: Folder containing the images.
        image_extension: Extension of the images to consider.
        object_models_path: Folder containing the object models.
        object_model_name: Filename of the model to generate data for
            (loaded with ``inout.load_ply``).
        ground_truth_path: JSON file with the ground-truth poses per image.
        cam_info_path: JSON file with the camera info per image.
        segmentation_color: Color passed to
            ``util.crop_image_on_segmentation_color`` to locate the object.
        crop_on_segmentation_mask: If true, all outputs are cropped to the
            segmentation mask; otherwise they are stored uncropped and the
            camera matrix is left unchanged.
        output_path: Folder that receives the generated data.
    """
    print("Generating training data.")

    # The paths where to store the results
    images_output_path = os.path.join(output_path, "images")
    segmentations_output_path = os.path.join(output_path, "segmentations")
    obj_coords_output_path = os.path.join(output_path, "obj_coords")

    for directory in (images_output_path, segmentations_output_path,
                      obj_coords_output_path):
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)

    # To process only images that actually exist
    existing_images = set(util.get_files_at_path_of_extensions(
        images_path, [image_extension]))

    plt.ioff()  # Turn interactive plotting off

    cam_info_output_path = os.path.join(output_path, "info.json")

    with open(ground_truth_path, 'r') as gt_data_file, \
            open(cam_info_path, 'r') as cam_info_file, \
            open(cam_info_output_path, 'w') as cam_info_output_file:
        gt_data = OrderedDict(
            sorted(json.load(gt_data_file).items(), key=lambda t: t[0]))
        cam_info = json.load(cam_info_file)
        new_cam_info = {}

        for image_filename in gt_data:
            if image_filename not in existing_images:
                continue

            print("Processing file {}".format(image_filename))
            image_filename_without_extension = os.path.splitext(image_filename)[0]
            image_path = os.path.join(images_path, image_filename)

            for gt in gt_data[image_filename]:
                # Filter out ground-truth entries that are interesting to us
                if object_model_name != gt['obj']:
                    continue

                # TODO: add support for .obj files
                object_model = inout.load_ply(
                    os.path.join(object_models_path, object_model_name))
                # Rotation matrix was flattened to store it in a json
                object_model['R'] = np.array(gt['R']).reshape(3, 3)
                object_model['t'] = np.array(gt['t'])

                # Work on a copy so that the loaded camera info keeps the
                # original matrix; otherwise a second entry for the same
                # image would be rendered with an already shifted K.
                image_cam_info = dict(cam_info[image_filename])
                # Same goes for camera matrix
                K = np.array(image_cam_info['K']).reshape(3, 3)

                image = cv2.imread(image_path)
                surface_colors = [[255, 255, 255]]
                renderings = renderer.render(
                    (image.shape[0], image.shape[1]), K, [object_model],
                    surface_colors, modes=['obj_coords', 'segmentation'])

                segmentation_rendering = renderings['segmentation']
                object_coordinates = renderings['obj_coords'].astype(np.float16)

                # Defaults for the uncropped case: keep everything as is and
                # do not shift the principal point.
                cropped_image = image
                crop_frame = (0, 0)

                if crop_on_segmentation_mask:
                    # The segmentation rendering itself has to be cropped
                    # last because the other crops are computed from the
                    # uncropped mask.
                    object_coordinates = util.crop_image_on_segmentation_color(
                        object_coordinates, segmentation_rendering,
                        segmentation_color)
                    cropped_image = util.crop_image_on_segmentation_color(
                        image, segmentation_rendering, segmentation_color)
                    segmentation_rendering, crop_frame = util.crop_image_on_segmentation_color(
                        segmentation_rendering, segmentation_rendering,
                        segmentation_color, return_frame=True)

                ####################### OBJECT COORDINATES #######################
                object_coordinates_rendering_path = os.path.join(
                    obj_coords_output_path,
                    image_filename_without_extension + "_obj_coords.tiff")
                tiff.imsave(object_coordinates_rendering_path, object_coordinates)

                ####################### IMAGE #######################
                cropped_image_path = os.path.join(images_output_path, image_filename)
                cv2.imwrite(cropped_image_path, cropped_image)

                ####################### SEGMENTATION IMAGE #######################
                segmentation_rendering_path = os.path.join(
                    segmentations_output_path,
                    image_filename_without_extension + "_segmentation.png")
                cv2.imwrite(segmentation_rendering_path, segmentation_rendering)

                # Update camera matrix: the principal point changes when the
                # image is cropped and has to be shifted by the crop offset.
                # crop_frame[1] is applied to the x coordinate and
                # crop_frame[0] to the y coordinate.
                K[0][2] = K[0][2] - crop_frame[1]
                K[1][2] = K[1][2] - crop_frame[0]
                image_cam_info['K'] = K.flatten().tolist()
                new_cam_info[image_filename] = image_cam_info

        json.dump(
            OrderedDict(sorted(new_cam_info.items(), key=lambda t: t[0])),
            cam_info_output_file)
def generate_data(images_path, image_extension, object_models_path, object_model_name,
                  ground_truth_path, cam_info_path, segmentation_color,
                  crop_on_segmentation_mask, output_path):
    """Render segmentation masks for one object, including occluding objects.

    For every image listed in the ground truth that also exists in
    `images_path`, all objects of the scene are rendered with
    ``renderer.render`` in 'segmentation' mode: the first entry matching
    `object_model_name` in white, every other entry in black. The result is
    stored as ``segmentations/<name>_segmentation.png`` below `output_path`.
    Images without an entry for the object get an all-black image of the
    original image size.

    The ``segmentations`` folder is deleted and recreated at the start.

    Args:
        images_path: Folder containing the images.
        image_extension: Extension of the images to consider.
        object_models_path: Folder containing the object models.
        object_model_name: Filename of the model to render the mask for.
        ground_truth_path: JSON file with the ground-truth poses per image.
        cam_info_path: JSON file with the camera info per image.
        segmentation_color: Color passed to
            ``util.crop_image_on_segmentation_color`` when cropping.
        crop_on_segmentation_mask: If true, the rendering is cropped to the
            segmentation mask before it is stored.
        output_path: Folder that receives the ``segmentations`` folder.
    """
    print("Generating training data.")

    segmentations_output_path = os.path.join(output_path, 'segmentations')
    if os.path.exists(segmentations_output_path):
        shutil.rmtree(segmentations_output_path)
    os.makedirs(segmentations_output_path)

    # Restrict processing to the images that are actually present
    existing_images = util.get_files_at_path_of_extensions(
        images_path, [image_extension])

    plt.ioff()  # Turn interactive plotting off

    with open(ground_truth_path, 'r') as gt_data_file, \
            open(cam_info_path, 'r') as cam_info_file:
        cam_info = json.load(cam_info_file)
        gt_data = OrderedDict(
            sorted(json.load(gt_data_file).items(), key=lambda t: t[0]))

        for image_filename in gt_data:
            if image_filename not in existing_images:
                continue

            print("Processing file {}".format(image_filename))
            name_without_extension = os.path.splitext(image_filename)[0]
            image = cv2.imread(os.path.join(images_path, image_filename))

            # The camera matrix was flattened to store it in a json
            K = np.array(cam_info[image_filename]['K']).reshape(3, 3)

            gts_for_image = gt_data[image_filename]
            matching_entries = sum(
                1 for gt in gts_for_image if gt['obj'] == object_model_name)
            if matching_entries > 1:
                print("Warning: found multiple ground truth entries for the object."
                      " Using only the first one.")

            # We have one object model that we actually want to render the
            # segmentation for; the other ones may overlay the object and
            # have to be rendered as well.
            desired_obj_model = None
            misc_obj_models = []
            for gt in gts_for_image:
                object_model = inout.load_ply(
                    os.path.join(object_models_path, gt['obj']))
                # The rotation matrix was flattened to store it in a json
                object_model['R'] = np.array(gt['R']).reshape(3, 3)
                object_model['t'] = np.array(gt['t'])

                is_first_match = (desired_obj_model is None
                                  and object_model_name == gt['obj'])
                if is_first_match:
                    # This is the one we render the segmentation mask for
                    desired_obj_model = object_model
                else:
                    misc_obj_models.append(object_model)

            segmentation_rendering_path = os.path.join(
                segmentations_output_path,
                name_without_extension + "_segmentation.png")

            if desired_obj_model is None:
                # No entry in the scene for our object model: write out an
                # empty segmentation image, because the network checks for
                # the color and skips the image if it is not present.
                cv2.imwrite(segmentation_rendering_path,
                            np.zeros(image.shape, np.uint8))
                continue

            # White for the desired object, black for everything else
            main_surface_color = [[255, 255, 255]]
            misc_surface_colors = np.repeat(
                [[0, 0, 0]], len(misc_obj_models), axis=0)
            surface_colors = np.concatenate(
                [main_surface_color, misc_surface_colors])

            all_models = [desired_obj_model] + misc_obj_models
            rendering = renderer.render(image.shape[:2], K, all_models,
                                        surface_colors,
                                        modes=['segmentation'])['segmentation']

            if crop_on_segmentation_mask:
                rendering = util.crop_image_on_segmentation_color(
                    rendering, rendering, segmentation_color)

            cv2.imwrite(segmentation_rendering_path, rendering)