def check_bop_results(path, version='bop19'):
    """Checks if the format of BOP results is correct.

    :param path: Path to a file with pose estimates.
    :param version: Version of the results.
    :return: 2-tuple (check_passed, check_msg), where check_passed is True if
        the format is correct and False otherwise.
    """
    check_passed = True
    check_msg = 'OK'
    try:
        results = load_bop_results(path, version)

        if version == 'bop19':
            # Check if the times for all estimates from the same image are the same.
            times = {}
            for result in results:
                result_key = '{:06d}_{:06d}'.format(result['scene_id'], result['im_id'])
                if result_key in times:
                    if abs(times[result_key] - result['time']) > 0.001:
                        check_passed = False
                        check_msg = \
                            'The running time for scene {} and image {} is not the same for' \
                            ' all estimates.'.format(result['scene_id'], result['im_id'])
                        misc.log(check_msg)
                        break
                else:
                    times[result_key] = result['time']

    except Exception as e:
        check_passed = False
        check_msg = 'Error when loading BOP results: {}'.format(e)
        misc.log(check_msg)

    return check_passed, check_msg
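# Hypothetical usage sketch: check a result file before running the evaluation and
# abort early on a bad format. The file name below is a placeholder; `misc` and
# `load_bop_results` are assumed to be available in this module's scope as above.
if __name__ == '__main__':
    ok, msg = check_bop_results('method_lmo-test.csv', version='bop19')
    if not ok:
        raise RuntimeError('Invalid BOP results file: {}'.format(msg))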
def write_text_on_image(im, txt_list, loc=(3, 0), color=(1.0, 1.0, 1.0), size=20):
    """Writes text info on an image.

    :param im: ndarray on which the text info will be written.
    :param txt_list: List of dictionaries, each describing one info line:
        - 'name': Entry name.
        - 'val': Entry value.
        - 'fmt': String format for the value.
    :param loc: Location of the top left corner of the text box.
    :param color: Font color.
    :param size: Font size.
    :return: Image with written text info.
    """
    im_pil = Image.fromarray(im)

    # Load font.
    try:
        font_path = os.path.join(os.path.dirname(__file__), 'droid_sans_mono.ttf')
        font = ImageFont.truetype(font_path, size)
    except IOError:
        misc.log('Warning: Loading a fallback font.')
        font = ImageFont.load_default()

    draw = ImageDraw.Draw(im_pil)
    for info in txt_list:
        if info['name'] != '':
            txt_tpl = '{}:{' + info['fmt'] + '}'
        else:
            txt_tpl = '{}{' + info['fmt'] + '}'
        txt = txt_tpl.format(info['name'], info['val'])
        draw.text(loc, txt, fill=tuple([int(c * 255) for c in color]), font=font)
        text_width, text_height = font.getsize(txt)
        loc = (loc[0], loc[1] + text_height)
    del draw

    return np.array(im_pil)
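# Hypothetical usage sketch (a blank canvas created here only for illustration; the
# entry names, values and formats are made up, but follow the 'name'/'val'/'fmt'
# convention used by the function above):
if __name__ == '__main__':
    canvas = np.zeros((480, 640, 3), np.uint8)
    txt_list = [
        {'name': 'recall', 'val': 0.753, 'fmt': ':.3f'},
        {'name': 'time', 'val': 0.12, 'fmt': ':.2f'},
    ]
    canvas_with_text = write_text_on_image(canvas, txt_list)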
def calc_localization_scores(scene_ids, obj_ids, matches, n_top, do_print=True):
    """Calculates performance scores for the 6D object localization task.

    References:
    Hodan et al., BOP: Benchmark for 6D Object Pose Estimation, ECCV'18.
    Hodan et al., On Evaluation of 6D Object Pose Estimation, ECCVW'16.

    :param scene_ids: ID's of considered scenes.
    :param obj_ids: ID's of considered objects.
    :param matches: Info about matching pose estimates to ground-truth poses
        (see pose_matching.py for details).
    :param n_top: Number of top pose estimates to consider per test target.
    :param do_print: Whether to print the scores to the standard output.
    :return: Dictionary with the evaluation scores.
    """
    # Count the number of visible object instances in each image.
    insts = {i: {j: defaultdict(lambda: 0) for j in scene_ids} for i in obj_ids}
    for m in matches:
        if m['valid']:
            insts[m['obj_id']][m['scene_id']][m['im_id']] += 1

    # Count the number of targets = object instances to be found.
    # For SiSo, there is either zero or one target in each image - there is just
    # one even if there are more instances of the object of interest.
    tars = 0  # Total number of targets.
    obj_tars = {i: 0 for i in obj_ids}  # Targets per object.
    scene_tars = {i: 0 for i in scene_ids}  # Targets per scene.
    for obj_id, obj_insts in insts.items():
        for scene_id, scene_insts in obj_insts.items():

            # Count the number of targets for the current object in the current scene.
            if n_top > 0:
                count = sum(np.minimum(n_top, list(scene_insts.values())))
            else:
                count = sum(list(scene_insts.values()))

            tars += count
            obj_tars[obj_id] += count
            scene_tars[scene_id] += count

    # Count the number of true positives.
    tps = 0  # Total number of true positives.
    obj_tps = {i: 0 for i in obj_ids}  # True positives per object.
    scene_tps = {i: 0 for i in scene_ids}  # True positives per scene.
    for m in matches:
        if m['valid'] and m['est_id'] != -1:
            tps += 1
            obj_tps[m['obj_id']] += 1
            scene_tps[m['scene_id']] += 1

    # Total recall.
    recall = calc_recall(tps, tars)

    # Recall per object.
    obj_recalls = {}
    for i in obj_ids:
        obj_recalls[i] = calc_recall(obj_tps[i], obj_tars[i])
    mean_obj_recall = float(np.mean(list(obj_recalls.values())).squeeze())

    # Recall per scene.
    scene_recalls = {}
    for i in scene_ids:
        scene_recalls[i] = float(calc_recall(scene_tps[i], scene_tars[i]))
    mean_scene_recall = float(np.mean(list(scene_recalls.values())).squeeze())

    # Final scores.
    scores = {
        'recall': float(recall),
        'obj_recalls': obj_recalls,
        'mean_obj_recall': float(mean_obj_recall),
        'scene_recalls': scene_recalls,
        'mean_scene_recall': float(mean_scene_recall),
        'gt_count': len(matches),
        'targets_count': int(tars),
        'tp_count': int(tps),
    }

    if do_print:
        obj_recalls_str = ', '.join(
            ['{}: {:.3f}'.format(i, s) for i, s in scores['obj_recalls'].items()])
        scene_recalls_str = ', '.join(
            ['{}: {:.3f}'.format(i, s) for i, s in scores['scene_recalls'].items()])

        misc.log('')
        misc.log('GT count: {:d}'.format(scores['gt_count']))
        misc.log('Target count: {:d}'.format(scores['targets_count']))
        misc.log('TP count: {:d}'.format(scores['tp_count']))
        misc.log('Recall: {:.4f}'.format(scores['recall']))
        misc.log('Mean object recall: {:.4f}'.format(scores['mean_obj_recall']))
        misc.log('Mean scene recall: {:.4f}'.format(scores['mean_scene_recall']))
        misc.log('Object recalls:\n{}'.format(obj_recalls_str))
        misc.log('Scene recalls:\n{}'.format(scene_recalls_str))
        misc.log('')

    return scores
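# A minimal sketch of the expected 'matches' structure (field names taken from the
# dictionary accesses above; the concrete values are made up for illustration):
#
#   example_matches = [
#       {'scene_id': 1, 'im_id': 0, 'obj_id': 1, 'valid': True, 'est_id': 0},
#       {'scene_id': 1, 'im_id': 1, 'obj_id': 1, 'valid': True, 'est_id': -1},
#   ]
#   scores = calc_localization_scores([1], [1], example_matches, n_top=1, do_print=False)
#
# With one of the two valid targets matched to an estimate (est_id >= 0) and one
# missed (est_id == -1), the total recall comes out to 1/2.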
if __name__ == '__main__':

    # AP test.
    tp = np.array([False, True, True, False, True, False])
    fp = np.logical_not(tp)
    tp_c = np.cumsum(tp).astype(np.float64)
    fp_c = np.cumsum(fp).astype(np.float64)
    rec = tp_c / tp.size
    pre = tp_c / (fp_c + tp_c)
    misc.log('Average Precision: ' + str(calc_ap(rec, pre)))
def eval_bop(eval_args, eval_dir, filename, errors):

    # Command line arguments.
    # ------------------------------------------------------------------------------
    p['results_path'] = os.path.join(eval_dir, 'bop')
    p['eval_path'] = p['results_path']
    p['result_filenames'] = [filename]
    p['renderer_type'] = eval_args.get('DATA', 'RENDERER_TYPE')
    p['targets_filename'] = eval_args.get('DATA', 'TARGETS_FILENAME')
    p['visib_gt_min'] = eval_args.getfloat('METRIC', 'VISIB_GT_MIN')
    p['errors'] = errors

    # Evaluation.
    # ------------------------------------------------------------------------------
    for result_filename in p['result_filenames']:
        misc.log('===========')
        misc.log('EVALUATING: {}'.format(result_filename))
        misc.log('===========')

        time_start = time.time()

        # Volume under recall surface (VSD) / area under recall curve (MSSD, MSPD).
        average_recalls = {}

        # Name of the result and the dataset.
        result_name = os.path.splitext(os.path.basename(result_filename))[0]
        dataset = str(result_name.split('_')[1].split('-')[0])

        # Calculate the average estimation time per image.
        ests = inout.load_bop_results(
            os.path.join(p['results_path'], result_filename), version='bop19')
        times = {}
        times_available = True
        for est in ests:
            result_key = '{:06d}_{:06d}'.format(est['scene_id'], est['im_id'])
            if est['time'] < 0:
                # All estimation times must be provided.
                times_available = False
                break
            elif result_key in times:
                if abs(times[result_key] - est['time']) > 0.001:
                    raise ValueError(
                        'The running time for scene {} and image {} is not the same for '
                        'all estimates.'.format(est['scene_id'], est['im_id']))
            else:
                times[result_key] = est['time']

        if times_available:
            average_time_per_image = np.mean(list(times.values()))
        else:
            average_time_per_image = -1.0

        # Evaluate the pose estimates.
        for error in p['errors']:

            # Paths (rel. to p['eval_path']) to folders with calculated pose errors.
            # For VSD, there is one path for each setting of tau. For the other pose
            # error functions, there is only one path.
            error_dir_paths = {}
            if error['type'] == 'vsd':
                for vsd_tau in error['vsd_taus']:
                    error_sign = misc.get_error_signature(
                        error['type'], error['n_top'],
                        vsd_delta=error['vsd_deltas'][dataset],
                        vsd_tau=vsd_tau)
                    error_dir_paths[error_sign] = os.path.join(result_name, error_sign)
            else:
                error_sign = misc.get_error_signature(error['type'], error['n_top'])
                error_dir_paths[error_sign] = os.path.join(result_name, error_sign)

            # Recall scores for all settings of the threshold of correctness (and also
            # of the misalignment tolerance tau in the case of VSD).
            recalls = []

            # Calculate performance scores.
            for error_sign, error_dir_path in error_dir_paths.items():
                for correct_th in error['correct_th']:

                    # Path to file with calculated scores.
                    score_sign = misc.get_score_signature([correct_th], p['visib_gt_min'])

                    scores_filename = 'scores_{}.json'.format(score_sign)
                    scores_path = os.path.join(
                        p['eval_path'], result_name, error_sign, scores_filename)

                    # Load the scores.
                    misc.log('Loading calculated scores from: {}'.format(scores_path))
                    scores = inout.load_json(scores_path)
                    recalls.append(scores['recall'])

            average_recalls[error['type']] = np.mean(recalls)

            misc.log('Recall scores: {}'.format(' '.join(map(str, recalls))))
            misc.log('Average recall: {}'.format(average_recalls[error['type']]))

        time_total = time.time() - time_start
        misc.log('Evaluation of {} took {}s.'.format(result_filename, time_total))

        # Calculate the final scores.
        final_scores = {}
        for error in p['errors']:
            final_scores['bop19_average_recall_{}'.format(error['type'])] = \
                average_recalls[error['type']]

        # Final score for the given dataset.
        # final_scores['bop19_average_recall'] = np.mean([
        #     average_recalls['vsd'], average_recalls['mssd'], average_recalls['mspd']])
        final_scores['bop19_average_recall'] = np.mean([average_recalls['vsd']])

        # Average estimation time per image.
        final_scores['bop19_average_time_per_image'] = average_time_per_image

        # Save the final scores.
        final_scores_path = os.path.join(
            p['eval_path'], result_name, 'scores_bop19.json')
        inout.save_json(final_scores_path, final_scores)

        # Print the final scores.
        misc.log('FINAL SCORES:')
        for score_name, score_value in final_scores.items():
            misc.log('- {}: {}'.format(score_name, score_value))

    misc.log('Done.')
def eval_calc_scores(eval_args, eval_dir, filename):

    # Command line arguments.
    # ------------------------------------------------------------------------------
    '''
    parser = argparse.ArgumentParser()

    # Define the command line arguments.
    for err_type in p['correct_th']:
        parser.add_argument(
            '--correct_th_' + err_type,
            default=','.join(map(str, p['correct_th'][err_type])))

    for err_type in p['correct_th']:
        p['correct_th'][err_type] = \
            list(map(float, args.__dict__['correct_th_' + err_type].split(',')))

    p['normalized_by_diameter'] = args.normalized_by_diameter.split(',')
    p['normalized_by_im_width'] = args.normalized_by_im_width.split(',')
    '''
    p['error_dir_paths'] = [f for f in os.listdir(filename.replace('.csv', ''))]
    p['eval_path'] = os.path.join(eval_dir, 'bop')
    p['datasets_path'] = eval_args.get('DATA', 'DATA_PATH')
    p['targets_filename'] = eval_args.get('DATA', 'TARGETS_FILENAME')
    p['visib_gt_min'] = eval_args.getfloat('METRIC', 'VISIB_GT_MIN')

    info = filename.split('/')[-1]
    result_info = info.replace('.csv', '')
    p['eval_path'] = p['eval_path'] + '/' + result_info
    result_info = result_info.split('_')

    misc.log('-----------')
    misc.log('Parameters:')
    for k, v in p.items():
        misc.log('- {}: {}'.format(k, v))
    misc.log('-----------')

    # Calculation of the performance scores.
    # ------------------------------------------------------------------------------
    for error_dir_path in p['error_dir_paths']:
        misc.log('Processing: {}'.format(error_dir_path))

        time_start = time.time()

        # Parse info about the errors from the folder name.
        error_sign = os.path.basename(error_dir_path)
        err_type = str(error_sign.split('_')[0].split('=')[1])
        n_top = int(error_sign.split('_')[1].split('=')[1])

        method = result_info[0]
        dataset_info = result_info[1].split('-')
        dataset = dataset_info[0]
        split = dataset_info[1]
        split_type = dataset_info[2] if len(dataset_info) > 2 else None

        # Evaluation signature.
        score_sign = misc.get_score_signature(p['correct_th'][err_type], p['visib_gt_min'])

        misc.log('Calculating score - error: {}, method: {}, dataset: {}.'.format(
            err_type, method, dataset))

        # Load dataset parameters.
        dp_split = dataset_params.get_split_params(
            p['datasets_path'], dataset, split, split_type)

        model_type = 'eval'
        dp_model = dataset_params.get_model_params(
            p['datasets_path'], dataset, model_type)

        # Load info about the object models.
        models_info = inout.load_json(dp_model['models_info_path'], keys_to_int=True)

        # Load the estimation targets to consider.
        # print p['targets_filename']
        targets = inout.load_json(
            os.path.join(dp_split['base_path'], p['targets_filename']))
        scene_im_ids = {}

        # Organize the targets by scene, image and object.
        misc.log('Organizing estimation targets...')
        targets_org = {}
        for target in targets:
            # ADD
            if target['obj_id'] == eval_args.getint('DATA', 'OBJ_ID'):
                targets_org.setdefault(target['scene_id'], {}).setdefault(
                    target['im_id'], {})[target['obj_id']] = target

        # Go through the test scenes and match estimated poses to GT poses.
        # ----------------------------------------------------------------------------
        matches = []  # Stores info about the matching pose estimate for each GT pose.
        for scene_id, scene_targets in targets_org.items():
            misc.log('Processing scene {} of {}...'.format(scene_id, dataset))

            # Load GT poses for the current scene.
            scene_gt = inout.load_scene_gt(
                dp_split['scene_gt_tpath'].format(scene_id=scene_id))

            # Load info about the GT poses (e.g. visibility) for the current scene.
            scene_gt_info = inout.load_json(
                dp_split['scene_gt_info_tpath'].format(scene_id=scene_id),
                keys_to_int=True)

            # Keep GT poses only for the selected targets.
            scene_gt_curr = {}
            scene_gt_info_curr = {}
            scene_gt_valid = {}
            for im_id, im_targets in scene_targets.items():
                scene_gt_curr[im_id] = scene_gt[im_id]

                # Determine which GT poses are valid.
                im_gt = scene_gt[im_id]
                im_gt_info = scene_gt_info[im_id]
                scene_gt_valid[im_id] = [True] * len(im_gt)
                if p['visib_gt_min'] >= 0:
                    # All GT poses visible from at least 100 * p['visib_gt_min'] percent
                    # are considered valid.
                    for gt_id, gt in enumerate(im_gt):
                        is_target = gt['obj_id'] in im_targets.keys()
                        is_visib = im_gt_info[gt_id]['visib_fract'] >= p['visib_gt_min']
                        scene_gt_valid[im_id][gt_id] = is_target and is_visib
                else:
                    # k most visible GT poses are considered valid, where k is given by
                    # the "inst_count" item loaded from "targets_filename".
                    gt_ids_sorted = sorted(
                        range(len(im_gt)),
                        key=lambda gt_id: im_gt_info[gt_id]['visib_fract'],
                        reverse=True)
                    to_add = {obj_id: trg['inst_count']
                              for obj_id, trg in im_targets.items()}
                    for gt_id in gt_ids_sorted:
                        obj_id = im_gt[gt_id]['obj_id']
                        if obj_id in to_add.keys() and to_add[obj_id] > 0:
                            scene_gt_valid[im_id][gt_id] = True
                            to_add[obj_id] -= 1
                        else:
                            scene_gt_valid[im_id][gt_id] = False

            # Load pre-calculated errors of the pose estimates w.r.t. the GT poses.
            scene_errs_path = p['error_tpath'].format(
                eval_path=p['eval_path'],
                error_dir_path=error_dir_path,
                scene_id=scene_id)
            scene_errs = inout.load_json(scene_errs_path, keys_to_int=True)

            # Normalize the errors by the object diameter.
            if err_type in p['normalized_by_diameter']:
                for err in scene_errs:
                    diameter = float(models_info[err['obj_id']]['diameter'])
                    for gt_id in err['errors'].keys():
                        err['errors'][gt_id] = [e / diameter for e in err['errors'][gt_id]]

            # Normalize the errors by the image width.
            if err_type in p['normalized_by_im_width']:
                for err in scene_errs:
                    factor = 640.0 / float(dp_split['im_size'][0])
                    for gt_id in err['errors'].keys():
                        err['errors'][gt_id] = [factor * e for e in err['errors'][gt_id]]

            # Match the estimated poses to the ground-truth poses.
            matches += pose_matching.match_poses_scene(
                scene_id, scene_gt_curr, scene_gt_valid, scene_errs,
                p['correct_th'][err_type], n_top)

        # Calculate the performance scores.
        # ----------------------------------------------------------------------------

        # 6D object localization scores (SiSo if n_top = 1).
        scores = score.calc_localization_scores(
            dp_split['scene_ids'], dp_model['obj_ids'], matches, n_top)

        # Save scores.
        scores_path = p['out_scores_tpath'].format(
            eval_path=p['eval_path'],
            error_dir_path=error_dir_path,
            score_sign=score_sign)
        inout.save_json(scores_path, scores)

        # Save matches.
        matches_path = p['out_matches_tpath'].format(
            eval_path=p['eval_path'],
            error_dir_path=error_dir_path,
            score_sign=score_sign)
        inout.save_json(matches_path, matches)

        time_total = time.time() - time_start
        misc.log('Matching and score calculation took {}s.'.format(time_total))

    misc.log('Done.')
def load_ply(path):
    """Loads a 3D mesh model from a PLY file.

    :param path: Path to a PLY file.
    :return: The loaded model given by a dictionary with items:
        - 'pts' (nx3 ndarray)
        - 'normals' (nx3 ndarray), optional
        - 'colors' (nx3 ndarray), optional
        - 'faces' (mx3 ndarray), optional
        - 'texture_uv' (nx2 ndarray), optional
        - 'texture_uv_face' (mx6 ndarray), optional
        - 'texture_file' (string), optional
    """
    f = open(path, 'rb')

    # Only triangular faces are supported.
    face_n_corners = 3

    n_pts = 0
    n_faces = 0
    pt_props = []
    face_props = []
    is_binary = False
    header_vertex_section = False
    header_face_section = False
    texture_file = None

    # Read the header.
    while True:

        # Strip the newline character(s).
        line = f.readline().decode('utf8').rstrip('\n').rstrip('\r')

        if line.startswith('comment TextureFile'):
            texture_file = line.split()[-1]
        elif line.startswith('element vertex'):
            n_pts = int(line.split()[-1])
            header_vertex_section = True
            header_face_section = False
        elif line.startswith('element face'):
            n_faces = int(line.split()[-1])
            header_vertex_section = False
            header_face_section = True
        elif line.startswith('element'):  # Some other element.
            header_vertex_section = False
            header_face_section = False
        elif line.startswith('property') and header_vertex_section:
            # (name of the property, data type)
            pt_props.append((line.split()[-1], line.split()[-2]))
        elif line.startswith('property list') and header_face_section:
            elems = line.split()
            if elems[-1] == 'vertex_indices' or elems[-1] == 'vertex_index':
                # (name of the property, data type)
                face_props.append(('n_corners', elems[2]))
                for i in range(face_n_corners):
                    face_props.append(('ind_' + str(i), elems[3]))
            elif elems[-1] == 'texcoord':
                # (name of the property, data type)
                face_props.append(('texcoord', elems[2]))
                for i in range(face_n_corners * 2):
                    face_props.append(('texcoord_ind_' + str(i), elems[3]))
            else:
                misc.log('Warning: Not supported face property: ' + elems[-1])
        elif line.startswith('format'):
            if 'binary' in line:
                is_binary = True
        elif line.startswith('end_header'):
            break

    # Prepare data structures.
    model = {}
    if texture_file is not None:
        model['texture_file'] = texture_file
    model['pts'] = np.zeros((n_pts, 3), np.float64)
    if n_faces > 0:
        model['faces'] = np.zeros((n_faces, face_n_corners), np.float64)

    pt_props_names = [p[0] for p in pt_props]
    face_props_names = [p[0] for p in face_props]

    is_normal = False
    if {'nx', 'ny', 'nz'}.issubset(set(pt_props_names)):
        is_normal = True
        model['normals'] = np.zeros((n_pts, 3), np.float64)

    is_color = False
    if {'red', 'green', 'blue'}.issubset(set(pt_props_names)):
        is_color = True
        model['colors'] = np.zeros((n_pts, 3), np.float64)

    is_texture_pt = False
    if {'texture_u', 'texture_v'}.issubset(set(pt_props_names)):
        is_texture_pt = True
        model['texture_uv'] = np.zeros((n_pts, 2), np.float64)

    is_texture_face = False
    if {'texcoord'}.issubset(set(face_props_names)):
        is_texture_face = True
        model['texture_uv_face'] = np.zeros((n_faces, 6), np.float64)

    # Formats for the binary case.
    formats = {
        'float': ('f', 4),
        'double': ('d', 8),
        'int': ('i', 4),
        'uchar': ('B', 1)
    }

    # Load vertices.
    for pt_id in range(n_pts):
        prop_vals = {}
        load_props = ['x', 'y', 'z', 'nx', 'ny', 'nz',
                      'red', 'green', 'blue', 'texture_u', 'texture_v']

        if is_binary:
            for prop in pt_props:
                format = formats[prop[1]]
                read_data = f.read(format[1])
                val = struct.unpack(format[0], read_data)[0]
                if prop[0] in load_props:
                    prop_vals[prop[0]] = val
        else:
            elems = f.readline().decode('utf8').rstrip('\n').rstrip('\r').split()
            for prop_id, prop in enumerate(pt_props):
                if prop[0] in load_props:
                    prop_vals[prop[0]] = elems[prop_id]

        model['pts'][pt_id, 0] = float(prop_vals['x'])
        model['pts'][pt_id, 1] = float(prop_vals['y'])
        model['pts'][pt_id, 2] = float(prop_vals['z'])

        if is_normal:
            model['normals'][pt_id, 0] = float(prop_vals['nx'])
            model['normals'][pt_id, 1] = float(prop_vals['ny'])
            model['normals'][pt_id, 2] = float(prop_vals['nz'])

        if is_color:
            model['colors'][pt_id, 0] = float(prop_vals['red'])
            model['colors'][pt_id, 1] = float(prop_vals['green'])
            model['colors'][pt_id, 2] = float(prop_vals['blue'])

        if is_texture_pt:
            model['texture_uv'][pt_id, 0] = float(prop_vals['texture_u'])
            model['texture_uv'][pt_id, 1] = float(prop_vals['texture_v'])

    # Load faces.
    for face_id in range(n_faces):
        prop_vals = {}
        if is_binary:
            for prop in face_props:
                format = formats[prop[1]]
                val = struct.unpack(format[0], f.read(format[1]))[0]
                if prop[0] == 'n_corners':
                    if val != face_n_corners:
                        raise ValueError('Only triangular faces are supported.')
                elif prop[0] == 'texcoord':
                    if val != face_n_corners * 2:
                        raise ValueError('Wrong number of UV face coordinates.')
                else:
                    prop_vals[prop[0]] = val
        else:
            elems = f.readline().decode('utf8').rstrip('\n').rstrip('\r').split()
            for prop_id, prop in enumerate(face_props):
                if prop[0] == 'n_corners':
                    if int(elems[prop_id]) != face_n_corners:
                        raise ValueError('Only triangular faces are supported.')
                elif prop[0] == 'texcoord':
                    if int(elems[prop_id]) != face_n_corners * 2:
                        raise ValueError('Wrong number of UV face coordinates.')
                else:
                    prop_vals[prop[0]] = elems[prop_id]

        model['faces'][face_id, 0] = int(prop_vals['ind_0'])
        model['faces'][face_id, 1] = int(prop_vals['ind_1'])
        model['faces'][face_id, 2] = int(prop_vals['ind_2'])

        if is_texture_face:
            for i in range(6):
                model['texture_uv_face'][face_id, i] = float(
                    prop_vals['texcoord_ind_{}'.format(i)])

    f.close()

    return model
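# Hypothetical usage sketch (the model path is a placeholder following the BOP
# 'models_eval' layout; numpy is assumed to be imported as np in this module):
if __name__ == '__main__':
    model = load_ply('models_eval/obj_000001.ply')
    print('Loaded {} vertices and {} faces.'.format(
        model['pts'].shape[0], model.get('faces', np.empty((0, 3))).shape[0]))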
            np.array([0, 0, 0]),
            np.array([0, 0, 0])]

        if views_level:
            max_level = max(1, max(views_level))
            intens = (255 * views_level[view_id]) / float(max_level)
        else:
            intens = 255 * view_id / float(len(views))
        colors += [[intens, intens, intens], [255, 0, 0], [0, 255, 0], [0, 0, 255]]

    inout.save_ply2(
        path,
        pts=np.array(pts),
        pts_normals=np.array(normals),
        pts_colors=np.array(colors))


if __name__ == '__main__':

    # Example of sampling views from a view sphere.
    views, views_level = sample_views(
        min_n_views=25,
        radius=1,
        azimuth_range=(0, 2 * math.pi),
        elev_range=(-0.5 * math.pi, 0.5 * math.pi),
        mode='fibonacci')
    misc.log('Sampled views: ' + str(len(views)))
    out_views_vis_path = 'view_sphere.ply'
    save_vis(out_views_vis_path, views)
def eval_calc_errors(eval_args, eval_dir, filename):

    # Command line arguments.
    # ------------------------------------------------------------------------------
    p['datasets_path'] = eval_args.get('DATA', 'DATA_PATH')
    p['results_path'] = os.path.join(eval_dir, 'bop')
    p['eval_path'] = p['results_path']
    p['result_filenames'] = [filename]
    p['renderer_type'] = eval_args.get('DATA', 'RENDERER_TYPE')
    p['targets_filename'] = eval_args.get('DATA', 'TARGETS_FILENAME')
    p['error_type'] = eval_args.get('METRIC', 'ERROR_TYPE')
    p['n_top'] = eval_args.getint('METRIC', 'TOP_N')
    # p['vsd_deltas'] = {str(e.split(':')[0]): float(e.split(':')[1])
    #                    for e in eval_args.get('METRIC', 'VSD_DELTA').split(',')}
    p['vsd_taus'] = list(map(float, eval_args.get('METRIC', 'VSD_TAU').split(',')))

    errors = []
    error_tmp = {
        'n_top': p['n_top'],
        'type': p['error_type'],
        'vsd_deltas': p['vsd_deltas'],
        'vsd_taus': p['vsd_taus'],
        'vsd_normalized_by_diameter': p['vsd_normalized_by_diameter'],
        'correct_th': [0.30]
    }
    errors.append(error_tmp)

    misc.log('-----------')
    misc.log('Parameters:')
    for k, v in p.items():
        misc.log('- {}: {}'.format(k, v))
    misc.log('-----------')

    # Error calculation.
    # ------------------------------------------------------------------------------
    for result_filename in p['result_filenames']:
        misc.log('Processing: {}'.format(result_filename))

        ests_counter = 0
        time_start = time.time()

        # Parse info about the method and the dataset from the filename.
        result_name = os.path.splitext(os.path.basename(result_filename))[0]
        # print result_name
        result_info = result_name.split('_')
        # print result_info
        method = str(result_info[0])
        # print method
        dataset_info = result_info[1].split('-')
        dataset = str(dataset_info[0])
        split = str(dataset_info[1])
        split_type = str(dataset_info[2]) if len(dataset_info) > 2 else None
        split_type_str = ' - ' + split_type if split_type is not None else ''

        # Load dataset parameters.
        dp_split = dataset_params.get_split_params(
            p['datasets_path'], dataset, split, split_type)

        model_type = 'eval'
        dp_model = dataset_params.get_model_params(
            p['datasets_path'], dataset, model_type)

        # ADD
        dp_model['obj_ids'] = [eval_args.getint('DATA', 'OBJ_ID')]

        # Load object models.
        models = {}
        if p['error_type'] in ['ad', 'add', 'adi', 'mssd', 'mspd', 'proj']:
            misc.log('Loading object models...')
            for obj_id in dp_model['obj_ids']:
                models[obj_id] = inout.load_ply(
                    dp_model['model_tpath'].format(obj_id=obj_id))

        # Load models info.
        models_info = None
        if p['error_type'] in ['ad', 'add', 'adi', 'vsd', 'mssd', 'mspd', 'cus']:
            models_info = inout.load_json(dp_model['models_info_path'], keys_to_int=True)

        # Get sets of symmetry transformations for the object models.
        models_sym = None
        if p['error_type'] in ['mssd', 'mspd']:
            models_sym = {}
            for obj_id in dp_model['obj_ids']:
                models_sym[obj_id] = misc.get_symmetry_transformations(
                    models_info[obj_id], p['max_sym_disc_step'])

        # Initialize a renderer.
        ren = None
        if p['error_type'] in ['vsd', 'cus']:
            misc.log('Initializing renderer...')
            width, height = dp_split['im_size']
            ren = renderer.create_renderer(width, height, p['renderer_type'], mode='depth')
            for obj_id in dp_model['obj_ids']:
                ren.add_object(obj_id, dp_model['model_tpath'].format(obj_id=obj_id))

        # Load the estimation targets.
        targets = inout.load_json(
            os.path.join(dp_split['base_path'], p['targets_filename']))

        # Organize the targets by scene, image and object.
        misc.log('Organizing estimation targets...')
        targets_org = {}
        for target in targets:
            # ADD
            if target['obj_id'] == eval_args.getint('DATA', 'OBJ_ID'):
                targets_org.setdefault(target['scene_id'], {}).setdefault(
                    target['im_id'], {})[target['obj_id']] = target

        # Load pose estimates.
        misc.log('Loading pose estimates...')
        ests = inout.load_bop_results(
            os.path.join(p['results_path'], result_filename))

        # Organize the pose estimates by scene, image and object.
        misc.log('Organizing pose estimates...')
        ests_org = {}
        for est in ests:
            ests_org.setdefault(est['scene_id'], {}).setdefault(
                est['im_id'], {}).setdefault(est['obj_id'], []).append(est)

        for scene_id, scene_targets in targets_org.items():

            # Load camera and GT poses for the current scene.
            scene_camera = inout.load_scene_camera(
                dp_split['scene_camera_tpath'].format(scene_id=scene_id))
            scene_gt = inout.load_scene_gt(
                dp_split['scene_gt_tpath'].format(scene_id=scene_id))

            scene_errs = []

            for im_ind, (im_id, im_targets) in enumerate(scene_targets.items()):

                if im_ind % 10 == 0:
                    misc.log(
                        'Calculating error {} - method: {}, dataset: {}{}, scene: {}, '
                        'im: {}'.format(p['error_type'], method, dataset, split_type_str,
                                        scene_id, im_ind))

                # Intrinsic camera matrix.
                K = scene_camera[im_id]['cam_K']

                # Load the depth image if VSD is selected as the pose error function.
                depth_im = None
                if p['error_type'] == 'vsd':
                    depth_path = dp_split['depth_tpath'].format(
                        scene_id=scene_id, im_id=im_id)
                    depth_im = inout.load_depth(depth_path)
                    depth_im *= scene_camera[im_id]['depth_scale']  # Convert to [mm].

                for obj_id, target in im_targets.items():

                    # The required number of top estimated poses.
                    if p['n_top'] == 0:  # All estimates are considered.
                        n_top_curr = None
                    elif p['n_top'] == -1:  # Given by the number of GT poses.
                        # n_top_curr = sum([gt['obj_id'] == obj_id for gt in scene_gt[im_id]])
                        n_top_curr = target['inst_count']
                    else:
                        n_top_curr = p['n_top']

                    # Get the estimates.
                    try:
                        obj_ests = ests_org[scene_id][im_id][obj_id]
                        obj_count = len(obj_ests)
                    except KeyError:
                        obj_ests = []
                        obj_count = 0

                    # Check the number of estimates.
                    if not p['skip_missing'] and obj_count < n_top_curr:
                        raise ValueError(
                            'Not enough estimates for scene: {}, im: {}, obj: {} '
                            '(provided: {}, expected: {})'.format(
                                scene_id, im_id, obj_id, obj_count, n_top_curr))

                    # Sort the estimates by score (in descending order).
                    obj_ests_sorted = sorted(
                        enumerate(obj_ests), key=lambda x: x[1]['score'], reverse=True)

                    # Select the required number of top estimated poses.
                    obj_ests_sorted = obj_ests_sorted[slice(0, n_top_curr)]
                    ests_counter += len(obj_ests_sorted)

                    # Calculate error of each pose estimate w.r.t. all GT poses of the
                    # same object class.
                    for est_id, est in obj_ests_sorted:

                        # Estimated pose.
                        R_e = est['R']
                        t_e = est['t']

                        errs = {}  # Errors w.r.t. GT poses of the same object class.
                        for gt_id, gt in enumerate(scene_gt[im_id]):
                            if gt['obj_id'] != obj_id:
                                continue

                            # Ground-truth pose.
                            R_g = gt['cam_R_m2c']
                            t_g = gt['cam_t_m2c']

                            # Check if the projections of the bounding spheres of the
                            # object in the two poses overlap (to speed up calculation
                            # of some errors).
                            sphere_projections_overlap = None
                            if p['error_type'] in ['vsd', 'cus']:
                                radius = 0.5 * models_info[obj_id]['diameter']
                                sphere_projections_overlap = misc.overlapping_sphere_projections(
                                    radius, t_e.squeeze(), t_g.squeeze())

                            # Check if the bounding spheres of the object in the two
                            # poses overlap (to speed up calculation of some errors).
                            spheres_overlap = None
                            if p['error_type'] in ['ad', 'add', 'adi', 'mssd']:
                                center_dist = np.linalg.norm(t_e - t_g)
                                spheres_overlap = center_dist < models_info[obj_id]['diameter']

                            if p['error_type'] == 'vsd':
                                if not sphere_projections_overlap:
                                    e = [1.0] * len(p['vsd_taus'])
                                else:
                                    e = pose_error.vsd(
                                        R_e, t_e, R_g, t_g, depth_im, K,
                                        p['vsd_deltas'][dataset], p['vsd_taus'],
                                        p['vsd_normalized_by_diameter'],
                                        models_info[obj_id]['diameter'], ren, obj_id, 'step')

                            elif p['error_type'] == 'mssd':
                                if not spheres_overlap:
                                    e = [float('inf')]
                                else:
                                    e = [pose_error.mssd(
                                        R_e, t_e, R_g, t_g, models[obj_id]['pts'],
                                        models_sym[obj_id])]

                            elif p['error_type'] == 'mspd':
                                e = [pose_error.mspd(
                                    R_e, t_e, R_g, t_g, K, models[obj_id]['pts'],
                                    models_sym[obj_id])]

                            elif p['error_type'] in ['ad', 'add', 'adi']:
                                if not spheres_overlap:
                                    # Infinite error if the bounding spheres do not
                                    # overlap. With typically used values of the
                                    # correctness threshold for the AD error (e.g.
                                    # k*diameter, where k = 0.1), such pose estimates
                                    # would be considered incorrect anyway.
                                    e = [float('inf')]
                                else:
                                    if p['error_type'] == 'ad':
                                        if obj_id in dp_model['symmetric_obj_ids']:
                                            e = [pose_error.adi(
                                                R_e, t_e, R_g, t_g, models[obj_id]['pts'])]
                                        else:
                                            e = [pose_error.add(
                                                R_e, t_e, R_g, t_g, models[obj_id]['pts'])]
                                    elif p['error_type'] == 'add':
                                        e = [pose_error.add(
                                            R_e, t_e, R_g, t_g, models[obj_id]['pts'])]
                                    else:  # 'adi'
                                        e = [pose_error.adi(
                                            R_e, t_e, R_g, t_g, models[obj_id]['pts'])]

                            elif p['error_type'] == 'cus':
                                if sphere_projections_overlap:
                                    e = [pose_error.cus(R_e, t_e, R_g, t_g, K, ren, obj_id)]
                                else:
                                    e = [1.0]

                            elif p['error_type'] == 'proj':
                                e = [pose_error.proj(
                                    R_e, t_e, R_g, t_g, K, models[obj_id]['pts'])]

                            elif p['error_type'] == 'rete':
                                e = [pose_error.re(R_e, R_g), pose_error.te(t_e, t_g)]

                            elif p['error_type'] == 're':
                                e = [pose_error.re(R_e, R_g)]

                            elif p['error_type'] == 'te':
                                e = [pose_error.te(t_e, t_g)]

                            else:
                                raise ValueError('Unknown pose error function.')

                            errs[gt_id] = e

                        # Save the calculated errors.
                        scene_errs.append({
                            'im_id': im_id,
                            'obj_id': obj_id,
                            'est_id': est_id,
                            'score': est['score'],
                            'errors': errs
                        })

            def save_errors(_error_sign, _scene_errs):
                # Save the calculated errors to a JSON file.
                errors_path = p['out_errors_tpath'].format(
                    eval_path=p['eval_path'],
                    result_name=result_name,
                    error_sign=_error_sign,
                    scene_id=scene_id)
                misc.ensure_dir(os.path.dirname(errors_path))
                misc.log('Saving errors to: {}'.format(errors_path))
                inout.save_json(errors_path, _scene_errs)

            # Save the calculated errors.
            if p['error_type'] == 'vsd':

                # For VSD, save errors for each tau value to a different file.
                for vsd_tau_id, vsd_tau in enumerate(p['vsd_taus']):
                    error_sign = misc.get_error_signature(
                        p['error_type'], p['n_top'],
                        vsd_delta=p['vsd_deltas'][dataset],
                        vsd_tau=vsd_tau)

                    # Keep only errors for the current tau.
                    scene_errs_curr = copy.deepcopy(scene_errs)
                    for err in scene_errs_curr:
                        for gt_id in err['errors'].keys():
                            err['errors'][gt_id] = [err['errors'][gt_id][vsd_tau_id]]

                    save_errors(error_sign, scene_errs_curr)
            else:
                error_sign = misc.get_error_signature(p['error_type'], p['n_top'])
                save_errors(error_sign, scene_errs)

        time_total = time.time() - time_start
        misc.log('Calculation of errors for {} estimates took {}s.'.format(
            ests_counter, time_total))

    misc.log('Done.')

    return errors
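# Hypothetical end-to-end sketch of how the three stages above appear to fit together,
# based only on their signatures and the config options they read. The paths, the
# result file name and the option values below are placeholders, not defaults; the
# remaining settings (e.g. 'vsd_deltas', 'correct_th', output path templates) are
# assumed to come from the module-level 'p' dictionary.
if __name__ == '__main__':
    import configparser

    eval_args = configparser.ConfigParser()
    eval_args['DATA'] = {
        'DATA_PATH': '/path/to/bop_datasets',
        'TARGETS_FILENAME': 'test_targets_bop19.json',
        'RENDERER_TYPE': 'vispy',
        'OBJ_ID': '1',
    }
    eval_args['METRIC'] = {
        'ERROR_TYPE': 'vsd',
        'TOP_N': '1',
        'VSD_TAU': '20',
        'VISIB_GT_MIN': '0.1',
    }

    eval_dir = '/path/to/eval_output'
    result_filename = 'method_lmo-test.csv'

    # 1) Per-estimate pose errors, 2) recall scores per error setting, 3) aggregation.
    errors = eval_calc_errors(eval_args, eval_dir, result_filename)
    eval_calc_scores(eval_args, eval_dir, result_filename)
    eval_bop(eval_args, eval_dir, result_filename, errors)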