def gen_gt_observed():
    """Render GT-aligned color/depth/label images plus pose files for every
    observed frame of every class in ``idx2class``.

    For each class: read the per-class index list from ``observed_set_dir``,
    load the ground-truth pose from the matching ``*-meta.mat``, re-render the
    model at that pose, and write ``-pose.txt``, ``-color.png``, ``-depth.png``
    and ``-label.png`` under ``gt_observed_root/<cls_name>/``.
    """
    for cls_idx, cls_name in idx2class.items():
        print(cls_idx, cls_name)
        # uncomment here to only generate data for ape
        # if cls_name != 'ape':
        #     continue
        with open(
                os.path.join(observed_set_dir,
                             "{}_all.txt".format(cls_name)), "r") as f:
            all_indices = [line.strip("\r\n") for line in f.readlines()]

        # render machine (one per class model)
        model_dir = os.path.join(LM6d_root, "models", cls_name)
        render_machine = Render_Py(model_dir, K, width, height, ZNEAR, ZFAR)

        for observed_idx in tqdm(all_indices):
            video_name, prefix = observed_idx.split("/")

            # read pose -------------------------------------
            observed_meta_path = os.path.join(
                observed_data_root, "{}-meta.mat".format(observed_idx))
            meta_data = sio.loadmat(observed_meta_path)
            # position of this class inside the meta file's cls_indexes list
            inner_id = np.where(
                np.squeeze(meta_data["cls_indexes"]) == cls_idx)
            # single-object metas store one 3x4 pose directly;
            # multi-object metas stack poses along the third axis
            if len(meta_data["poses"].shape) == 2:
                pose = meta_data["poses"]
            else:
                pose = np.squeeze(meta_data["poses"][:, :, inner_id])

            new_pose_path = os.path.join(gt_observed_root, cls_name,
                                         "{}-pose.txt".format(prefix))
            mkdir_if_missing(os.path.join(gt_observed_root, cls_name))
            # write pose
            write_pose_file(new_pose_path, cls_idx, pose)

            # ----------------------render color, depth ------------
            rgb_gl, depth_gl = render_machine.render(
                RT_transform.mat2quat(pose[:3, :3]), pose[:, -1])
            # NOTE(review): the original source's (whitespace-mangled) layout
            # does not make the extent of this `if` suite unambiguous; it is
            # reconstructed here as guarding only the uint8 cast — confirm
            # against upstream history before relying on it.
            if any([x in observed_idx
                    for x in ["000128", "000256", "000512"]]):
                rgb_gl = rgb_gl.astype("uint8")
            render_color_path = os.path.join(gt_observed_root, cls_name,
                                             "{}-color.png".format(prefix))
            cv2.imwrite(render_color_path, rgb_gl)

            # depth: scaled by DEPTH_FACTOR and stored as a 16-bit png
            depth_save = depth_gl * DEPTH_FACTOR
            depth_save = depth_save.astype("uint16")
            render_depth_path = os.path.join(gt_observed_root, cls_name,
                                             "{}-depth.png".format(prefix))
            cv2.imwrite(render_depth_path, depth_save)

            # --------------------- render label ----------------------------------
            # binary object mask: 1 wherever the rendered depth is non-zero
            render_label = depth_gl != 0
            render_label = render_label.astype("uint8")
            # write label
            label_path = os.path.join(gt_observed_root, cls_name,
                                      "{}-label.png".format(prefix))
            cv2.imwrite(label_path, render_label)
def gen_render_real():
    """Re-render each real validation frame at its ground-truth pose.

    For every class (driller excluded), read the occLM validation index list,
    fetch the class pose from the frame's ``-meta.mat``, write it out as a
    pose file, then render and save color / depth / binary-label images under
    ``render_real_root/<cls_name>/``.
    """
    for cls_idx, cls_name in idx2class.items():
        print(cls_idx, cls_name)
        if cls_name == 'driller':
            continue

        index_file = os.path.join(
            real_set_dir, 'occLM_val_real_{}.txt'.format(cls_name))
        with open(index_file, 'r') as f:
            all_indices = [ln.strip('\r\n') for ln in f.readlines()]

        # one renderer per class model
        renderer = Render_Py(
            os.path.join(LM6d_root, 'models', cls_name),
            K, width, height, ZNEAR, ZFAR)

        out_dir = os.path.join(render_real_root, cls_name)

        for real_idx in tqdm(all_indices):
            video_name, prefix = real_idx.split('/')  # video name is "test"

            # load the ground-truth pose for this class from the meta file
            meta = sio.loadmat(
                os.path.join(real_data_root, "02/{}-meta.mat".format(prefix)))
            inner_id = np.where(np.squeeze(meta['cls_indexes']) == cls_idx)
            # 2-D poses array -> single object; 3-D -> indexed by object slot
            if meta['poses'].ndim == 2:
                pose = meta['poses']
            else:
                pose = np.squeeze(meta['poses'][:, :, inner_id])

            mkdir_if_missing(out_dir)
            write_pose_file(
                os.path.join(out_dir, "{}-pose.txt".format(prefix)),
                cls_idx, pose)

            # render at the GT pose, then save color / depth / label
            rgb, depth = renderer.render(
                RT_transform.mat2quat(pose[:3, :3]), pose[:, -1])
            cv2.imwrite(
                os.path.join(out_dir, "{}-color.png".format(prefix)),
                rgb.astype('uint8'))
            cv2.imwrite(
                os.path.join(out_dir, "{}-depth.png".format(prefix)),
                (depth * DEPTH_FACTOR).astype('uint16'))
            cv2.imwrite(
                os.path.join(out_dir, "{}-label.png".format(prefix)),
                (depth != 0).astype('uint8'))
def stat_YCB_video():
    """Collect per-frame object translations from YCB-Video metas, re-projected
    into the LINEMOD camera, and cache them as a pickle.

    Returns
    -------
    dict
        ``{"%06d" % j: (num_pose, 3) float32 array}`` of translations expressed
        under the LM intrinsics. Loaded from the cache file when it exists,
        otherwise computed from ``observed_indices`` and dumped.
    """
    res_dir = os.path.join(cur_path, "../data/LINEMOD_6D/pose_stat_v2")
    mkdir_if_missing(res_dir)
    cache_path = os.path.join(res_dir, "trans_from_YCB_video.pkl")
    if os.path.exists(cache_path):
        # fix: open via context manager (was cPickle.load(open(...)) which
        # leaked the file handle)
        with open(cache_path, "rb") as f:
            trans_dict = cPickle.load(f)
    else:
        pose_dict = {}
        trans_list = []
        trans_lm_list = []
        trans_dict = {}
        for j, observed_idx in enumerate(tqdm(observed_indices)):
            meta_path = os.path.join(data_dir, observed_idx + "-meta.mat")
            poses = get_poses_from_meta(meta_path)
            tmp_pose = np.zeros((len(poses), 6), dtype="float32")
            tmp_trans = np.zeros((len(poses), 3), dtype="float32")
            for i, pose in enumerate(poses):
                rot_euler = mat2euler(pose[:3, :3])
                trans = pose[:3, 3]
                tmp_pose[i, :3] = rot_euler
                tmp_pose[i, 3:] = trans
                trans_list.append(trans)
                # re-express the translation under LM intrinsics:
                # K_lm^-1 @ K_YCB_video @ t
                trans_lm = np.dot(
                    np.dot(np.linalg.inv(K_lm), K_YCB_video),
                    trans.reshape((3, 1)))
                trans_lm = trans_lm.reshape((3, ))
                trans_lm_list.append(trans_lm)
                tmp_trans[i, :] = trans_lm
            pose_dict["{:06d}".format(j)] = tmp_pose
            trans_dict["{:06d}".format(j)] = tmp_trans

        # summary statistics of raw and re-projected translations
        trans_array = np.array(trans_list)
        trans_mean = np.mean(trans_array, 0)
        trans_std = np.std(trans_array, 0)
        print("trans, ", "mean: ", trans_mean, "std: ", trans_std)

        trans_lm_array = np.array(trans_lm_list)
        trans_lm_mean = np.mean(trans_lm_array, 0)
        trans_lm_std = np.std(trans_lm_array, 0)
        print("trans lm, ", "mean: ", trans_lm_mean, "std: ", trans_lm_std)
        print(len(pose_dict))

        # cPickle.dump(pose_dict, open(os.path.join(res_dir, 'YCB_video_pose_dict.pkl'), 'wb'), 2)
        # {prefix: array(num_posex7)}, num_pose is uncertain
        # fix: write the cache via context manager (handle was never closed)
        with open(cache_path, "wb") as f:
            cPickle.dump(trans_dict, f, 2)
    return trans_dict
def gen_observed():
    """Render synthetic 'observed' training images for each class.

    Loads per-class pose lists from the pickled pose dict, renders every pose
    with a randomized light (position cycles deterministically with the frame
    index; color/intensity/brightness are random), and writes the image-set
    list plus ``-color.png``, ``-depth.png``, ``-label.png`` and ``-pose.txt``
    per frame.
    """
    # output path
    observed_root_dir = os.path.join(LINEMOD_syn_root, "data", "observed")
    image_set_dir = os.path.join(LINEMOD_syn_root, "image_set")
    mkdir_if_missing(observed_root_dir)
    mkdir_if_missing(image_set_dir)

    syn_poses_path = os.path.join(observed_pose_dir,
                                  "LM6d_ds_train_observed_pose_all.pkl")
    with open(syn_poses_path, "rb") as f:
        syn_pose_dict = cPickle.load(f)

    # light positions cycled deterministically via idx % 6
    # (replaces the original if/elif chain whose final `raise` was unreachable)
    light_positions = [[1, 0, 1], [1, 1, 1], [0, 1, 1],
                       [-1, 1, 1], [-1, 0, 1], [0, 0, 1]]

    for class_idx, class_name in enumerate(classes):
        if class_name == "__back_ground__":
            continue
        # uncomment here to only generate data for ape
        # if class_name not in ['ape']:
        #     continue

        # init render machine (one per class, several brightness levels)
        brightness_ratios = [0.2, 0.25, 0.3, 0.35, 0.4]
        model_dir = os.path.join(LINEMOD_syn_root, "models", class_name)
        render_machine = Render_Py_Light(model_dir, K, width, height, ZNEAR,
                                         ZFAR, brightness_ratios)

        syn_poses = syn_pose_dict[class_name]
        num_poses = syn_poses.shape[0]
        observed_index_list = [
            "{}/{:06d}".format(class_name, i + 1) for i in range(num_poses)
        ]
        observed_set_path = os.path.join(
            image_set_dir,
            "observed/LM6d_data_syn_train_observed_{}.txt".format(class_name))
        mkdir_if_missing(os.path.join(image_set_dir, "observed"))

        # fix: image-set file is now closed on exit (was left open)
        with open(observed_set_path, "w") as f_observed_set:
            for idx, observed_index in enumerate(tqdm(observed_index_list)):
                f_observed_set.write("{}\n".format(observed_index))
                prefix = observed_index.split("/")[1]

                observed_dir = os.path.join(observed_root_dir, class_name)
                mkdir_if_missing(observed_dir)
                observed_color_file = os.path.join(observed_dir,
                                                   prefix + "-color.png")
                observed_depth_file = os.path.join(observed_dir,
                                                   prefix + "-depth.png")
                observed_pose_file = os.path.join(observed_dir,
                                                  prefix + "-pose.txt")
                observed_label_file = os.path.join(observed_dir,
                                                   prefix + "-label.png")

                pose_quat = syn_poses[idx, :]
                pose = se3.se3_q2m(pose_quat)

                # deterministic light position for this frame
                light_position = np.array(light_positions[idx % 6]) * 0.5
                # inverse yz: move the light relative to the object center
                light_position[0] += pose[0, 3]
                light_position[1] -= pose[1, 3]
                light_position[2] -= pose[2, 3]

                # randomly adjust color and intensity for light_intensity
                colors = np.array([[0, 0, 1], [0, 1, 0], [0, 1, 1], [1, 0, 0],
                                   [1, 0, 1], [1, 1, 0], [1, 1, 1]])
                intensity = np.random.uniform(0.9, 1.1, size=(3, ))
                colors_randk = random.randint(0, colors.shape[0] - 1)
                light_intensity = colors[colors_randk] * intensity

                # randomly choose a render machine (brightness level)
                rm_randk = random.randint(0, len(brightness_ratios) - 1)

                # get render result
                rgb_gl, depth_gl = render_machine.render(
                    se3.mat2quat(pose[:3, :3]), pose[:, -1], light_position,
                    light_intensity, brightness_k=rm_randk)
                rgb_gl = rgb_gl.astype("uint8")

                # gt_observed label: 1 wherever the object was rendered
                label_gl = np.zeros(depth_gl.shape)
                label_gl[depth_gl != 0] = 1

                cv2.imwrite(observed_color_file, rgb_gl)
                depth_gl = (depth_gl * depth_factor).astype(np.uint16)
                cv2.imwrite(observed_depth_file, depth_gl)
                cv2.imwrite(observed_label_file, label_gl)

                # fix: pose file is now closed on exit (was left open)
                with open(observed_pose_file, "w") as text_file:
                    text_file.write("{}\n".format(class_idx))
                    pose_str = "{} {} {} {}\n{} {} {} {}\n{} {} {} {}".format(
                        pose[0, 0], pose[0, 1], pose[0, 2], pose[0, 3],
                        pose[1, 0], pose[1, 1], pose[1, 2], pose[1, 3],
                        pose[2, 0], pose[2, 1], pose[2, 2], pose[2, 3],
                    )
                    text_file.write(pose_str)
        print(class_name, " done")
height = 480 K = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]) ZNEAR = 0.25 ZFAR = 6.0 depth_factor = 1000 LM6d_root = os.path.join(cur_dir, "../data/LINEMOD_6D/LM6d_converted/LM6d_refine") observed_set_root = os.path.join(LM6d_root, "image_set/observed") rendered_pose_path = "%s/LM6d_{}_rendered_pose_{}.txt" % ( os.path.join(LM6d_root, "rendered_poses") ) # output_path rendered_root_dir = os.path.join(LM6d_root, "data/rendered") pair_set_dir = os.path.join(LM6d_root, "image_set") mkdir_if_missing(rendered_root_dir) mkdir_if_missing(pair_set_dir) print("target path: {}".format(rendered_root_dir)) print("target path: {}".format(pair_set_dir)) def main(): gen_images = True for class_idx, class_name in idx2class.items(): train_pair = [] val_pair = [] print("start ", class_idx, class_name) if class_name in ["__back_ground__"]: continue # uncomment here to only generate data for ape # if class_name not in ['ape']:
from shutil import copyfile from tqdm import tqdm # from lib.utils import renderer, inout import matplotlib.pyplot as plt from lib.render_glumpy.render_py_multi import Render_Py import scipy.io as sio LM6d_origin_root = os.path.join(cur_dir, '../../data/LINEMOD_6D/LM6d_origin/test') # following previous works, part of the observed images are used for training and only images. LM6d_new_root = os.path.join( cur_dir, '../../data/LINEMOD_6D/LM6d_converted/LM6d_refine/data/observed') model_dir = os.path.join( cur_dir, '../../data/LINEMOD_6D/LM6d_converted/LM6d_refine/models') mkdir_if_missing(LM6d_new_root) print("target path: {}".format(LM6d_new_root)) idx2class = { 1: 'ape', 2: 'benchviseblue', 3: 'bowl', 4: 'camera', 5: 'can', 6: 'cat', 7: 'cup', 8: 'driller', 9: 'duck', 10: 'eggbox', 11: 'glue', 12: 'holepuncher',
import sys import os cur_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(1, os.path.join(cur_dir, "../..")) import numpy as np from lib.utils.mkdir_if_missing import mkdir_if_missing LM6d_origin_root = os.path.join(cur_dir, "../../data/LINEMOD_6D/LM6d_origin") version = "v1" LM6d_new_root = os.path.join( cur_dir, "../../data/LINEMOD_6D/LM6d_converted/LM6d_render_{}/".format(version)) src_model_root = os.path.join(LM6d_origin_root, "models") dst_model_root = os.path.join(LM6d_new_root, "models") mkdir_if_missing(dst_model_root) print("target path: {}".format(dst_model_root)) class_list = ["{:02d}".format(i) for i in range(1, 16)] def read_points_from_mesh(mesh_path): """ ply :param mesh_path: :return: """ with open(mesh_path, "r") as f: i = 0 points = [] for line in f:
import numpy as np from lib.utils.mkdir_if_missing import mkdir_if_missing import cv2 import yaml from shutil import copyfile from tqdm import tqdm # from lib.utils import renderer, inout import matplotlib.pyplot as plt from lib.render_glumpy.render_py_multi import Render_Py import scipy.io as sio LM6d_origin_root = os.path.join(cur_dir, '../../data/LINEMOD_6D/LM6d_origin/test') # only origin test images are real images LM6d_new_root = os.path.join(cur_dir, '../../data/LINEMOD_6D/LM6d_converted/real') mkdir_if_missing(LM6d_new_root) real_set_dir = os.path.join(cur_dir, '../../data/LINEMOD_6D/LM6d_converted/image_set/real') mkdir_if_missing(real_set_dir) idx2class = {1: 'ape', 2: 'benchviseblue', 3: 'bowl', 4: 'camera', 5: 'can', 6: 'cat', 7: 'cup', 8: 'driller', 9: 'duck', 10: 'eggbox', 11: 'glue', 12: 'holepuncher',
width = 640 height = 480 K = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]) # LM ZNEAR = 0.25 ZFAR = 6.0 depth_factor = 1000 # init render machines # brightness_ratios = [0.2, 0.25, 0.3, 0.35, 0.4] ################### modelnet_root = os.path.join(cur_dir, "../data/ModelNet") modelnet40_root = os.path.join(modelnet_root, "ModelNet40") data_dir = os.path.join(modelnet_root, "modelnet_render_v1/data/real") real_set_dir = os.path.join(modelnet_root, "modelnet_render_v1/image_set/real") mkdir_if_missing(real_set_dir) for cls_i, cls_name in enumerate(classes): if not cls_name in test_classes: continue print(cls_name) class_dir = os.path.join(data_dir, cls_name) all_indices = [] train_indices = [] test_indices = [] for set in ["train", "test"]: real_indices = [ fn.split("-")[0] for fn in os.listdir(os.path.join(class_dir, set)) if "color" in fn ]
def gen_poses():
    """Sample multi-object scene poses for LM6d_occ_dsm training.

    For each of NUM_IMAGES candidate scenes: pick a random set of YCB-Video
    translations (re-projected to LM intrinsics), assign them to a random
    subset of classes, jitter the translation until it projects inside the
    image with margin, and rejection-sample a quaternion whose viewing
    direction stays within each class's observed angle range. The resulting
    ``{prefix: {cls_name: 7-vector (quat+trans)}}`` dict is pickled.
    """
    NUM_IMAGES = 20000
    pz = np.array([0, 0, 1])
    pose_dict, quat_stat, trans_stat, new_points = stat_lm6d()
    trans_lm_dict = stat_YCB_video()
    observed_prefix_list = ["{:06d}".format(i + 1) for i in range(NUM_IMAGES)]
    sel_classes = copy.deepcopy(classes)

    # {prefix: {cls_name: pose}} — only prefixes that yielded poses appear
    observed_pose_dict = {}
    syn_pose_dir = os.path.join(
        cur_path, "..",
        "data/LINEMOD_6D/LM6d_converted/LM6d_occ_dsm/syn_poses_multi")
    mkdir_if_missing(syn_pose_dir)

    # fix: dict views are not indexable in Python 3 —
    # `trans_lm_dict.keys()[rand_k]` raised TypeError; materialize once.
    trans_keys = list(trans_lm_dict.keys())

    for i in tqdm(range(NUM_IMAGES)):
        observed_prefix = observed_prefix_list[i]
        # randomly choose a set of transes
        rand_k = random.randint(0, len(trans_keys) - 1)
        sel_transes = trans_lm_dict[trans_keys[rand_k]]
        num_pose = sel_transes.shape[0]
        if num_pose < 3:
            continue
        observed_pose_dict[observed_prefix] = {}
        random.shuffle(sel_classes)
        gen_classes = sel_classes[:num_pose]
        for cls_i, cls_name in enumerate(gen_classes):
            deg_max = new_points[cls_name]["angle_max"] + 10
            gen_this_pose = True

            # generate trans ------------------------------------------------
            # jitter the YCB translation; resample until the object center
            # projects inside the image with a 48 px margin and depth in range
            tgt_trans = sel_transes[cls_i].copy()
            tgt_trans += np.random.normal(0, 0.05, 1)
            transform = np.matmul(K_lm, tgt_trans.reshape(3, 1))
            center_x = float(transform[0] / transform[2])
            center_y = float(transform[1] / transform[2])
            count = 0
            while not (0.1 < tgt_trans[2] < 1.2) or not (
                    48 < center_x < (640 - 48)
                    and 48 < center_y < (480 - 48)):
                tgt_trans = sel_transes[cls_i].copy()
                tgt_trans += np.random.normal(0, 0.05, 1)
                transform = np.matmul(K_lm, tgt_trans.reshape(3, 1))
                center_x = float(transform[0] / transform[2])
                center_y = float(transform[1] / transform[2])
                count += 1
                if count % 500 == 0:
                    print(
                        observed_prefix,
                        cls_name,
                        count,
                        "48 < center_x < (640-48): {}, 48 < center_y < (480-48): {}"
                        .format(48 < center_x < (640 - 48),
                                48 < center_y < (480 - 48)),
                    )
                    print("\tcenter_x:{}, center_y:{}, tgt_trans: {}".format(
                        center_x, center_y, tgt_trans))
                if count == 5000:
                    gen_this_pose = False
                    break

            # randomly generate a quat -------------------------------------------------
            # rejection-sample until the rotated +z axis stays within deg_max
            # of the class's mean viewing direction
            tgt_quat = np.random.normal(0, 1, 4)
            tgt_quat = tgt_quat / np.linalg.norm(tgt_quat)
            if tgt_quat[0] < 0:
                tgt_quat *= -1
            tgt_rot_m = quat2mat(tgt_quat)
            new_pz = np.dot(tgt_rot_m, pz.reshape((-1, 1))).reshape((3, ))
            pz_mean = new_points[cls_name]["pz_mean"]
            deg = angle(new_pz, pz_mean)
            count = 0
            while deg > deg_max:
                tgt_quat = np.random.normal(0, 1, 4)
                tgt_quat = tgt_quat / np.linalg.norm(tgt_quat)
                if tgt_quat[0] < 0:
                    tgt_quat *= -1
                tgt_rot_m = quat2mat(tgt_quat)
                new_pz = np.dot(tgt_rot_m, pz.reshape((-1, 1))).reshape((3, ))
                pz_mean = new_points[cls_name]["pz_mean"]
                deg = angle(new_pz, pz_mean)
                count += 1
                if count % 100 == 0:
                    print(
                        observed_prefix,
                        cls_name,
                        count,
                        "deg < deg_max={}: {}".format(deg_max, deg <= deg_max),
                    )
                    print("\tdeg:{}".format(deg))
                if count == 5000:
                    gen_this_pose = False
                    break

            # ---------------------------------------------------------------------------------
            if gen_this_pose:
                tgt_pose_q = np.zeros((7, ), dtype="float32")
                tgt_pose_q[:4] = tgt_quat
                tgt_pose_q[4:] = tgt_trans
                observed_pose_dict[observed_prefix][cls_name] = tgt_pose_q

    # count prefixes that ended up with at least two objects
    num_valid = sum(1 for v in observed_pose_dict.values() if len(v) >= 2)
    print("{} indices are successfully generated.".format(num_valid))

    # write pose
    poses_file = os.path.join(syn_pose_dir,
                              "LM6d_occ_dsm_train_observed_pose_all.pkl")
    with open(poses_file, "wb") as f:
        cPickle.dump(observed_pose_dict, f, 2)
def main():
    """Render one image per observed frame from pre-generated rendered poses
    and write the observed/rendered train-pair lists per class."""
    gen_images = True
    for class_idx, class_name in enumerate(tqdm(classes)):
        train_pair = []
        print("start ", class_name)
        if class_name in ["__back_ground__"]:
            continue
        if gen_images:
            # init render machine
            model_dir = os.path.join(
                cur_path,
                "../data/LINEMOD_6D/LM6d_converted/models/{}".format(
                    class_name),
            )
            render_machine = Render_Py(model_dir, K, width, height, ZNEAR,
                                       ZFAR)
        for set_type in ["NDtrain"]:
            # observed index list
            observed_list_path = os.path.join(
                observed_set_dir, "NDtrain_observed_{}.txt".format(class_name))
            with open(observed_list_path, "r") as f:
                observed_list = [x.strip() for x in f.readlines()]

            # rendered poses: one whitespace-separated quat+trans per line
            rendered_pose_path = os.path.join(
                rendered_pose_dir,
                "LM6d_occ_dsm_{}_NDtrain_rendered_pose_{}.txt".format(
                    version, class_name),
            )
            with open(rendered_pose_path, "r") as f:
                str_rendered_pose_list = [
                    x.strip().split(" ") for x in f.readlines()
                ]
            rendered_pose_list = np.array(
                [[float(x) for x in each_pose]
                 for each_pose in str_rendered_pose_list])
            rendered_per_observed = 1
            assert len(rendered_pose_list
                       ) == 1 * len(observed_list), "{} vs {}".format(
                           len(rendered_pose_list), len(observed_list))

            for idx, observed_index in enumerate(tqdm(observed_list)):
                video_name, observed_prefix = observed_index.split(
                    "/")  # ./prefix
                rendered_dir = os.path.join(rendered_root_dir, video_name)
                mkdir_if_missing(rendered_dir)
                rendered_dir = os.path.join(rendered_dir, class_name)
                mkdir_if_missing(rendered_dir)
                for inner_idx in range(rendered_per_observed):
                    if gen_images:
                        image_file = os.path.join(
                            rendered_dir,
                            "{}_{}-color.png".format(observed_prefix,
                                                     inner_idx),
                        )
                        depth_file = os.path.join(
                            rendered_dir,
                            "{}_{}-depth.png".format(observed_prefix,
                                                     inner_idx),
                        )
                        rendered_idx = idx * rendered_per_observed + inner_idx
                        pose_rendered_q = rendered_pose_list[rendered_idx]

                        rgb_gl, depth_gl = render_machine.render(
                            pose_rendered_q[:4], pose_rendered_q[4:])
                        rgb_gl = rgb_gl.astype("uint8")
                        depth_gl = (depth_gl * depth_factor).astype(np.uint16)
                        cv2.imwrite(image_file, rgb_gl)
                        cv2.imwrite(depth_file, depth_gl)

                        pose_rendered_file = os.path.join(
                            rendered_dir,
                            "{}_{}-pose.txt".format(observed_prefix,
                                                    inner_idx),
                        )
                        # fix: pose file is now closed on exit (was left open)
                        with open(pose_rendered_file, "w") as text_file:
                            text_file.write("{}\n".format(
                                class2idx(class_name)))
                            pose_rendered_m = np.zeros((3, 4))
                            pose_rendered_m[:, :3] = se3.quat2mat(
                                pose_rendered_q[:4])
                            pose_rendered_m[:, 3] = pose_rendered_q[4:]
                            pose_ori_m = pose_rendered_m
                            pose_str = "{} {} {} {}\n{} {} {} {}\n{} {} {} {}".format(
                                pose_ori_m[0, 0], pose_ori_m[0, 1],
                                pose_ori_m[0, 2], pose_ori_m[0, 3],
                                pose_ori_m[1, 0], pose_ori_m[1, 1],
                                pose_ori_m[1, 2], pose_ori_m[1, 3],
                                pose_ori_m[2, 0], pose_ori_m[2, 1],
                                pose_ori_m[2, 2], pose_ori_m[2, 3],
                            )
                            text_file.write(pose_str)
                    train_pair.append("{} {}/{}_{}".format(
                        observed_index, class_name, observed_prefix,
                        inner_idx))

            pair_set_file = os.path.join(image_set_dir,
                                         "train_{}.txt".format(class_name))
            train_pair = sorted(train_pair)
            with open(pair_set_file, "w") as text_file:
                for x in train_pair:
                    text_file.write("{}\n".format(x))
            print(class_name, " done")
def gen_gt_observed():
    """Render GT-observed color/depth/label/pose files for every synthetic
    pose of every class in the pickled pose dict."""
    with open(syn_poses_path, "rb") as f:
        syn_pose_dict = cPickle.load(f)

    for class_idx, class_name in enumerate(classes):
        if class_name == "__back_ground__":
            continue
        # uncomment here to only generate data for ape
        # if class_name not in ['ape']:
        #     continue

        # init render machine (one per class model)
        model_dir = os.path.join(LINEMOD_syn_root,
                                 "models/{}".format(class_name))
        render_machine = Render_Py(model_dir, K, width, height, ZNEAR, ZFAR)

        syn_poses = syn_pose_dict[class_name]  # nx7: quaternion + translation
        num_poses = syn_poses.shape[0]
        observed_index_list = [
            "{}/{:06d}".format(class_name, i + 1) for i in range(num_poses)
        ]

        for idx, observed_index in enumerate(tqdm(observed_index_list)):
            prefix = observed_index.split("/")[1]

            gt_observed_dir = os.path.join(gt_observed_root_dir, class_name)
            mkdir_if_missing(gt_observed_dir)
            gt_observed_color_file = os.path.join(gt_observed_dir,
                                                  prefix + "-color.png")
            gt_observed_depth_file = os.path.join(gt_observed_dir,
                                                  prefix + "-depth.png")
            gt_observed_pose_file = os.path.join(gt_observed_dir,
                                                 prefix + "-pose.txt")
            gt_observed_label_file = os.path.join(gt_observed_dir,
                                                  prefix + "-label.png")

            pose_quat = syn_poses[idx, :]
            pose = se3.se3_q2m(pose_quat)

            # NOTE: the original computed a per-frame light_position here, but
            # it was never passed to the mat-mode render() call below, so the
            # dead computation has been removed.

            # get render result
            rgb_gl, depth_gl = render_machine.render(pose[:3, :3], pose[:, 3],
                                                     r_type="mat")
            rgb_gl = rgb_gl.astype("uint8")

            # gt_observed label: 1 wherever the object was rendered
            label_gl = np.zeros(depth_gl.shape)
            label_gl[depth_gl != 0] = 1

            cv2.imwrite(gt_observed_color_file, rgb_gl)
            depth_gl = (depth_gl * depth_factor).astype(np.uint16)
            cv2.imwrite(gt_observed_depth_file, depth_gl)
            cv2.imwrite(gt_observed_label_file, label_gl)

            # fix: pose file is now closed on exit (was left open)
            with open(gt_observed_pose_file, "w") as text_file:
                text_file.write("{}\n".format(class_idx))
                pose_str = "{} {} {} {}\n{} {} {} {}\n{} {} {} {}".format(
                    pose[0, 0], pose[0, 1], pose[0, 2], pose[0, 3],
                    pose[1, 0], pose[1, 1], pose[1, 2], pose[1, 3],
                    pose[2, 0], pose[2, 1], pose[2, 2], pose[2, 3],
                )
                text_file.write(pose_str)
        print(class_name, " done")
[0, 0, 1]]) # for lm ZNEAR = 0.25 ZFAR = 6.0 depth_factor = 1000 LINEMOD_root = os.path.join(cur_path, "../data/LINEMOD_6D/LM6d_converted/LM6d_refine") LINEMOD_syn_root = os.path.join( cur_path, "../data/LINEMOD_6D/LM6d_converted/LM6d_refine_syn") syn_poses_path = os.path.join(LINEMOD_syn_root, "poses/LM6d_ds_train_observed_pose_all.pkl") # output path gt_observed_root_dir = os.path.join(LINEMOD_syn_root, "data", "gt_observed") mkdir_if_missing(gt_observed_root_dir) def gen_gt_observed(): with open(syn_poses_path, "rb") as f: syn_pose_dict = cPickle.load(f) for class_idx, class_name in enumerate(classes): if class_name == "__back_ground__": continue # uncomment here to only generate data for ape # if class_name not in ['ape']: # continue # init render machines # brightness_ratios = [0.2, 0.25, 0.3, 0.35, 0.4] ###################
def main(camera_params, env_params):
    """Render color/depth images of each class across a tabletop search grid.

    For every (x, y, yaw) cell inside the env bounds, the object's world
    transform is composed, taken into the (hard-coded) camera frame and
    rendered; the raw world-frame grid poses are also dumped to ``poses.txt``
    for consumption by Perch.
    """
    # camera intrinsics from the supplied parameter dict
    width = camera_params['camera_width']
    height = camera_params['camera_height']
    K = np.array([[camera_params['camera_fx'], 0, camera_params['camera_cx']],
                  [0, camera_params['camera_fy'], camera_params['camera_cy']],
                  [0, 0, 1]])
    ZNEAR = camera_params['camera_znear']
    ZFAR = camera_params['camera_zfar']
    depth_factor = 1000
    # tabletop search bounds and table height (presumably meters — confirm
    # against the env config producer)
    x_min = float(env_params['x_min'])
    x_max = float(env_params['x_max']);
    y_min = float(env_params['y_min']);
    y_max = float(env_params['y_max']);
    table_height = float(env_params['table_height']);
    gen_images = True
    pose_from_file = False
    print("Camera Matrix:")
    print(K)
    # Camera-to-world transform: hard-coded calibration (several earlier
    # calibration candidates were kept commented out in the original source).
    camera_pose = np.array([ \
        [0.00572327, -0.629604, 0.776895, 0.437408], \
        [-0.999953, 0.00244603, 0.0093488, 0.0323317], \
        [-0.00778635, -0.776912, -0.629561, 0.709281], \
        [0, 0, 0, 1]])
    for class_idx, class_name in idx2class.items():
        print("start ", class_idx, class_name)
        if class_name in ["__back_ground__"]:
            continue
        if gen_images:
            # init render
            # model_dir = os.path.join(LM6d_root, "aligned_cm", class_name, "google_16k")
            model_dir = os.path.join(LM6d_root, "models", class_name)
            render_machine = Render_Py(model_dir, K, width, height, ZNEAR,
                                       ZFAR)
        for set_type in ["all"]:
            rendered_pose_list = []
            # For reading in Perch
            rendered_pose_list_out = []
            if pose_from_file:
                # quat+trans poses, one per line, whitespace separated
                with open(rendered_pose_path.format(set_type,
                                                    class_name)) as f:
                    str_rendered_pose_list = [
                        x.strip().split(" ") for x in f.readlines()
                    ]
                rendered_pose_list = np.array(
                    [[float(x) for x in each_pose]
                     for each_pose in str_rendered_pose_list]
                )
            else:
                # exhaustive (x, y, yaw) grid over the table surface
                for x in np.arange(
                        x_min, x_max,
                        float(env_params['search_resolution_translation'])):
                    for y in np.arange(
                            y_min, y_max,
                            float(env_params['search_resolution_translation'])):
                        for theta in np.arange(
                                0, 2 * np.pi,
                                float(env_params['search_resolution_yaw'])):
                            original_point = np.array([[x], [y],
                                                       [table_height], [1]])
                            if class_name == "004_sugar_box":
                                # Add half the height of box to shift it up
                                point = np.array([[x], [y],
                                                  [table_height+0.086], [1]])
                            if class_name == "035_power_drill":
                                point = np.array([[x], [y], [table_height],
                                                  [1]])
                            # NOTE(review): `point` is only assigned for the
                            # two classes above — any other class name would
                            # hit a NameError (or reuse a stale `point`)
                            # below; confirm idx2class contains only these
                            # classes, or default `point = original_point`.
                            object_world_transform = np.zeros((4,4))
                            if class_name == "004_sugar_box":
                                object_world_transform[:3,:3] = RT_transform.euler2mat(0,0,theta)
                            if class_name == "035_power_drill":
                                # extra pi/2 pitch — presumably the drill mesh
                                # is modeled lying down; verify visually
                                object_world_transform[:3,:3] = RT_transform.euler2mat(np.pi/2,0,theta)
                            object_world_transform[:4,3] = point.flatten()
                            # First apply world to object transform on the object and then take it to camera frame
                            total_transform = np.matmul(
                                np.linalg.inv(camera_pose),
                                object_world_transform)
                            print(total_transform)
                            # quaternion + translation form for the renderer
                            pose = RT_transform.mat2quat(
                                total_transform[:3,:3]).tolist() \
                                + total_transform[:3,3].flatten().tolist()
                            print(pose)
                            rendered_pose_list.append(pose)
                            # Perch output keeps the raw world-frame grid pose
                            rendered_pose_list_out.append(
                                original_point.flatten().tolist()
                                + [0,0,theta])
            rendered_pose_list = np.array(rendered_pose_list)
            rendered_pose_list_out = np.array(rendered_pose_list_out)
            for idx, observed_pose in enumerate(tqdm(rendered_pose_list)):
                rendered_dir = os.path.join(rendered_root_dir, class_name)
                mkdir_if_missing(rendered_dir)
                if gen_images:
                    image_file = os.path.join(
                        rendered_dir,
                        "{}-color.png".format(idx),
                    )
                    depth_file = os.path.join(
                        rendered_dir,
                        "{}-depth.png".format(idx),
                    )
                    pose_rendered_q = observed_pose
                    rgb_gl, depth_gl = render_machine.render(
                        pose_rendered_q[:4], pose_rendered_q[4:]
                    )
                    rgb_gl = rgb_gl.astype("uint8")
                    depth_gl = (depth_gl * depth_factor).astype(np.uint16)
                    cv2.imwrite(image_file, rgb_gl)
                    cv2.imwrite(depth_file, depth_gl)
                    # (per-pose "-pose.txt" writing was commented out in the
                    # original source; poses.txt below covers all poses)
            # all grid poses for this class in one Perch-readable file
            pose_rendered_file = os.path.join(
                rendered_dir,
                "poses.txt",
            )
            np.savetxt(pose_rendered_file,
                       np.around(rendered_pose_list_out, 4))
            print(class_name, " done")
def main():
    """Render GT-aligned color/depth/label/pose files for observed frames.

    For each class in ``sel_classes`` (module-level), reads the observed
    frame index file, renders the class model at the pose stored in the
    per-frame ``*-poses.npy`` dict, and writes four files per frame:
    ``-color.png``, ``-depth.png``, ``-label.png`` and ``-pose.txt``.
    """
    for cls_idx, cls_name in enumerate(tqdm(sel_classes)):
        print(cls_idx, cls_name)
        keyframe_path = os.path.join(
            observed_set_dir, "train_observed_{}.txt".format(cls_name)
        )
        with open(keyframe_path) as f:
            observed_index_list = [x.strip() for x in f.readlines()]
        # each index line is "<video_name>/<frame_prefix>"
        video_name_list = [x.split("/")[0] for x in observed_index_list]
        observed_prefix_list = [x.split("/")[1] for x in observed_index_list]

        # init renderer
        model_dir = os.path.join(model_root, cls_name)
        render_machine = Render_Py(model_dir, K, width, height, ZNEAR, ZFAR)

        for idx, observed_index in enumerate(tqdm(observed_index_list)):
            prefix = observed_prefix_list[idx]
            video_name = video_name_list[idx]

            gt_observed_dir = os.path.join(gt_observed_root_dir, cls_name)
            mkdir_if_missing(gt_observed_dir)
            gt_observed_dir = os.path.join(gt_observed_dir, video_name)  # ./
            mkdir_if_missing(gt_observed_dir)

            # output paths for this frame
            gt_observed_color_file = os.path.join(
                gt_observed_dir, prefix + "-color.png"
            )
            gt_observed_depth_file = os.path.join(
                gt_observed_dir, prefix + "-depth.png"
            )
            gt_observed_pose_file = os.path.join(gt_observed_dir, prefix + "-pose.txt")
            gt_observed_label_file = os.path.join(
                gt_observed_dir, prefix + "-label.png"
            )

            observed_pose_file = os.path.join(
                observed_root_dir, video_name, prefix + "-poses.npy"
            )
            # the .npy holds a 0-d object array wrapping a dict
            # {class_name: 3x4 pose}; .all() unwraps it
            observed_poses = np.load(observed_pose_file)
            observed_pose_dict = observed_poses.all()
            # pprint(observed_pose_dict)
            # skip frames in which this class does not appear
            if cls_name not in observed_pose_dict:
                continue
            pose = observed_pose_dict[cls_name]

            rgb_gl, depth_gl = render_machine.render(
                RT_transform.mat2quat(pose[:3, :3]), pose[:, -1]
            )
            rgb_gl = rgb_gl.astype("uint8")

            # binary object mask from rendered depth; cast to uint8 so
            # cv2.imwrite stores an 8-bit PNG (fix: was float64)
            label_gl = np.zeros(depth_gl.shape)
            label_gl[depth_gl != 0] = 1
            label_gl = label_gl.astype("uint8")

            depth_gl = depth_gl * depth_factor
            depth_gl = depth_gl.astype("uint16")

            # write results
            cv2.imwrite(gt_observed_color_file, rgb_gl)
            cv2.imwrite(gt_observed_depth_file, depth_gl)
            cv2.imwrite(gt_observed_label_file, label_gl)

            # fix: context manager guarantees the pose file is closed
            with open(gt_observed_pose_file, "w") as text_file:
                text_file.write("{}\n".format(cls_idx))
                pose_str = "{} {} {} {}\n{} {} {} {}\n{} {} {} {}".format(
                    pose[0, 0],
                    pose[0, 1],
                    pose[0, 2],
                    pose[0, 3],
                    pose[1, 0],
                    pose[1, 1],
                    pose[1, 2],
                    pose[1, 3],
                    pose[2, 0],
                    pose[2, 1],
                    pose[2, 2],
                    pose[2, 3],
                )
                text_file.write(pose_str)
        print(cls_name, " done")
def main():
    """Build a zoom-in visualization video of iterative pose refinement.

    Pipeline: (1) collect per-iteration result images from ``exp_dir``'s
    ``pose*`` subdirectories, (2) for each image run an MXNet
    ``ZoomImageWithFactor`` op to produce three intermediate zoom frames
    plus the final zoom, saving annotated matplotlib renderings,
    (3) stitch all saved frames into an AVI and re-encode it via ffmpeg.
    """
    args = parse_args()
    exp_dir = args.exp_dir
    process_images = True
    print('exp_dir: ', exp_dir)
    ctx = mx.gpu(0)
    pixel_means = np.array([0, 0, 0])
    height = 600
    width = 800
    # initialize layer
    image_real_sym = mx.sym.Variable('image_real')
    image_rendered_sym = mx.sym.Variable('image_rendered')
    zoom_factor_sym = mx.sym.Variable('zoom_factor')
    zoom_op = mx.sym.Custom(zoom_factor=zoom_factor_sym,
                            image_real=image_real_sym,
                            image_rendered=image_rendered_sym,
                            pixel_means=pixel_means.flatten(),
                            name='updater',
                            op_type='ZoomImageWithFactor',
                            height=height,
                            width=width,
                            high_light_center=False)
    # collect result-image directories (one per example, named "*pose*")
    pose_dirs = [
        os.path.join(exp_dir, d) for d in os.listdir(exp_dir) if 'pose' in d
    ]
    pose_dirs = sorted(pose_dirs)
    pose_path_list = []
    for pose_dir in pose_dirs:
        files = [
            os.path.join(pose_dir, fn) for fn in os.listdir(pose_dir)
            if '.png' in fn
        ]
        files = sorted(files)
        # repeat first/last frame 5x so the video dwells on them
        for i in range(len(files)):
            if i == 0 or i == len(files) - 1:
                for j in range(5):
                    pose_path_list.append(files[i])
            else:
                pose_path_list.append(files[i])
    save_dir = os.path.join(exp_dir, '../zoom_video_iter/')
    mkdir_if_missing(save_dir)

    # zoom in
    def get_zoom_iamges(image_path,
                        save_dir,
                        is_initial=False,
                        is_last=False,
                        use_first_zoom_factor=True):
        # Render one result image at 4 zoom levels (0=unzoomed original,
        # 1/3, 2/3, full zoom) with title/legend overlays, saving PNGs
        # into save_dir/<example-dir>/.  NOTE: duplicated at module level
        # as a standalone function of the same name.
        legend_loc = 50
        t = 1  # alpha channel for legend patch colors
        cmap = {
            1: [1.0, 0.0, 0.0, t],
            2: [0.75, 0.75, 0.75, t],
            3: [0.0, 1.0, 0.0, t]
        }
        labels = {1: 'Initial', 2: 'GT', 3: 'Refined'}
        patches = [
            mpatches.Patch(color=cmap[i], label=labels[i]) for i in range(1, 4)
        ]
        if use_first_zoom_factor:
            # reuse the zoom factor of iteration 00 so all iterations of
            # one example share the same crop
            init_info_path = image_path.replace(
                os.path.basename(
                    image_path)[os.path.basename(image_path).find('iter'):],
                'iter_00_info.txt')
            _, _, zoom_factor = read_info(init_info_path)
            info_path = image_path.replace('.png', '_info.txt')
            title, legend, _ = read_info(info_path)
        else:
            info_path = image_path.replace('.png', '_info.txt')
            title, legend, zoom_factor = read_info(info_path)
        zoom_factor = zoom_factor[None, :]
        # print(zoom_factor)
        # HWC -> NCHW for the MXNet op
        image_real = cv2.imread(image_path,
                                cv2.IMREAD_COLOR).transpose([2, 0, 1])[None, :, :, :]
        image_rendered = image_real.copy()
        exe1 = zoom_op.simple_bind(ctx=ctx,
                                   zoom_factor=zoom_factor.shape,
                                   image_real=image_real.shape,
                                   image_rendered=image_rendered.shape)

        def simple_forward(exe1,
                           zoom_factor,
                           image_real,
                           image_rendered,
                           ctx=ctx,
                           is_train=False):
            # copy inputs into the bound executor and run one forward pass
            print('zoom factor: ', zoom_factor)
            exe1.arg_dict['zoom_factor'][:] = mx.nd.array(zoom_factor, ctx=ctx)
            exe1.arg_dict['image_real'][:] = mx.nd.array(image_real, ctx=ctx)
            exe1.arg_dict['image_rendered'][:] = mx.nd.array(image_rendered,
                                                             ctx=ctx)
            exe1.forward(is_train=is_train)

        if is_initial:
            # original (unzoomed) frame, saved as *_0.png
            fig = plt.figure(frameon=False, figsize=(8, 6), dpi=100)
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            # print(image_real[0].shape)
            # CHW -> HWC, BGR -> RGB for display
            ax.imshow(image_real[0].transpose((1, 2, 0))[:, :, [2, 1, 0]])
            fig.gca().text(10, 25, title, color='green',
                           bbox=dict(facecolor='white', alpha=0.8))
            fig.gca().text(10, legend_loc, legend, color='red',
                           bbox=dict(facecolor='white', alpha=0.8))
            plt.legend(handles=patches, loc=4, borderaxespad=0.)
            # plt.show()
            save_d = os.path.join(save_dir,
                                  os.path.dirname(image_path).split('/')[-1])
            mkdir_if_missing(save_d)
            save_path = os.path.join(
                save_d, os.path.basename(image_path).replace('.png', '_0.png'))
            plt.savefig(save_path, aspect='normal')
            plt.close()

        # ################### (1/3)
        # interpolate the zoom factor toward the final crop in thirds
        wx, wy, tx, ty = zoom_factor[0]
        delta = (1 - wx) / 3
        zoom_factor_1 = np.zeros((1, 4))
        zoom_factor_1[0, 0] = 1 - delta
        zoom_factor_1[0, 1] = 1 - delta
        zoom_factor_1[0, 2] = tx / 3
        zoom_factor_1[0, 3] = ty / 3
        simple_forward(exe1,
                       zoom_factor_1,
                       image_real,
                       image_rendered,
                       ctx=ctx,
                       is_train=True)
        zoom_image_real = exe1.outputs[0].asnumpy()[0].transpose(
            (1, 2, 0)) + pixel_means
        zoom_image_real[zoom_image_real < 0] = 0
        zoom_image_real[zoom_image_real > 255] = 255
        zoom_image_real = zoom_image_real.astype('uint8')
        fig = plt.figure(frameon=False, figsize=(8, 6), dpi=100)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        ax.imshow(zoom_image_real[:, :, [2, 1, 0]])
        fig.gca().text(10, 25, title, color='green',
                       bbox=dict(facecolor='white', alpha=0.8))
        fig.gca().text(10, legend_loc, legend, color='red',
                       bbox=dict(facecolor='white', alpha=0.8))
        plt.legend(handles=patches, loc=4, borderaxespad=0.)
        save_d = os.path.join(save_dir,
                              os.path.dirname(image_path).split('/')[-1])
        mkdir_if_missing(save_d)
        save_path = os.path.join(
            save_d, os.path.basename(image_path).replace('.png', '_1.png'))
        plt.savefig(save_path, aspect='normal')
        # plt.show()
        plt.close()

        # #################### (2/3)
        zoom_factor_2 = np.zeros((1, 4))
        zoom_factor_2[0, 0] = 1 - 2 * delta
        zoom_factor_2[0, 1] = 1 - 2 * delta
        zoom_factor_2[0, 2] = tx / 3 * 2
        zoom_factor_2[0, 3] = ty / 3 * 2
        simple_forward(exe1,
                       zoom_factor_2,
                       image_real,
                       image_rendered,
                       ctx=ctx,
                       is_train=True)
        zoom_image_real = exe1.outputs[0].asnumpy()[0].transpose(
            (1, 2, 0)) + pixel_means
        zoom_image_real[zoom_image_real < 0] = 0
        zoom_image_real[zoom_image_real > 255] = 255
        zoom_image_real = zoom_image_real.astype('uint8')
        fig = plt.figure(frameon=False, figsize=(8, 6), dpi=100)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        ax.imshow(zoom_image_real[:, :, [2, 1, 0]])
        fig.gca().text(10, 25, title, color='green',
                       bbox=dict(facecolor='white', alpha=0.8))
        fig.gca().text(10, legend_loc, legend, color='red',
                       bbox=dict(facecolor='white', alpha=0.8))
        plt.legend(handles=patches, loc=4, borderaxespad=0.)
        save_d = os.path.join(save_dir,
                              os.path.dirname(image_path).split('/')[-1])
        mkdir_if_missing(save_d)
        save_path = os.path.join(
            save_d, os.path.basename(image_path).replace('.png', '_2.png'))
        plt.savefig(save_path, aspect='normal')
        # plt.show()
        plt.close()

        # ###################### (3/3)
        # final frame uses the full zoom factor
        simple_forward(exe1,
                       zoom_factor,
                       image_real,
                       image_rendered,
                       ctx=ctx,
                       is_train=True)
        zoom_image_real = exe1.outputs[0].asnumpy()[0].transpose(
            (1, 2, 0)) + pixel_means
        zoom_image_real[zoom_image_real < 0] = 0
        zoom_image_real[zoom_image_real > 255] = 255
        zoom_image_real = zoom_image_real.astype('uint8')
        fig = plt.figure(frameon=False, figsize=(8, 6), dpi=100)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        ax.imshow(zoom_image_real[:, :, [2, 1, 0]])
        fig.gca().text(10, 25, title, color='green',
                       bbox=dict(facecolor='white', alpha=0.8))
        fig.gca().text(10, legend_loc, legend, color='red',
                       bbox=dict(facecolor='white', alpha=0.8))
        plt.legend(handles=patches, loc=4, borderaxespad=0.)
        save_d = os.path.join(save_dir,
                              os.path.dirname(image_path).split('/')[-1])
        mkdir_if_missing(save_d)
        if is_initial:
            save_path = os.path.join(
                save_d, os.path.basename(image_path).replace('.png', '_3.png'))
            # plt.show()
            plt.savefig(save_path, aspect='normal')
            plt.close()
        elif is_last:
            # save the fully-zoomed last frame three times so the video
            # lingers on the final result
            save_path_0 = os.path.join(
                save_d, os.path.basename(image_path).replace('.png', '_0.png'))
            save_path_1 = os.path.join(
                save_d, os.path.basename(image_path).replace('.png', '_1.png'))
            save_path_2 = os.path.join(
                save_d, os.path.basename(image_path).replace('.png', '_2.png'))
            plt.savefig(save_path_0, aspect='normal')
            plt.savefig(save_path_1, aspect='normal')
            plt.savefig(save_path_2, aspect='normal')
            # plt.show()
            plt.close()
        else:
            save_path = os.path.join(save_d, os.path.basename(image_path))
            plt.savefig(save_path, aspect='normal')
            # plt.show()
            plt.close()

    if process_images:
        use_first_zoom_factor = False
        print('saving processed images to {}'.format(save_dir))
        for image_path in tqdm(pose_path_list):
            # image_path = pose_path_list[0]
            # NOTE(review): the line above is reconstructed as a commented
            # debug leftover; if it were live, every iteration would
            # process only the first frame -- confirm against the original.
            if 'iter_00' in image_path:
                is_initial = True
            else:
                is_initial = False
            if 'iter_04' in image_path:
                is_last = True
            else:
                is_last = False
            # if 'iter_05' in image_path:  # gt
            #     continue
            get_zoom_iamges(image_path,
                            save_dir=save_dir,
                            is_initial=is_initial,
                            is_last=is_last,
                            use_first_zoom_factor=use_first_zoom_factor)

    ########################################
    # generate video with new images
    new_pose_dirs = [
        os.path.join(save_dir, d) for d in os.listdir(save_dir) if 'pose' in d
    ]
    new_pose_dirs = sorted(new_pose_dirs)
    new_pose_path_list = []
    for new_pose_dir in new_pose_dirs:
        files = [
            os.path.join(new_pose_dir, fn) for fn in os.listdir(new_pose_dir)
            if '.png' in fn
        ]
        files = sorted(files)
        for i in range(len(files)):
            if i == 0 or i == len(files) - 1:
                for j in range(1):
                    new_pose_path_list.append(files[i])
            else:
                new_pose_path_list.append(files[i])
    N = len(new_pose_path_list)
    images_dict = {k: [] for k in ['pose']}
    print('loading images...')
    for i in tqdm(range(N)):
        images_dict['pose'].append(
            cv2.imread(new_pose_path_list[i], cv2.IMREAD_COLOR))
    height, width, channel = images_dict['pose'][0].shape
    print(height, width)
    # output video is fixed at 800x600 regardless of the loaded frame size
    width = 800
    height = 600
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    video_pose_zoom = cv2.VideoWriter(
        os.path.join(exp_dir, '../video_full/pose_iter_zoom.avi'), fourcc, 2.0,
        (width, height))
    print('writing video...')
    for i in tqdm(range(N)):
        res_img = images_dict['pose'][i]
        # upscale legacy 640x480 frames to the 800x600 video size
        if res_img.shape[0] == 480:
            im_scale = 600.0 / 480.0
            res_img = cv2.resize(res_img,
                                 None,
                                 None,
                                 fx=im_scale,
                                 fy=im_scale,
                                 interpolation=cv2.INTER_CUBIC)
        video_pose_zoom.write(res_img)
    video_pose_zoom.release()
    # re-encode with ffmpeg for a smaller file
    os.popen(
        'ffmpeg -i {} -vcodec mpeg4 -acodec copy -preset placebo -crf 1 -b:v 1550k {}'
        .format(
            os.path.join(exp_dir, '../video_full/pose_iter_zoom.avi'),
            os.path.join(exp_dir, '../video_full/pose_iter_zoom_compressed.avi')))
def get_zoom_iamges(image_path,
                    save_dir,
                    is_initial=False,
                    is_last=False,
                    use_first_zoom_factor=True):
    """Render one result image at four zoom levels with overlays.

    Produces an unzoomed frame (initial images only) plus 1/3, 2/3 and
    full-zoom frames via the MXNet ``ZoomImageWithFactor`` op, saving
    annotated matplotlib PNGs under ``save_dir/<example-dir>/``.

    NOTE(review): this appears to be a module-level duplicate of the
    helper nested inside ``main``; it references ``zoom_op``, ``ctx`` and
    ``pixel_means`` which are locals of ``main`` -- calling it at module
    scope would raise NameError unless those exist as globals. Confirm.

    :param image_path: path to a ``*-pose*.png`` result image
    :param save_dir: root directory for the zoomed output frames
    :param is_initial: True for iteration-00 images (also saves *_0/_3)
    :param is_last: True for last-iteration images (final frame saved 3x)
    :param use_first_zoom_factor: reuse iteration-00's zoom factor
    """
    legend_loc = 50
    t = 1  # alpha channel for legend patch colors
    cmap = {
        1: [1.0, 0.0, 0.0, t],
        2: [0.75, 0.75, 0.75, t],
        3: [0.0, 1.0, 0.0, t]
    }
    labels = {1: 'Initial', 2: 'GT', 3: 'Refined'}
    patches = [
        mpatches.Patch(color=cmap[i], label=labels[i]) for i in range(1, 4)
    ]
    if use_first_zoom_factor:
        # reuse the zoom factor of iteration 00 so all iterations of one
        # example share the same crop
        init_info_path = image_path.replace(
            os.path.basename(
                image_path)[os.path.basename(image_path).find('iter'):],
            'iter_00_info.txt')
        _, _, zoom_factor = read_info(init_info_path)
        info_path = image_path.replace('.png', '_info.txt')
        title, legend, _ = read_info(info_path)
    else:
        info_path = image_path.replace('.png', '_info.txt')
        title, legend, zoom_factor = read_info(info_path)
    zoom_factor = zoom_factor[None, :]
    # print(zoom_factor)
    # HWC -> NCHW for the MXNet op
    image_real = cv2.imread(image_path,
                            cv2.IMREAD_COLOR).transpose([2, 0, 1])[None, :, :, :]
    image_rendered = image_real.copy()
    exe1 = zoom_op.simple_bind(ctx=ctx,
                               zoom_factor=zoom_factor.shape,
                               image_real=image_real.shape,
                               image_rendered=image_rendered.shape)

    def simple_forward(exe1,
                       zoom_factor,
                       image_real,
                       image_rendered,
                       ctx=ctx,
                       is_train=False):
        # copy inputs into the bound executor and run one forward pass
        print('zoom factor: ', zoom_factor)
        exe1.arg_dict['zoom_factor'][:] = mx.nd.array(zoom_factor, ctx=ctx)
        exe1.arg_dict['image_real'][:] = mx.nd.array(image_real, ctx=ctx)
        exe1.arg_dict['image_rendered'][:] = mx.nd.array(image_rendered,
                                                         ctx=ctx)
        exe1.forward(is_train=is_train)

    if is_initial:
        # original (unzoomed) frame, saved as *_0.png
        fig = plt.figure(frameon=False, figsize=(8, 6), dpi=100)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        # print(image_real[0].shape)
        # CHW -> HWC, BGR -> RGB for display
        ax.imshow(image_real[0].transpose((1, 2, 0))[:, :, [2, 1, 0]])
        fig.gca().text(10, 25, title, color='green',
                       bbox=dict(facecolor='white', alpha=0.8))
        fig.gca().text(10, legend_loc, legend, color='red',
                       bbox=dict(facecolor='white', alpha=0.8))
        plt.legend(handles=patches, loc=4, borderaxespad=0.)
        # plt.show()
        save_d = os.path.join(save_dir,
                              os.path.dirname(image_path).split('/')[-1])
        mkdir_if_missing(save_d)
        save_path = os.path.join(
            save_d, os.path.basename(image_path).replace('.png', '_0.png'))
        plt.savefig(save_path, aspect='normal')
        plt.close()

    # ################### (1/3)
    # interpolate the zoom factor toward the final crop in thirds
    wx, wy, tx, ty = zoom_factor[0]
    delta = (1 - wx) / 3
    zoom_factor_1 = np.zeros((1, 4))
    zoom_factor_1[0, 0] = 1 - delta
    zoom_factor_1[0, 1] = 1 - delta
    zoom_factor_1[0, 2] = tx / 3
    zoom_factor_1[0, 3] = ty / 3
    simple_forward(exe1,
                   zoom_factor_1,
                   image_real,
                   image_rendered,
                   ctx=ctx,
                   is_train=True)
    zoom_image_real = exe1.outputs[0].asnumpy()[0].transpose(
        (1, 2, 0)) + pixel_means
    zoom_image_real[zoom_image_real < 0] = 0
    zoom_image_real[zoom_image_real > 255] = 255
    zoom_image_real = zoom_image_real.astype('uint8')
    fig = plt.figure(frameon=False, figsize=(8, 6), dpi=100)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    ax.imshow(zoom_image_real[:, :, [2, 1, 0]])
    fig.gca().text(10, 25, title, color='green',
                   bbox=dict(facecolor='white', alpha=0.8))
    fig.gca().text(10, legend_loc, legend, color='red',
                   bbox=dict(facecolor='white', alpha=0.8))
    plt.legend(handles=patches, loc=4, borderaxespad=0.)
    save_d = os.path.join(save_dir,
                          os.path.dirname(image_path).split('/')[-1])
    mkdir_if_missing(save_d)
    save_path = os.path.join(
        save_d, os.path.basename(image_path).replace('.png', '_1.png'))
    plt.savefig(save_path, aspect='normal')
    # plt.show()
    plt.close()

    # #################### (2/3)
    zoom_factor_2 = np.zeros((1, 4))
    zoom_factor_2[0, 0] = 1 - 2 * delta
    zoom_factor_2[0, 1] = 1 - 2 * delta
    zoom_factor_2[0, 2] = tx / 3 * 2
    zoom_factor_2[0, 3] = ty / 3 * 2
    simple_forward(exe1,
                   zoom_factor_2,
                   image_real,
                   image_rendered,
                   ctx=ctx,
                   is_train=True)
    zoom_image_real = exe1.outputs[0].asnumpy()[0].transpose(
        (1, 2, 0)) + pixel_means
    zoom_image_real[zoom_image_real < 0] = 0
    zoom_image_real[zoom_image_real > 255] = 255
    zoom_image_real = zoom_image_real.astype('uint8')
    fig = plt.figure(frameon=False, figsize=(8, 6), dpi=100)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    ax.imshow(zoom_image_real[:, :, [2, 1, 0]])
    fig.gca().text(10, 25, title, color='green',
                   bbox=dict(facecolor='white', alpha=0.8))
    fig.gca().text(10, legend_loc, legend, color='red',
                   bbox=dict(facecolor='white', alpha=0.8))
    plt.legend(handles=patches, loc=4, borderaxespad=0.)
    save_d = os.path.join(save_dir,
                          os.path.dirname(image_path).split('/')[-1])
    mkdir_if_missing(save_d)
    save_path = os.path.join(
        save_d, os.path.basename(image_path).replace('.png', '_2.png'))
    plt.savefig(save_path, aspect='normal')
    # plt.show()
    plt.close()

    # ###################### (3/3)
    # final frame uses the full zoom factor
    simple_forward(exe1,
                   zoom_factor,
                   image_real,
                   image_rendered,
                   ctx=ctx,
                   is_train=True)
    zoom_image_real = exe1.outputs[0].asnumpy()[0].transpose(
        (1, 2, 0)) + pixel_means
    zoom_image_real[zoom_image_real < 0] = 0
    zoom_image_real[zoom_image_real > 255] = 255
    zoom_image_real = zoom_image_real.astype('uint8')
    fig = plt.figure(frameon=False, figsize=(8, 6), dpi=100)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    ax.imshow(zoom_image_real[:, :, [2, 1, 0]])
    fig.gca().text(10, 25, title, color='green',
                   bbox=dict(facecolor='white', alpha=0.8))
    fig.gca().text(10, legend_loc, legend, color='red',
                   bbox=dict(facecolor='white', alpha=0.8))
    plt.legend(handles=patches, loc=4, borderaxespad=0.)
    save_d = os.path.join(save_dir,
                          os.path.dirname(image_path).split('/')[-1])
    mkdir_if_missing(save_d)
    if is_initial:
        save_path = os.path.join(
            save_d, os.path.basename(image_path).replace('.png', '_3.png'))
        # plt.show()
        plt.savefig(save_path, aspect='normal')
        plt.close()
    elif is_last:
        # save the fully-zoomed last frame three times so the video
        # lingers on the final result
        save_path_0 = os.path.join(
            save_d, os.path.basename(image_path).replace('.png', '_0.png'))
        save_path_1 = os.path.join(
            save_d, os.path.basename(image_path).replace('.png', '_1.png'))
        save_path_2 = os.path.join(
            save_d, os.path.basename(image_path).replace('.png', '_2.png'))
        plt.savefig(save_path_0, aspect='normal')
        plt.savefig(save_path_1, aspect='normal')
        plt.savefig(save_path_2, aspect='normal')
        # plt.show()
        plt.close()
    else:
        save_path = os.path.join(save_d, os.path.basename(image_path))
        plt.savefig(save_path, aspect='normal')
        # plt.show()
        plt.close()
def pred_eval(config,
              predictor,
              test_data,
              imdb_test,
              vis=False,
              ignore_cache=None,
              logger=None,
              pairdb=None):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb_test: image database
    :param vis: controls visualization
    :param ignore_cache: ignore the saved cache file
    :param logger: the logger instance
    :param pairdb: list of observed/rendered image pair records
    :return:
    """
    print(imdb_test.result_path)
    print('test iter size: ', config.TEST.test_iter)
    pose_err_file = os.path.join(
        imdb_test.result_path,
        imdb_test.name + '_pose_iter{}.pkl'.format(config.TEST.test_iter))
    # fast path: reuse cached errors/poses if present
    if os.path.exists(pose_err_file) and not ignore_cache and not vis:
        with open(pose_err_file, 'rb') as fid:
            if six.PY3:
                # latin1 keeps py2-written pickles loadable under py3
                [all_rot_err, all_trans_err, all_poses_est,
                 all_poses_gt] = cPickle.load(fid, encoding='latin1')
            else:
                [all_rot_err, all_trans_err, all_poses_est,
                 all_poses_gt] = cPickle.load(fid)
        imdb_test.evaluate_pose(config, all_poses_est, all_poses_gt, logger)
        pose_add_plots_dir = os.path.join(imdb_test.result_path, 'add_plots')
        mkdir_if_missing(pose_add_plots_dir)
        imdb_test.evaluate_pose_add(config,
                                    all_poses_est,
                                    all_poses_gt,
                                    output_dir=pose_add_plots_dir,
                                    logger=logger)
        pose_arp2d_plots_dir = os.path.join(imdb_test.result_path,
                                            'arp_2d_plots')
        mkdir_if_missing(pose_arp2d_plots_dir)
        imdb_test.evaluate_pose_arp_2d(config,
                                       all_poses_est,
                                       all_poses_gt,
                                       output_dir=pose_arp2d_plots_dir,
                                       logger=logger)
        return

    assert vis or not test_data.shuffle
    assert config.TEST.BATCH_PAIRS == 1
    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    num_pairs = len(pairdb)
    height = 480
    width = 640

    data_time, net_time, post_time = 0.0, 0.0, 0.0
    # optical-flow end-point-error accumulators
    sum_EPE_all = 0.0
    num_inst_all = 0.0
    sum_EPE_viz = 0.0
    num_inst_viz = 0.0
    sum_EPE_vizbg = 0.0
    num_inst_vizbg = 0.0
    sum_PoseErr = [
        np.zeros((len(imdb_test.classes) + 1, 2))
        for batch_idx in range(config.TEST.test_iter)
    ]

    all_rot_err = [[[] for j in range(config.TEST.test_iter)]
                   for batch_idx in range(len(imdb_test.classes))
                   ]  # num_cls x test_iter
    all_trans_err = [[[] for j in range(config.TEST.test_iter)]
                     for batch_idx in range(len(imdb_test.classes))]

    all_poses_est = [[[] for j in range(config.TEST.test_iter)]
                     for batch_idx in range(len(imdb_test.classes))]
    all_poses_gt = [[[] for j in range(config.TEST.test_iter)]
                    for batch_idx in range(len(imdb_test.classes))]

    num_inst = np.zeros(len(imdb_test.classes) + 1)

    K = config.dataset.INTRINSIC_MATRIX
    if (config.TEST.test_iter > 1 or config.TEST.VISUALIZE) and True:
        print(
            "************* start setup render_glumpy environment... ******************"
        )
        if config.dataset.dataset.startswith('ModelNet'):
            from lib.render_glumpy.render_py_light_modelnet_multi import Render_Py_Light_ModelNet_Multi
            modelnet_root = config.modelnet_root
            texture_path = os.path.join(modelnet_root, 'gray_texture.png')
            model_path_list = [
                os.path.join(config.dataset.model_dir,
                             '{}.obj'.format(model_name))
                for model_name in config.dataset.class_name
            ]
            render_machine = Render_Py_Light_ModelNet_Multi(
                model_path_list,
                texture_path,
                K,
                width,
                height,
                config.dataset.ZNEAR,
                config.dataset.ZFAR,
                brightness_ratios=[0.7])
        else:
            render_machine = Render_Py(config.dataset.model_dir,
                                       config.dataset.class_name, K, width,
                                       height, config.dataset.ZNEAR,
                                       config.dataset.ZFAR)

        def render(render_machine, pose, cls_idx, K=None):
            # Render one class at `pose`; ModelNet models need an explicit
            # light setup, other datasets use the plain renderer.
            if config.dataset.dataset.startswith('ModelNet'):
                idx = 2
                # generate random light_position
                # (idx is fixed at 2, so this always picks [0, 1, 1])
                if idx % 6 == 0:
                    light_position = [1, 0, 1]
                elif idx % 6 == 1:
                    light_position = [1, 1, 1]
                elif idx % 6 == 2:
                    light_position = [0, 1, 1]
                elif idx % 6 == 3:
                    light_position = [-1, 1, 1]
                elif idx % 6 == 4:
                    light_position = [-1, 0, 1]
                elif idx % 6 == 5:
                    light_position = [0, 0, 1]
                else:
                    raise Exception("???")
                light_position = np.array(light_position) * 0.5
                # inverse yz
                light_position[0] += pose[0, 3]
                light_position[1] -= pose[1, 3]
                light_position[2] -= pose[2, 3]
                colors = np.array([1, 1, 1])  # white light
                intensity = np.random.uniform(0.9, 1.1, size=(3, ))
                colors_randk = 0
                light_intensity = colors[colors_randk] * intensity
                # randomly choose a render machine
                rm_randk = 0  # random.randint(0, len(brightness_ratios) - 1)
                rgb_gl, depth_gl = render_machine.render(
                    cls_idx,
                    pose[:3, :3],
                    pose[:3, 3],
                    light_position,
                    light_intensity,
                    brightness_k=rm_randk,
                    r_type='mat')
                rgb_gl = rgb_gl.astype('uint8')
            else:
                rgb_gl, depth_gl = render_machine.render(cls_idx,
                                                         pose[:3, :3],
                                                         pose[:, 3],
                                                         r_type='mat',
                                                         K=K)
                rgb_gl = rgb_gl.astype('uint8')
            return rgb_gl, depth_gl

        print(
            "***************setup render_glumpy environment succeed ******************"
        )

    if config.TEST.PRECOMPUTED_ICP:
        # evaluate poses already refined by an external ICP step
        # (read from '-pose_icp.txt' next to each rendered depth)
        print('precomputed_ICP')
        config.TEST.test_iter = 1
        all_rot_err = [[[] for j in range(1)]
                       for batch_idx in range(len(imdb_test.classes))]
        all_trans_err = [[[] for j in range(1)]
                         for batch_idx in range(len(imdb_test.classes))]
        all_poses_est = [[[] for j in range(1)]
                         for batch_idx in range(len(imdb_test.classes))]
        all_poses_gt = [[[] for j in range(1)]
                        for batch_idx in range(len(imdb_test.classes))]
        xy_trans_err = [[[] for j in range(1)]
                        for batch_idx in range(len(imdb_test.classes))]
        z_trans_err = [[[] for j in range(1)]
                       for batch_idx in range(len(imdb_test.classes))]
        for idx in range(len(pairdb)):
            pose_path = pairdb[idx]['depth_rendered'][:-10] + '-pose_icp.txt'
            pose_rendered_update = np.loadtxt(pose_path, skiprows=1)
            pose_real = pairdb[idx]['pose_observed']
            r_dist_est, t_dist_est = calc_rt_dist_m(pose_rendered_update,
                                                    pose_real)
            xy_dist = np.linalg.norm(pose_rendered_update[:2, -1] -
                                     pose_real[:2, -1])
            z_dist = np.linalg.norm(pose_rendered_update[-1, -1] -
                                    pose_real[-1, -1])
            print(
                "{}: r_dist_est: {}, t_dist_est: {}, xy_dist: {}, z_dist: {}".
                format(idx, r_dist_est, t_dist_est, xy_dist, z_dist))
            class_id = imdb_test.classes.index(pairdb[idx]['gt_class'])
            # store poses estimation and gt
            all_poses_est[class_id][0].append(pose_rendered_update)
            all_poses_gt[class_id][0].append(pairdb[idx]['pose_observed'])
            all_rot_err[class_id][0].append(r_dist_est)
            all_trans_err[class_id][0].append(t_dist_est)
            xy_trans_err[class_id][0].append(xy_dist)
            z_trans_err[class_id][0].append(z_dist)
        all_rot_err = np.array(all_rot_err)
        all_trans_err = np.array(all_trans_err)
        # NOTE(review): the stats below use `class_id` from the last loop
        # iteration, i.e. they summarize only the last class seen.
        print("rot = {} +/- {}".format(np.mean(all_rot_err[class_id][0]),
                                       np.std(all_rot_err[class_id][0])))
        print("trans = {} +/- {}".format(np.mean(all_trans_err[class_id][0]),
                                         np.std(all_trans_err[class_id][0])))
        num_list = all_trans_err[class_id][0]
        print("xyz: {:.2f} +/- {:.2f}".format(
            np.mean(num_list) * 100,
            np.std(num_list) * 100))
        num_list = xy_trans_err[class_id][0]
        print("xy: {:.2f} +/- {:.2f}".format(
            np.mean(num_list) * 100,
            np.std(num_list) * 100))
        num_list = z_trans_err[class_id][0]
        print("z: {:.2f} +/- {:.2f}".format(
            np.mean(num_list) * 100,
            np.std(num_list) * 100))
        imdb_test.evaluate_pose(config, all_poses_est, all_poses_gt, logger)
        pose_add_plots_dir = os.path.join(imdb_test.result_path,
                                          'add_plots_precomputed_ICP')
        mkdir_if_missing(pose_add_plots_dir)
        imdb_test.evaluate_pose_add(config,
                                    all_poses_est,
                                    all_poses_gt,
                                    output_dir=pose_add_plots_dir,
                                    logger=logger)
        pose_arp2d_plots_dir = os.path.join(imdb_test.result_path,
                                            'arp_2d_plots_precomputed_ICP')
        mkdir_if_missing(pose_arp2d_plots_dir)
        imdb_test.evaluate_pose_arp_2d(config,
                                       all_poses_est,
                                       all_poses_gt,
                                       output_dir=pose_arp2d_plots_dir,
                                       logger=logger)
        return

    if config.TEST.BEFORE_ICP:
        # evaluate the raw (pre-ICP) network poses from '-pose.txt'
        print('before_ICP')
        config.TEST.test_iter = 1
        all_rot_err = [[[] for j in range(1)]
                       for batch_idx in range(len(imdb_test.classes))]
        all_trans_err = [[[] for j in range(1)]
                         for batch_idx in range(len(imdb_test.classes))]
        all_poses_est = [[[] for j in range(1)]
                         for batch_idx in range(len(imdb_test.classes))]
        all_poses_gt = [[[] for j in range(1)]
                        for batch_idx in range(len(imdb_test.classes))]
        xy_trans_err = [[[] for j in range(1)]
                        for batch_idx in range(len(imdb_test.classes))]
        z_trans_err = [[[] for j in range(1)]
                       for batch_idx in range(len(imdb_test.classes))]
        for idx in range(len(pairdb)):
            pose_path = pairdb[idx]['depth_rendered'][:-10] + '-pose.txt'
            pose_rendered_update = np.loadtxt(pose_path, skiprows=1)
            pose_real = pairdb[idx]['pose_observed']
            r_dist_est, t_dist_est = calc_rt_dist_m(pose_rendered_update,
                                                    pose_real)
            xy_dist = np.linalg.norm(pose_rendered_update[:2, -1] -
                                     pose_real[:2, -1])
            z_dist = np.linalg.norm(pose_rendered_update[-1, -1] -
                                    pose_real[-1, -1])
            class_id = imdb_test.classes.index(pairdb[idx]['gt_class'])
            # store poses estimation and gt
            all_poses_est[class_id][0].append(pose_rendered_update)
            all_poses_gt[class_id][0].append(pairdb[idx]['pose_observed'])
            all_rot_err[class_id][0].append(r_dist_est)
            all_trans_err[class_id][0].append(t_dist_est)
            xy_trans_err[class_id][0].append(xy_dist)
            z_trans_err[class_id][0].append(z_dist)
        all_trans_err = np.array(all_trans_err)
        imdb_test.evaluate_pose(config, all_poses_est, all_poses_gt, logger)
        pose_add_plots_dir = os.path.join(imdb_test.result_path,
                                          'add_plots_before_ICP')
        mkdir_if_missing(pose_add_plots_dir)
        imdb_test.evaluate_pose_add(config,
                                    all_poses_est,
                                    all_poses_gt,
                                    output_dir=pose_add_plots_dir,
                                    logger=logger)
        pose_arp2d_plots_dir = os.path.join(imdb_test.result_path,
                                            'arp_2d_plots_before_ICP')
        mkdir_if_missing(pose_arp2d_plots_dir)
        imdb_test.evaluate_pose_arp_2d(config,
                                       all_poses_est,
                                       all_poses_gt,
                                       output_dir=pose_arp2d_plots_dir,
                                       logger=logger)
        return

    # ------------------------------------------------------------------------------
    # main evaluation loop: run the network, then iteratively re-render at
    # the refined pose and re-run for config.TEST.test_iter iterations
    t_start = time.time()
    t = time.time()
    for idx, data_batch in enumerate(test_data):
        # a pose summing to -12 is the sentinel for "no valid point in the
        # initial pose"; record a large fixed error and skip the pair
        if np.sum(pairdb[idx]
                  ['pose_rendered']) == -12:  # NO POINT VALID IN INIT POSE
            print(idx)
            class_id = imdb_test.classes.index(pairdb[idx]['gt_class'])
            for pose_iter_idx in range(config.TEST.test_iter):
                all_poses_est[class_id][pose_iter_idx].append(
                    pairdb[idx]['pose_rendered'])
                all_poses_gt[class_id][pose_iter_idx].append(
                    pairdb[idx]['pose_observed'])
                r_dist = 1000
                t_dist = 1000
                all_rot_err[class_id][pose_iter_idx].append(r_dist)
                all_trans_err[class_id][pose_iter_idx].append(t_dist)
                sum_PoseErr[pose_iter_idx][class_id, :] += np.array(
                    [r_dist, t_dist])
                sum_PoseErr[pose_iter_idx][-1, :] += np.array([r_dist, t_dist])
            # post process
            if idx % 50 == 0:
                print_and_log(
                    'testing {}/{} data {:.4f}s net {:.4f}s calc_gt {:.4f}s'.
                    format((idx + 1), num_pairs,
                           data_time / (idx + 1) * test_data.batch_size,
                           net_time / (idx + 1) * test_data.batch_size,
                           post_time / (idx + 1) * test_data.batch_size),
                    logger)
            print("NO POINT_VALID IN rendered")
            continue

        data_time += time.time() - t
        t = time.time()

        pose_rendered = pairdb[idx]['pose_rendered']
        # NOTE(review): this second == -12 guard is unreachable -- the
        # identical check above already `continue`d for such pairs.
        if np.sum(pose_rendered) == -12:
            print(idx)
            class_id = imdb_test.classes.index(pairdb[idx]['gt_class'])
            num_inst[class_id] += 1
            num_inst[-1] += 1
            for pose_iter_idx in range(config.TEST.test_iter):
                all_poses_est[class_id][pose_iter_idx].append(pose_rendered)
                all_poses_gt[class_id][pose_iter_idx].append(
                    pairdb[idx]['pose_observed'])
            # post process
            if idx % 50 == 0:
                print_and_log(
                    'testing {}/{} data {:.4f}s net {:.4f}s calc_gt {:.4f}s'.
                    format((idx + 1), num_pairs,
                           data_time / (idx + 1) * test_data.batch_size,
                           net_time / (idx + 1) * test_data.batch_size,
                           post_time / (idx + 1) * test_data.batch_size),
                    logger)
            t = time.time()
            continue

        # first network forward pass
        output_all = predictor.predict(data_batch)
        net_time += time.time() - t
        t = time.time()
        rst_iter = []
        for output in output_all:
            cur_rst = {}
            cur_rst['se3'] = np.squeeze(
                output['se3_output'].asnumpy()).astype('float32')

            if not config.TEST.FAST_TEST and config.network.PRED_FLOW:
                cur_rst['flow'] = np.squeeze(
                    output['flow_est_crop_output'].asnumpy().transpose(
                        (2, 3, 1, 0))).astype('float16')
            else:
                cur_rst['flow'] = None

            if config.network.PRED_MASK and config.TEST.UPDATE_MASK not in [
                    'init', 'box_rendered'
            ]:
                mask_pred = np.squeeze(
                    output['mask_observed_pred_output'].asnumpy()).astype(
                        'float32')
                cur_rst['mask_pred'] = mask_pred

            rst_iter.append(cur_rst)

        post_time += time.time() - t
        sample_ratio = 1  # 0.01
        for batch_idx in range(0, test_data.batch_size):
            # if config.TEST.VISUALIZE and not (r_dist>15 and t_dist>0.05):
            #     continue  # 3388, 5326
            # calculate the flow error --------------------------------------------
            t = time.time()
            if config.network.PRED_FLOW and not config.TEST.FAST_TEST:
                # evaluate optical flow
                flow_gt = par_generate_gt(config, pairdb[idx])
                if config.network.PRED_FLOW:
                    all_diff = calc_EPE_one_pair(rst_iter[batch_idx], flow_gt,
                                                 'flow')
                    sum_EPE_all += all_diff['epe_all']
                    num_inst_all += all_diff['num_all']
                    sum_EPE_viz += all_diff['epe_viz']
                    num_inst_viz += all_diff['num_viz']
                    sum_EPE_vizbg += all_diff['epe_vizbg']
                    num_inst_vizbg += all_diff['num_vizbg']

            # calculate the se3 error ---------------------------------------------
            # evaluate se3 estimation
            pose_rendered = pairdb[idx]['pose_rendered']
            class_id = imdb_test.classes.index(pairdb[idx]['gt_class'])
            num_inst[class_id] += 1
            num_inst[-1] += 1
            post_time += time.time() - t

            # iterative refine se3 estimation --------------------------------------------------
            for pose_iter_idx in range(config.TEST.test_iter):
                t = time.time()
                # apply the predicted delta-pose to the current pose
                pose_rendered_update = RT_transform(
                    pose_rendered, rst_iter[0]['se3'][:-3],
                    rst_iter[0]['se3'][-3:], config.dataset.trans_means,
                    config.dataset.trans_stds, config.network.ROT_COORD)

                # calculate error
                r_dist, t_dist = calc_rt_dist_m(pose_rendered_update,
                                                pairdb[idx]['pose_observed'])

                # store poses estimation and gt
                all_poses_est[class_id][pose_iter_idx].append(
                    pose_rendered_update)
                all_poses_gt[class_id][pose_iter_idx].append(
                    pairdb[idx]['pose_observed'])
                all_rot_err[class_id][pose_iter_idx].append(r_dist)
                all_trans_err[class_id][pose_iter_idx].append(t_dist)
                sum_PoseErr[pose_iter_idx][class_id, :] += np.array(
                    [r_dist, t_dist])
                sum_PoseErr[pose_iter_idx][-1, :] += np.array([r_dist, t_dist])
                if config.TEST.VISUALIZE:
                    print("idx {}, iter {}: rError: {}, tError: {}".format(
                        idx + batch_idx, pose_iter_idx + 1, r_dist, t_dist))
                post_time += time.time() - t

                # # if more than one iteration
                if pose_iter_idx < (config.TEST.test_iter -
                                    1) or config.TEST.VISUALIZE:
                    t = time.time()
                    # get refined image: re-render at the updated pose
                    K_path = pairdb[idx]['image_observed'][:-10] + '-K.txt'
                    if os.path.exists(K_path):
                        # per-image intrinsic overrides the dataset default
                        K = np.loadtxt(K_path)
                    image_refined, depth_refined = render(
                        render_machine, pose_rendered_update,
                        config.dataset.class_name.index(
                            pairdb[idx]['gt_class']), K=K)
                    image_refined = image_refined[:, :, :3]

                    # update minibatch
                    update_package = [{
                        'image_rendered': image_refined,
                        'src_pose': pose_rendered_update
                    }]
                    if config.network.INPUT_DEPTH:
                        update_package[0]['depth_rendered'] = depth_refined
                    if config.network.INPUT_MASK:
                        mask_rendered_refined = np.zeros(depth_refined.shape)
                        mask_rendered_refined[depth_refined > 0.2] = 1
                        update_package[0][
                            'mask_rendered'] = mask_rendered_refined
                        if config.network.PRED_MASK:
                            # init, box_rendered, mask_rendered, box_real, mask_observed
                            if config.TEST.UPDATE_MASK == 'box_rendered':
                                input_names = [
                                    blob_name[0]
                                    for blob_name in data_batch.provide_data[0]
                                ]
                                update_package[0]['mask_observed'] = np.squeeze(
                                    data_batch.data[0][input_names.index(
                                        'mask_rendered')].asnumpy()[batch_idx])
                            elif config.TEST.UPDATE_MASK == 'init':
                                pass
                            else:
                                # NOTE(review): checks config.TEST.UPDATE_MASK
                                # but formats config.network.UPDATE_MASK in the
                                # message -- likely a typo; confirm.
                                raise Exception(
                                    'Unknown UPDATE_MASK type: {}'.format(
                                        config.network.UPDATE_MASK))

                    pose_rendered = pose_rendered_update
                    data_batch = update_data_batch(config, data_batch,
                                                   update_package)

                    data_time += time.time() - t

                    # forward and get rst
                    if pose_iter_idx < config.TEST.test_iter - 1:
                        t = time.time()
                        output_all = predictor.predict(data_batch)
                        net_time += time.time() - t

                        t = time.time()
                        rst_iter = []
                        for output in output_all:
                            cur_rst = {}
                            if config.network.REGRESSOR_NUM == 1:
                                cur_rst['se3'] = np.squeeze(
                                    output['se3_output'].asnumpy()).astype(
                                        'float32')

                            if not config.TEST.FAST_TEST and config.network.PRED_FLOW:
                                cur_rst['flow'] = np.squeeze(
                                    output['flow_est_crop_output'].asnumpy().
                                    transpose((2, 3, 1, 0))).astype('float16')
                            else:
                                cur_rst['flow'] = None

                            if config.network.PRED_MASK and config.TEST.UPDATE_MASK not in [
                                    'init', 'box_rendered'
                            ]:
                                mask_pred = np.squeeze(
                                    output['mask_observed_pred_output'].
                                    asnumpy()).astype('float32')
                                cur_rst['mask_pred'] = mask_pred

                            rst_iter.append(cur_rst)
                        post_time += time.time() - t

        # post process
        if idx % 50 == 0:
            print_and_log(
                'testing {}/{} data {:.4f}s net {:.4f}s calc_gt {:.4f}s'.
                format((idx + 1), num_pairs,
                       data_time / (idx + 1) * test_data.batch_size,
                       net_time / (idx + 1) * test_data.batch_size,
                       post_time / (idx + 1) * test_data.batch_size), logger)

        t = time.time()

    all_rot_err = np.array(all_rot_err)
    all_trans_err = np.array(all_trans_err)

    # save inference results
    if not config.TEST.VISUALIZE:
        with open(pose_err_file, 'wb') as f:
            print("saving result cache to {}".format(pose_err_file), )
            # protocol=2 keeps the cache loadable from python 2
            cPickle.dump(
                [all_rot_err, all_trans_err, all_poses_est, all_poses_gt],
                f,
                protocol=2)
            print("done")

    if config.network.PRED_FLOW:
        print_and_log('evaluate flow:', logger)
        print_and_log(
            'EPE all: {}'.format(sum_EPE_all / max(num_inst_all, 1.0)), logger)
        print_and_log(
            'EPE ignore unvisible: {}'.format(
                sum_EPE_vizbg / max(num_inst_vizbg, 1.0)), logger)
        print_and_log(
            'EPE visible: {}'.format(sum_EPE_viz / max(num_inst_viz, 1.0)),
            logger)

    print_and_log('evaluate pose:', logger)
    imdb_test.evaluate_pose(config, all_poses_est, all_poses_gt, logger)
    # evaluate pose add
    pose_add_plots_dir = os.path.join(imdb_test.result_path, 'add_plots')
    mkdir_if_missing(pose_add_plots_dir)
    imdb_test.evaluate_pose_add(config,
                                all_poses_est,
                                all_poses_gt,
                                output_dir=pose_add_plots_dir,
                                logger=logger)
    pose_arp2d_plots_dir = os.path.join(imdb_test.result_path, 'arp_2d_plots')
    mkdir_if_missing(pose_arp2d_plots_dir)
    imdb_test.evaluate_pose_arp_2d(config,
                                   all_poses_est,
                                   all_poses_gt,
                                   output_dir=pose_arp2d_plots_dir,
                                   logger=logger)

    print_and_log('using {} seconds in total'.format(time.time() - t_start),
                  logger)
def adapt_real_train():
    """Adapt the T-LESS primesense training set to the TLESS_render_v3 layout.

    For each selected class this renders GT-pose depth/label images with the
    reference intrinsics K_0, pads + shifts the original 400x400 real color
    images onto a 640x480 canvas so they align with K_0, and writes a
    ``{cls}_train.txt`` index file.

    Relies on module-level names: TLESS_root, ori_train_data_root, Render_Py,
    ZNEAR, ZFAR, load_gt, load_info, read_img, write_pose_file,
    mkdir_if_missing.
    """
    class_list = ["{:02d}".format(i) for i in range(1, 31)]
    sel_classes = ["05", "06"]  # only these classes are processed
    width = 640  # 400
    height = 480  # 400
    depth_factor = 10000  # meters -> uint16 depth-png units
    # Reference intrinsics all output images are aligned to.
    K_0 = np.array(
        [[1075.65091572, 0, 320.0], [0, 1073.90347929, 240.0], [0, 0, 1]]
    )  # Primesense
    new_data_root = os.path.join(TLESS_root, "TLESS_render_v3/data/real")
    mkdir_if_missing(new_data_root)
    real_set_dir = os.path.join(TLESS_root, "TLESS_render_v3/image_set/real")
    mkdir_if_missing(real_set_dir)
    for cls_idx, cls_name in enumerate(class_list):
        if not cls_name in sel_classes:
            continue
        print(cls_idx, cls_name)
        model_dir = os.path.join(TLESS_root, "models", cls_name)
        render_machine = Render_Py(model_dir, K_0, width, height, ZNEAR, ZFAR)
        gt_path = os.path.join(
            TLESS_root, "t-less_v2/train_primesense/{}/gt.yml".format(cls_name)
        )
        gt_dict = load_gt(gt_path)
        info_path = os.path.join(
            TLESS_root, "t-less_v2/train_primesense/{}/info.yml".format(cls_name)
        )
        info_dict = load_info(info_path)
        real_indices = []
        for img_id in tqdm(gt_dict.keys()):
            # Training images contain a single object instance: index [0].
            R = np.array(gt_dict[img_id][0]["cam_R_m2c"]).reshape((3, 3))
            t = np.array(gt_dict[img_id][0]["cam_t_m2c"]) / 1000.0  # mm -> m
            K = np.array(info_dict[img_id]["cam_K"]).reshape((3, 3))
            # K[0, 2] += 120  # cx
            # K[1, 2] += 40  # cy
            pose = np.zeros((3, 4))
            pose[:3, :3] = R
            pose[:3, 3] = t
            # print(pose)
            # print(K)
            # Pixel shift needed to move this image's principal point onto
            # K_0's principal point (rounded to whole pixels).
            K_diff = K_0 - K
            cx_diff = K_diff[0, 2]
            cy_diff = K_diff[1, 2]
            px_diff = int(np.round(cx_diff))
            py_diff = int(np.round(cy_diff))
            # pose ----------------
            pose_path = os.path.join(
                new_data_root, cls_name, "{:06d}-pose.txt".format(img_id)
            )
            mkdir_if_missing(os.path.join(new_data_root, cls_name))
            write_pose_file(pose_path, cls_idx, pose)
            # Render with the reference intrinsics so depth/label match the
            # shifted real color image.
            rgb_gl, depth_gl = render_machine.render(
                pose[:3, :3], pose[:, -1], r_type="mat", K=K_0
            )
            rgb_gl = rgb_gl.astype("uint8")
            # depth ------------------
            depth_gl = (depth_gl * depth_factor).astype(np.uint16)
            depth_path = os.path.join(
                new_data_root, cls_name, "{:06d}-depth.png".format(img_id)
            )
            cv2.imwrite(depth_path, depth_gl)
            # label ---------------------
            # Binary foreground mask: any pixel the renderer touched.
            label_gl = np.zeros(depth_gl.shape)
            label_gl[depth_gl != 0] = 1
            label_path = os.path.join(
                new_data_root, cls_name, "{:06d}-label.png".format(img_id)
            )
            cv2.imwrite(label_path, label_gl)
            # real color ----------------------------
            color_real = read_img(
                os.path.join(
                    ori_train_data_root, cls_name, "rgb/{:04d}.png".format(img_id)
                ),
                3,
            )
            # print(color_real.max(), color_real.min())
            # Pad the 400x400 source image into the top-left corner of a
            # 640x480 canvas, then translate by (px_diff, py_diff).
            # assumes color_real is 400x400x3 — TODO confirm against dataset
            pad_real = np.zeros((480, 640, 3))
            xs = 0
            ys = 0
            pad_real[xs : 400 + xs, ys : 400 + ys, :] = color_real
            pad_real = pad_real.astype("uint8")
            # translate image
            M = np.float32([[1, 0, px_diff], [0, 1, py_diff]])
            pad_real = cv2.warpAffine(pad_real, M, (640, 480))
            color_path = os.path.join(
                new_data_root, cls_name, "{:06d}-color.png".format(img_id)
            )
            cv2.imwrite(color_path, pad_real)
            # real index
            real_indices.append("{}/{:06d}".format(cls_name, img_id))
        real_indices = sorted(real_indices)
        real_set_file = os.path.join(real_set_dir, "{}_train.txt".format(cls_name))
        with open(real_set_file, "w") as f:
            for real_idx in real_indices:
                f.write(real_idx + "\n")
# config for renderer
width = 640
height = 480
K = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]])  # LM
ZNEAR = 0.25
ZFAR = 6.0
depth_factor = 1000
########################
modelnet_root = "/data/wanggu/Downloads/modelnet"  # change to your dir
modelnet40_root = os.path.join(modelnet_root, "ModelNet40")
model_set_dir = os.path.join(modelnet_root, "model_set")
mkdir_if_missing(model_set_dir)


def file_size(file_path):
    """Return the size of ``file_path`` in mebibytes (MiB).

    Parameters
    ----------
    file_path : str
        Path to check.

    Returns
    -------
    float or None
        Size in MiB when ``file_path`` is a regular file, otherwise ``None``
        (made explicit here; the original fell off the end of the function).
    """
    if os.path.isfile(file_path):
        # os.path.getsize(p) == os.stat(p).st_size, without the extra local.
        return os.path.getsize(file_path) / (1024.0 * 1024.0)
    return None


sel_classes = train_classes
num_models = 50
def main():
    """Convert the original LM6d (SIXD-style) per-class data into the
    LM6d_new layout: copy color/depth, build a -meta.mat per image, and
    render per-instance masks that are composited (far-to-near) into one
    -label.png.  Also writes a ``{cls}_all.txt`` index per class.

    Relies on module-level names: classes, cur_dir, LM6d_origin_root,
    LM6d_new_root, real_set_dir, Render_Py, K, width, height, ZNEAR, ZFAR,
    class2idx, load_gt, load_info, read_img, mkdir_if_missing, copyfile.
    """
    sel_classes = classes
    model_dir = os.path.join(cur_dir, '../../data/LINEMOD_6D/LM6d_converted/models')
    render_machine = Render_Py(model_dir, classes, K, width, height, ZNEAR, ZFAR)
    for cls_idx, cls_name in enumerate(classes):
        if not cls_name in sel_classes:
            continue
        print(cls_idx, cls_name)
        real_indices = []
        images = [fn for fn in os.listdir(os.path.join(LM6d_origin_root,
                  '{:02d}'.format(class2idx(cls_name)), 'rgb')) if '.png' in fn]
        images = sorted(images)
        gt_path = os.path.join(LM6d_origin_root,
                               '{:02d}'.format(class2idx(cls_name)), 'gt.yml')
        gt_dict = load_gt(gt_path)
        info_path = os.path.join(LM6d_origin_root,
                                 '{:02d}'.format(class2idx(cls_name)), 'info.yml')
        info_dict = load_info(info_path)
        for real_img in tqdm(images):
            old_color_path = os.path.join(LM6d_origin_root,
                                          '{:02d}'.format(class2idx(cls_name)),
                                          "rgb/{}".format(real_img))
            assert os.path.exists(old_color_path), old_color_path
            old_depth_path = os.path.join(LM6d_origin_root,
                                          '{:02d}'.format(class2idx(cls_name)),
                                          "depth/{}".format(real_img))
            assert os.path.exists(old_depth_path), old_depth_path
            # Original ids are 0-based; the new layout is 1-based.
            img_id = int(real_img.replace('.png', ''))
            new_img_id = img_id + 1
            # K
            # K = np.array(info_dict[img_id]['cam_K']).reshape((3, 3))
            # NOTE(review): color_img/depth below are only used by the nested
            # vis_check() helper, never in the main flow.
            color_img = cv2.imread(old_color_path, cv2.IMREAD_COLOR)
            ## depth
            depth = read_img(old_depth_path, 1)
            # print(np.max(depth), np.min(depth))
            # print(color_img.shape)
            new_color_path = os.path.join(LM6d_new_root,
                                          '{:02d}'.format(class2idx(cls_name)),
                                          "{:06d}-color.png".format(new_img_id))
            new_depth_path = os.path.join(LM6d_new_root,
                                          '{:02d}'.format(class2idx(cls_name)),
                                          "{:06d}-depth.png".format(new_img_id))
            mkdir_if_missing(os.path.dirname(new_color_path))
            copyfile(old_color_path, new_color_path)
            copyfile(old_depth_path, new_depth_path)
            # meta and label
            meta_dict = {}
            num_instance = len(gt_dict[img_id])
            meta_dict['cls_indexes'] = np.zeros((1, num_instance), dtype=np.int32)
            meta_dict['boxes'] = np.zeros((num_instance, 4), dtype='float32')
            meta_dict['poses'] = np.zeros((3, 4, num_instance), dtype='float32')
            distances = []  # camera-frame z per instance, for depth ordering
            label_dict = {}  # obj_id -> binary rendered mask
            for ins_id, instance in enumerate(gt_dict[img_id]):
                obj_id = instance['obj_id']
                meta_dict['cls_indexes'][0, ins_id] = obj_id
                obj_bb = np.array(instance['obj_bb'])
                meta_dict['boxes'][ins_id, :] = obj_bb
                # pose
                pose = np.zeros((3, 4))
                R = np.array(instance['cam_R_m2c']).reshape((3, 3))
                t = np.array(instance['cam_t_m2c']) / 1000.  # mm -> m
                pose[:3, :3] = R
                pose[:3, 3] = t
                distances.append(t[2])
                meta_dict['poses'][:, :, ins_id] = pose
                # Render this instance alone to get its mask.
                image_gl, depth_gl = render_machine.render(
                    obj_id - 1, pose[:3, :3], pose[:3, 3], r_type='mat')
                image_gl = image_gl.astype('uint8')
                label = np.zeros(depth_gl.shape)
                label[depth_gl != 0] = 1
                label_dict[obj_id] = label
            meta_path = os.path.join(LM6d_new_root,
                                     '{:02d}'.format(class2idx(cls_name)),
                                     "{:06d}-meta.mat".format(new_img_id))
            sio.savemat(meta_path, meta_dict)
            # Paint masks far-to-near so closer objects overwrite occluded ones.
            dis_inds = sorted(range(len(distances)),
                              key=lambda k: -distances[k])  # put deeper objects first
            # label
            res_label = np.zeros((480, 640))
            for dis_id in dis_inds:
                cls_id = meta_dict['cls_indexes'][0, dis_id]
                tmp_label = label_dict[cls_id]
                # label
                res_label[tmp_label == 1] = cls_id
            label_path = os.path.join(LM6d_new_root,
                                      '{:02d}'.format(class2idx(cls_name)),
                                      "{:06d}-label.png".format(new_img_id))
            cv2.imwrite(label_path, res_label)

            def vis_check():
                # Debug-only visual sanity check; closes over the loop's
                # locals (color_img, depth_gl, res_label, ...).
                fig = plt.figure(figsize=(8, 6), dpi=120)
                plt.subplot(2, 3, 1)
                plt.imshow(color_img[:, :, [2, 1, 0]])
                plt.title('color_img')
                plt.subplot(2, 3, 2)
                plt.imshow(depth_gl)
                plt.title('depth')
                plt.subplot(2, 3, 3)
                plt.imshow(depth_gl)
                plt.title('depth_gl')
                plt.subplot(2, 3, 4)
                plt.imshow(res_label)
                plt.title('res_label')
                plt.subplot(2, 3, 5)
                label_v1_path = os.path.join(
                    '/data/wanggu/Storage/LINEMOD_SIXD_wods/LM6d_render_v1/data/real',
                    '{:02d}'.format(class2idx(cls_name)),
                    "{:06d}-label.png".format(new_img_id))
                assert os.path.exists(label_v1_path), label_v1_path
                label_v1 = read_img(label_v1_path, 1)
                plt.imshow(label_v1)
                plt.title('label_v1')
                plt.show()
            # vis_check()
            # real idx
            real_indices.append("{:02d}/{:06d}".format(class2idx(cls_name),
                                                       new_img_id))
        # one idx file for each video of each class
        real_idx_file = os.path.join(real_set_dir, "{}_all.txt".format(cls_name))
        with open(real_idx_file, 'w') as f:
            for real_idx in real_indices:
                f.write(real_idx + '\n')
def update(self, labels, preds):
    """Visualization hook: pick one random sample from the batch and dump
    input/rendered/zoomed images (plus optional masks and flow meshes) to
    ``self.save_dir`` as timestamped PNGs.

    Parameters
    ----------
    labels : list
        Network labels, indexed via ``self.label.index(name)``.
    preds : list
        Network outputs, indexed via ``self.pred.index(name)`` or by fixed
        negative positions (see notes below).
    """
    from lib.utils.mkdir_if_missing import mkdir_if_missing
    num_imgs = preds[self.pred.index("image_real")].shape[0]
    # Random single-sample selection (array of length 1).
    sel_img_idx = np.random.randint(0, num_imgs, 1)
    # Undo the network's input normalization (scale/offset), then map to
    # [0, 1] for saving.  assumes NCHW layout — TODO confirm.
    image_real = preds[self.pred.index("image_real")].asnumpy() * 0.9 + 128
    image_real = np.squeeze(image_real[sel_img_idx, :, :, :]).transpose(
        1, 2, 0) / 255
    image_real = np.maximum(image_real, 0.0)
    image_real = np.minimum(image_real, 1.0)
    image_rendered = preds[self.pred.index(
        "image_rendered")].asnumpy() + 128
    image_rendered = np.squeeze(
        image_rendered[sel_img_idx, :, :, :]).transpose(1, 2, 0) / 255
    # NOTE(review): preds[-5..-1] are accessed positionally; this depends on
    # the exact output ordering of the network — verify against the module
    # that builds `preds`.
    zoom_image_real = preds[-2].asnumpy() + 128
    zoom_image_real = np.squeeze(
        zoom_image_real[sel_img_idx, :, :, :]).transpose(1, 2, 0) / 255
    zoom_image_real = np.maximum(zoom_image_real, 0.0)
    zoom_image_real = np.minimum(zoom_image_real, 1.0)
    zoom_image_rendered = preds[-1].asnumpy() + 128
    zoom_image_rendered = np.squeeze(
        zoom_image_rendered[sel_img_idx, :, :, :]).transpose(1, 2, 0) / 255
    zoom_image_rendered = np.maximum(zoom_image_rendered, 0.0)
    zoom_image_rendered = np.minimum(zoom_image_rendered, 1.0)
    if self.cfg.network.WITH_MASK and self.cfg.network.PRED_MASK:
        # input
        zoom_mask_real_gt = np.squeeze(preds[-5].asnumpy()[sel_img_idx, 0, :, :])
        zoom_mask_real_est = np.squeeze(preds[-4].asnumpy()[sel_img_idx, 0, :, :])
        zoom_mask_rendered = np.squeeze(preds[-3].asnumpy()[sel_img_idx, 0, :, :])
        # output
        zoom_mask_prob = np.squeeze(preds[self.pred.index(
            "mask_prob_iter0")].asnumpy()[sel_img_idx, 0, :, :])
        # Binarize at 0.5 via rounding.
        zoom_mask_pred_bin = np.round(zoom_mask_prob)
    if self.cfg.network.PRED_FLOW:
        # flow
        import cv2
        flow_est = preds[self.pred.index("flow_est_crop")].asnumpy()
        print("flow_est:", flow_est.shape)
        flow_est = np.squeeze(flow_est[sel_img_idx, :, :, :]).transpose(
            1, 2, 0)
        flow_loss = preds[self.pred.index("flow_loss")].asnumpy()
        flow = labels[self.label.index("flow")].asnumpy()
        print("flow: ", flow.shape)
        flow = np.squeeze(flow[sel_img_idx, :, :, :]).transpose(1, 2, 0)
        flow_weights = labels[self.label.index("flow_weight")].asnumpy()
        flow_weights = np.squeeze(
            flow_weights[sel_img_idx, :, :, :]).transpose([1, 2, 0])
        # Pixels with non-zero flow weight are treated as visible.
        visible = np.squeeze(flow_weights[:, :, 0]) != 0
        print("image_rendered: ", image_rendered.shape,
              image_rendered.min(), image_rendered.max())
        height = image_real.shape[0]
        width = image_rendered.shape[1]
        mesh_real = np.zeros((height, width, 3), np.uint8)
        mesh_rendered = np.zeros((height, width, 3), np.uint8)
        mesh_real_est = np.zeros((height, width, 3), np.uint8)
        # Draw a sparse (every 3rd pixel) colored dot grid: the rendered
        # grid, the grid displaced by GT flow, and by estimated flow.
        # Dot color encodes (h, w) position so correspondences can be
        # compared across the three images by eye.
        for h in range(0, height, 3):
            for w in range(0, width, 3):
                if visible[h, w]:
                    # flow channels are (dy, dx) — index 0 is vertical.
                    cur_flow = flow[h, w, :]
                    cur_flow_est = flow_est[h, w, :]
                    mesh_rendered = cv2.circle(
                        mesh_rendered,
                        (np.round(w).astype(int), np.round(h).astype(int)),
                        1,
                        (h * 255 / height, 255 - w * 255 / width,
                         w * 255 / width),
                        5,
                    )
                    mesh_real = cv2.circle(
                        mesh_real,
                        (np.round(w + cur_flow[1]).astype(int),
                         np.round(h + cur_flow[0]).astype(int)),
                        1,
                        (h * 255 / height, 255 - w * 255 / width,
                         w * 255 / width),
                        5,
                    )
                    # Clamp estimated end-point into the image bounds.
                    point = np.round(
                        [w + cur_flow_est[1], h + cur_flow_est[0]]).astype(int)
                    point[0] = min(max(point[0], 0), width)
                    point[1] = min(max(point[1], 0), height)
                    mesh_real_est = cv2.circle(
                        mesh_real_est, (point[0], point[1]), 1,
                        (h * 255 / height, 255 - w * 255 / width, 127), 5)
        print("est_loss: {}".format(
            np.sum(flow_weights * self.l2(flow - flow_est))))
        print("act_loss: {}".format(np.sum(flow_loss)))
    time_str = time.strftime("%Y-%m-%d-%H-%M-%S")
    mkdir_if_missing(self.save_dir)
    self.save_fig(image_real,
                  "{}/{}_image_real.png".format(self.save_dir, time_str))
    self.save_fig(
        image_rendered,
        "{}/{}_image_rendered.png".format(self.save_dir, time_str))
    self.save_fig(
        zoom_image_real,
        "{}/{}_zoom_image_real.png".format(self.save_dir, time_str))
    self.save_fig(
        zoom_image_rendered,
        "{}/{}_zoom_image_rendered.png".format(self.save_dir, time_str))
    if self.cfg.network.PRED_MASK:
        # NOTE(review): guarded by PRED_MASK only, while the mask arrays above
        # were created under WITH_MASK *and* PRED_MASK — if PRED_MASK is set
        # without WITH_MASK this would raise NameError; confirm config
        # invariants.
        self.save_fig(
            zoom_mask_real_est,
            "{}/{}_zoom_mask_real_est.png".format(self.save_dir, time_str))
        self.save_fig(
            zoom_mask_real_gt,
            "{}/{}_zoom_mask_real_gt.png".format(self.save_dir, time_str))
        self.save_fig(
            zoom_mask_rendered,
            "{}/{}_zoom_mask_rendered.png".format(self.save_dir, time_str))
        self.save_fig(
            zoom_mask_pred_bin,
            "{}/{}_zoom_mask_pred_bin.png".format(self.save_dir, time_str))
    if self.cfg.network.PRED_FLOW:
        self.save_fig(mesh_real,
                      "{}/{}_mesh_real.png".format(self.save_dir, time_str))
        self.save_fig(
            mesh_rendered,
            "{}/{}_mesh_rendered.png".format(self.save_dir, time_str))
        self.save_fig(
            mesh_real_est,
            "{}/{}_mesh_real_est.png".format(self.save_dir, time_str))
    print("=====================")
height = 480 K = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]) # LM ZNEAR = 0.25 ZFAR = 6.0 depth_factor = 1000 # init render machines # brightness_ratios = [0.2, 0.25, 0.3, 0.35, 0.4] ################### modelnet_root = "/data/wanggu/Downloads/modelnet" # NB: change to your dir modelnet40_root = os.path.join(modelnet_root, "ModelNet40") data_dir = os.path.join(modelnet_root, "modelnet_render_v1/data/real") model_set_dir = os.path.join(modelnet_root, "model_set") example_render_dir = os.path.join(modelnet_root, "example_render") mkdir_if_missing(example_render_dir) for cls_i, cls_name in enumerate(classes): if not cls_name in ["door", "glass_box", "wardrobe", "plant", "xbox"]: continue print(cls_name) class_real_dir = os.path.join(data_dir, cls_name) for set in ["train", "test"]: image_list = [ os.path.join(class_real_dir, set, fn) for fn in os.listdir(os.path.join(class_real_dir, set)) if "0000-color" in fn ] image_list = sorted(image_list) with open(
"monitor": 25, "sofa": 26, "night_stand": 27, } # airplane # init render machines # brightness_ratios = [0.2, 0.25, 0.3, 0.35, 0.4] ################### modelnet_root = os.path.join(cur_dir, "../data/ModelNet") modelnet40_root = os.path.join(modelnet_root, "ModelNet40") data_dir = os.path.join(modelnet_root, "modelnet_render_v1/data/real") real_set_dir = os.path.join(modelnet_root, "modelnet_render_v1/image_set/real") image_set_dir = os.path.join(modelnet_root, "modelnet_render_v1/image_set") rendered_data_dir = os.path.join(modelnet_root, "modelnet_render_v1/data/rendered") mkdir_if_missing(rendered_data_dir) for cls_i, cls_name in enumerate(classes): if not cls_name in test_classes: continue print(cls_name) seed = seed_dict[cls_name] random.seed(seed) np.random.seed(seed) seed_list = [random.randint(0, 10000) for i in range(100000)] with open( os.path.join(real_set_dir, "{}_{}_real.txt".format(cls_name, "all"))) as f: all_indices = [line.strip() for line in f.readlines()]
def main():
    """Adapt the original LM6d per-class observed data into the LM6d_new
    layout: copy color/depth, write a -meta.mat with per-instance poses and
    boxes, render per-instance masks and composite them (far-to-near) into a
    single -label.png, collecting the new observed indices.

    Relies on module-level names: classes, model_dir, LM6d_origin_root,
    LM6d_new_root, Render_Py, K, width, height, ZNEAR, ZFAR, class2idx,
    load_gt, load_info, read_img, mkdir_if_missing, copyfile.
    """
    sel_classes = classes
    # NOTE(review): model_dir is not defined in this function — presumably a
    # module-level constant; verify it exists at call time.
    render_machine = Render_Py(model_dir, classes, K, width, height, ZNEAR,
                               ZFAR)
    for cls_idx, cls_name in enumerate(classes):
        if not cls_name in sel_classes:
            continue
        print(cls_idx, cls_name)
        observed_indices = []
        images = [
            fn for fn in os.listdir(
                os.path.join(LM6d_origin_root, '{:02d}'.format(
                    class2idx(cls_name)), 'rgb')) if '.png' in fn
        ]
        images = sorted(images)
        gt_path = os.path.join(LM6d_origin_root,
                               '{:02d}'.format(class2idx(cls_name)), 'gt.yml')
        gt_dict = load_gt(gt_path)
        info_path = os.path.join(LM6d_origin_root,
                                 '{:02d}'.format(class2idx(cls_name)),
                                 'info.yml')
        info_dict = load_info(info_path)
        for observed_img in tqdm(images):
            old_color_path = os.path.join(LM6d_origin_root,
                                          '{:02d}'.format(class2idx(cls_name)),
                                          "rgb/{}".format(observed_img))
            assert os.path.exists(old_color_path), old_color_path
            old_depth_path = os.path.join(LM6d_origin_root,
                                          '{:02d}'.format(class2idx(cls_name)),
                                          "depth/{}".format(observed_img))
            assert os.path.exists(old_depth_path), old_depth_path
            # Original ids are 0-based; the new layout is 1-based.
            img_id = int(observed_img.replace('.png', ''))
            new_img_id = img_id + 1
            # K
            # K = np.array(info_dict[img_id]['cam_K']).reshape((3, 3))
            # NOTE(review): color_img and depth are loaded but unused below.
            color_img = cv2.imread(old_color_path, cv2.IMREAD_COLOR)
            ## depth
            depth = read_img(old_depth_path, 1)
            # print(np.max(depth), np.min(depth))
            # print(color_img.shape)
            new_color_path = os.path.join(
                LM6d_new_root, '{:02d}'.format(class2idx(cls_name)),
                "{:06d}-color.png".format(new_img_id))
            new_depth_path = os.path.join(
                LM6d_new_root, '{:02d}'.format(class2idx(cls_name)),
                "{:06d}-depth.png".format(new_img_id))
            mkdir_if_missing(os.path.dirname(new_color_path))
            copyfile(old_color_path, new_color_path)
            copyfile(old_depth_path, new_depth_path)
            # meta and label
            meta_dict = {}
            num_instance = len(gt_dict[img_id])
            meta_dict['cls_indexes'] = np.zeros((1, num_instance),
                                                dtype=np.int32)
            meta_dict['boxes'] = np.zeros((num_instance, 4), dtype='float32')
            meta_dict['poses'] = np.zeros((3, 4, num_instance),
                                          dtype='float32')
            distances = []  # camera-frame z per instance, for depth ordering
            label_dict = {}  # obj_id -> binary rendered mask
            for ins_id, instance in enumerate(gt_dict[img_id]):
                obj_id = instance['obj_id']
                meta_dict['cls_indexes'][0, ins_id] = obj_id
                obj_bb = np.array(instance['obj_bb'])
                meta_dict['boxes'][ins_id, :] = obj_bb
                # pose
                pose = np.zeros((3, 4))
                R = np.array(instance['cam_R_m2c']).reshape((3, 3))
                t = np.array(instance['cam_t_m2c']) / 1000.  # mm -> m
                pose[:3, :3] = R
                pose[:3, 3] = t
                distances.append(t[2])
                meta_dict['poses'][:, :, ins_id] = pose
                # Render this instance alone to get its mask.
                image_gl, depth_gl = render_machine.render(obj_id - 1,
                                                           pose[:3, :3],
                                                           pose[:3, 3],
                                                           r_type='mat')
                image_gl = image_gl.astype('uint8')
                label = np.zeros(depth_gl.shape)
                label[depth_gl != 0] = 1
                label_dict[obj_id] = label
            meta_path = os.path.join(LM6d_new_root,
                                     '{:02d}'.format(class2idx(cls_name)),
                                     "{:06d}-meta.mat".format(new_img_id))
            sio.savemat(meta_path, meta_dict)
            # Paint masks far-to-near so closer objects overwrite occluded
            # ones.
            dis_inds = sorted(
                range(len(distances)),
                key=lambda k: -distances[k])  # put deeper objects first
            # label
            res_label = np.zeros((480, 640))
            for dis_id in dis_inds:
                cls_id = meta_dict['cls_indexes'][0, dis_id]
                tmp_label = label_dict[cls_id]
                # label
                res_label[tmp_label == 1] = cls_id
            label_path = os.path.join(LM6d_new_root,
                                      '{:02d}'.format(class2idx(cls_name)),
                                      "{:06d}-label.png".format(new_img_id))
            cv2.imwrite(label_path, res_label)
            # observed idx
            observed_indices.append("{:02d}/{:06d}".format(
                class2idx(cls_name), new_img_id))
# T-LESS test-scene geometry: native frames are 720x540 and get cropped down
# to 640x480 for the converted dataset.
height = 540
crop_width = 640  # crop 80
crop_height = 480  # crop 60
K_0 = np.array([[1075.65091572, 0, 320.0],
                [0, 1073.90347929, 240.0],
                [0, 0, 1]])  # Primesense
# in test set, K is different for each image
ZNEAR = 0.25
ZFAR = 6.0
DEPTH_FACTOR = 10000
# new data root -----------
new_data_root = os.path.join(TLESS_root, "TLESS_render_v3/data/real/test")
mkdir_if_missing(new_data_root)
real_set_dir = os.path.join(TLESS_root, "TLESS_render_v3/image_set/real")
mkdir_if_missing(real_set_dir)


def read_img(path, n_channel=3):
    """Read an image from ``path`` with OpenCV.

    n_channel=3 loads as BGR color; n_channel=1 loads unchanged (preserving
    16-bit depth PNGs); anything else raises.
    """
    imread_flags = {3: cv2.IMREAD_COLOR, 1: cv2.IMREAD_UNCHANGED}
    if n_channel not in imread_flags:
        raise Exception("Unsupported n_channel: {}".format(n_channel))
    return cv2.imread(path, imread_flags[n_channel])
def main():
    """Render the 'rendered' counterpart images for every observed LINEMOD
    image (10 perturbed poses each) and write the observed/rendered pair
    lists (``train_{cls}.txt`` and ``my_val_{cls}.txt``).

    For each rendered pose: writes -color.png, -depth.png (scaled by
    ``depth_factor`` to uint16) and a -pose.txt containing the class index
    followed by the 3x4 pose matrix.

    Relies on module-level names: idx2class, LM6d_root, observed_set_root,
    rendered_pose_path, rendered_root_dir, pair_set_dir, Render_Py,
    RT_transform, K, width, height, ZNEAR, ZFAR, depth_factor,
    mkdir_if_missing.
    """
    gen_images = True
    for class_idx, class_name in idx2class.items():
        train_pair = []
        val_pair = []
        print("start ", class_idx, class_name)
        if class_name in ["__back_ground__"]:
            continue
        # uncomment here to only generate data for ape
        # if class_name not in ['ape']:
        #     continue

        if gen_images:
            # init renderer
            model_dir = os.path.join(LM6d_root, "models", class_name)
            render_machine = Render_Py(model_dir, K, width, height, ZNEAR, ZFAR)

        for set_type in ["all"]:
            with open(
                os.path.join(observed_set_root,
                             "{}_{}.txt".format(class_name, "all")),
                "r",
            ) as f:
                all_observed_list = [x.strip() for x in f.readlines()]
            # with open(
            #     os.path.join(observed_set_root, '{}_{}.txt'.format(
            #         class_name, 'train')), 'r') as f:
            #     train_observed_list = [x.strip() for x in f.readlines()]
            with open(
                os.path.join(observed_set_root,
                             "{}_{}.txt".format(class_name, "test")),
                "r",
            ) as f:
                test_observed_list = [x.strip() for x in f.readlines()]

            # One pose per line: quaternion (4) + translation (3).
            with open(rendered_pose_path.format(set_type, class_name)) as f:
                str_rendered_pose_list = [
                    x.strip().split(" ") for x in f.readlines()
                ]
            rendered_pose_list = np.array(
                [[float(x) for x in each_pose]
                 for each_pose in str_rendered_pose_list]
            )

            rendered_per_observed = 10
            # Use the named constant instead of duplicating the literal 10.
            assert len(rendered_pose_list) == rendered_per_observed * len(
                all_observed_list
            ), "{} vs {}".format(len(rendered_pose_list),
                                 len(all_observed_list))

            for idx, observed_index in enumerate(tqdm(all_observed_list)):
                video_name, observed_prefix = observed_index.split("/")
                rendered_dir = os.path.join(rendered_root_dir, class_name)
                mkdir_if_missing(rendered_dir)
                for inner_idx in range(rendered_per_observed):
                    if gen_images:
                        # if gen_images and observed_index in test_observed_list and inner_idx == 0:  # only generate my_val_v{}  # noqa:E501
                        image_file = os.path.join(
                            rendered_dir,
                            "{}_{}-color.png".format(observed_prefix,
                                                     inner_idx),
                        )
                        depth_file = os.path.join(
                            rendered_dir,
                            "{}_{}-depth.png".format(observed_prefix,
                                                     inner_idx),
                        )
                        rendered_idx = idx * rendered_per_observed + inner_idx
                        pose_rendered_q = rendered_pose_list[rendered_idx]

                        rgb_gl, depth_gl = render_machine.render(
                            pose_rendered_q[:4], pose_rendered_q[4:]
                        )
                        rgb_gl = rgb_gl.astype("uint8")
                        depth_gl = (depth_gl * depth_factor).astype(np.uint16)
                        cv2.imwrite(image_file, rgb_gl)
                        cv2.imwrite(depth_file, depth_gl)

                        pose_rendered_file = os.path.join(
                            rendered_dir,
                            "{}_{}-pose.txt".format(observed_prefix,
                                                    inner_idx),
                        )
                        # Convert quaternion+translation to a 3x4 matrix.
                        pose_rendered_m = np.zeros((3, 4))
                        pose_rendered_m[:, :3] = RT_transform.quat2mat(
                            pose_rendered_q[:4]
                        )
                        pose_rendered_m[:, 3] = pose_rendered_q[4:]
                        pose_ori_m = pose_rendered_m
                        pose_str = "{} {} {} {}\n{} {} {} {}\n{} {} {} {}".format(
                            pose_ori_m[0, 0],
                            pose_ori_m[0, 1],
                            pose_ori_m[0, 2],
                            pose_ori_m[0, 3],
                            pose_ori_m[1, 0],
                            pose_ori_m[1, 1],
                            pose_ori_m[1, 2],
                            pose_ori_m[1, 3],
                            pose_ori_m[2, 0],
                            pose_ori_m[2, 1],
                            pose_ori_m[2, 2],
                            pose_ori_m[2, 3],
                        )
                        # BUGFIX: the original opened this file with a bare
                        # open() and never closed it (a file-handle leak per
                        # rendered pose); a context manager guarantees the
                        # handle is flushed and released.
                        with open(pose_rendered_file, "w") as text_file:
                            text_file.write("{}\n".format(class_idx))
                            text_file.write(pose_str)

                    # Test images contribute only their first rendered pose
                    # to the validation pairs; everything else is training.
                    if observed_index in test_observed_list:
                        if inner_idx == 0:
                            val_pair.append(
                                "{} {}/{}_{}".format(
                                    observed_index,
                                    class_name,
                                    observed_prefix,
                                    inner_idx,
                                )
                            )
                    else:
                        train_pair.append(
                            "{} {}/{}_{}".format(
                                observed_index, class_name, observed_prefix,
                                inner_idx
                            )
                        )

        train_pair_set_file = os.path.join(
            pair_set_dir, "train_{}.txt".format(class_name)
        )
        train_pair = sorted(train_pair)
        with open(train_pair_set_file, "w") as text_file:
            for x in train_pair:
                text_file.write("{}\n".format(x))

        test_pair_set_file = os.path.join(
            pair_set_dir, "my_val_{}.txt".format(class_name)
        )
        val_pair = sorted(val_pair)
        with open(test_pair_set_file, "w") as text_file:
            for x in val_pair:
                text_file.write("{}\n".format(x))
        print(class_name, " done")
def adapt_color_depth_pose_label():
    """Adapt T-LESS test scenes for the selected classes: shift color/depth
    so their principal point matches the reference intrinsics K_0, crop to
    640x480, write per-instance pose files, render per-instance labels, and
    record the new observed indices.

    Output layout: real/test/cls_name/video_name/{:06d}_{ins}-*.png

    Relies on module-level names: class_list, sel_classes, sel_videos,
    TLESS_root, origin_data_root, new_data_root, real_set_dir, K_0,
    crop_width, crop_height, ZNEAR, ZFAR, inout, renderer, load_gt,
    load_info, read_img, write_pose_file, mkdir_if_missing.
    """
    # real/test/cls_name/vidoe_name/06d-color.png
    for cls_idx, cls_name in enumerate(class_list):
        if not cls_name in sel_classes:
            continue
        print(cls_idx, cls_name)
        real_indices = []
        model_path = os.path.join(
            TLESS_root, "models/{}/obj_{}_scaled.ply".format(cls_name,
                                                             cls_name))
        model = inout.load_ply(model_path)
        for video_name in sel_videos:  # 02
            print("video name: {}".format(video_name))
            images = [
                fn for fn in os.listdir(
                    os.path.join(origin_data_root, video_name, "rgb"))
                if ".png" in fn
            ]
            images = sorted(images)
            gt_path = os.path.join(origin_data_root, video_name, "gt.yml")
            gt_dict = load_gt(gt_path)
            info_path = os.path.join(origin_data_root, video_name, "info.yml")
            info_dict = load_info(info_path)
            for real_img in tqdm(images):
                old_color_path = os.path.join(origin_data_root, video_name,
                                              "rgb/{}".format(real_img))
                assert os.path.exists(old_color_path), old_color_path
                old_depth_path = os.path.join(origin_data_root, video_name,
                                              "depth/{}".format(real_img))
                assert os.path.exists(old_depth_path), old_depth_path
                img_id = int(real_img.replace(".png", ""))
                # K
                # Per-image intrinsics; compute the pixel shift that aligns
                # this image's principal point with K_0's.
                K = np.array(info_dict[img_id]["cam_K"]).reshape((3, 3))
                K_diff = K_0 - K
                cx_diff = K_diff[0, 2]
                cy_diff = K_diff[1, 2]
                px_diff = int(np.round(cx_diff))
                py_diff = int(np.round(cy_diff))
                color_img = cv2.imread(old_color_path, cv2.IMREAD_COLOR)
                # translate
                M = np.float32([[1, 0, px_diff], [0, 1, py_diff]])
                color_img = cv2.warpAffine(color_img, M, (720, 540))
                # crop to (480, 640)
                crop_color = color_img[:480, :640, :]
                ## depth
                # translate
                depth = read_img(old_depth_path, 1)
                depth = cv2.warpAffine(depth, M, (720, 540))
                # crop
                crop_depth = depth[:480, :640]
                # print(color_img.shape)
                for ins_id, instance in enumerate(gt_dict[img_id]):
                    obj_id = instance["obj_id"]
                    # Only keep instances of the class being processed.
                    if obj_id == int(cls_name):
                        new_color_path = os.path.join(
                            new_data_root,
                            cls_name,
                            video_name,
                            "{:06d}_{}-color.png".format(img_id, ins_id),
                        )
                        new_depth_path = os.path.join(
                            new_data_root,
                            cls_name,
                            video_name,
                            "{:06d}_{}-depth.png".format(img_id, ins_id),
                        )
                        mkdir_if_missing(
                            os.path.join(new_data_root, cls_name, video_name))
                        # save color img
                        cv2.imwrite(new_color_path, crop_color)
                        cv2.imwrite(new_depth_path, crop_depth)
                        # pose
                        pose = np.zeros((3, 4))
                        R = np.array(instance["cam_R_m2c"]).reshape((3, 3))
                        t = np.array(instance["cam_t_m2c"]) / 1000.0  # mm -> m
                        pose[:3, :3] = R
                        pose[:3, 3] = t
                        pose_path = os.path.join(
                            new_data_root,
                            cls_name,
                            video_name,
                            "{:06d}_{}-pose.txt".format(img_id, ins_id),
                        )
                        write_pose_file(pose_path, cls_idx, pose)
                        # label
                        # depth = read_img(old_depth_path, 1)
                        surf_color = None  # (1, 0, 0)  # ?????
                        im_size = (640, 480)  # (w, h)
                        # Render with the reference intrinsics so the label
                        # aligns with the shifted/cropped images.
                        ren_rgb, ren_depth = renderer.render(
                            model,
                            im_size,
                            K_0,
                            R,
                            t,
                            clip_near=ZNEAR,
                            clip_far=ZFAR,
                            surf_color=surf_color,
                            mode="rgb+depth",
                        )
                        ren_rgb = ren_rgb.astype("uint8")
                        # print('ren_rgb: ', ren_rgb.max(), ren_rgb.min())
                        # print('ren_depth: ', ren_depth.max(), ren_depth.min())
                        # Binary mask: rendered-depth footprint of the object.
                        label = np.zeros((crop_height, crop_width))
                        label[ren_depth != 0] = 1
                        label_path = os.path.join(
                            new_data_root,
                            cls_name,
                            video_name,
                            "{:06d}_{}-label.png".format(img_id, ins_id),
                        )
                        cv2.imwrite(label_path, label)
                        # def vis_check():
                        #     fig = plt.figure(figsize=(8, 6), dpi=120)
                        #     plt.subplot(2, 3, 1)
                        #
                        #     plt.imshow(crop_depth)
                        #     plt.title('crop_depth')
                        #
                        #     plt.subplot(2, 3, 2)
                        #     # plt.imshow(label)
                        #     # plt.title('label rendered')
                        #     depth_diff = crop_depth.copy()
                        #     depth_diff[ren_depth!=0] = 0
                        #     plt.imshow(depth_diff)
                        #     plt.title('depth_diff')
                        #
                        #     plt.subplot(2, 3, 3)
                        #     # color_img = read_img(old_color_path, 3)
                        #     plt.imshow(crop_color[:, :, [2, 1, 0]])
                        #     plt.title('color image')
                        #
                        #     plt.subplot(2, 3, 4)
                        #     plt.imshow(ren_rgb)
                        #     plt.title('ren_rgb')
                        #
                        #     plt.subplot(2, 3, 5)
                        #     plt.imshow(ren_depth)
                        #     plt.title('ren_depth')
                        #
                        #     plt.subplot(2, 3, 6)
                        #     color_diff = crop_color - ren_rgb
                        #     plt.imshow(color_diff)
                        #     plt.title('color_diff')
                        #
                        #     plt.show()
                        # vis_check()
                        # real idx
                        real_indices.append("test/{}/{}/{:06d}_{}".format(
                            cls_name, video_name, img_id, ins_id))
        # one idx file for each video of each class
        # NOTE(review): video_name here is the value left over from the loop
        # above, so with multiple sel_videos only the last video's filename is
        # written while real_indices spans all videos — confirm intended.
        real_idx_file = os.path.join(
            real_set_dir, "{}_{}_test.txt".format(cls_name, video_name))
        with open(real_idx_file, "w") as f:
            for real_idx in real_indices:
                f.write(real_idx + "\n")
classes = sorted(classes) def class2idx(class_name, idx2class=idx2class): for k, v in idx2class.items(): if v == class_name: return k LM6d_occ_dsm_root = os.path.join( cur_path, "../data/LINEMOD_6D/LM6d_converted/LM6d_occ_dsm" ) # output dir pose_dir = os.path.join(LM6d_occ_dsm_root, "ds_rendered_poses") mkdir_if_missing(pose_dir) sel_classes = classes num_rendered_per_observed = 1 # 10 K = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]) version = "v1" angle_std, angle_max, x_std, y_std, z_std = [15.0, 45.0, 0.01, 0.01, 0.05] print(angle_std, angle_max, x_std, y_std, z_std) image_set = "NDtrain" def main(): for cls_name in tqdm(sel_classes): print(cls_name) # if cls_name != 'driller': # continue
K = np.array([[572.4114, 0, 325.2611], [0, 573.57043, 242.04899], [0, 0, 1]]) ZNEAR = 0.25 ZFAR = 6.0 DEPTH_FACTOR = 1000 real_set_dir = os.path.join( cur_dir, '../data/LINEMOD_6D/LM6d_render_v1/image_set/real') real_data_root = os.path.join(cur_dir, '../data/LINEMOD_6D/LM6d_render_v1/data/real') LM6d_root = os.path.join(cur_dir, '../data/LINEMOD_6D') # render real render_real_root = os.path.join( cur_dir, '../data/LINEMOD_6D/LM6d_render_v1/data/render_real/occ_test/') mkdir_if_missing(render_real_root) # ========================================================== def write_pose_file(pose_file, class_idx, pose_ori_m): text_file = open(pose_file, 'w') text_file.write("{}\n".format(class_idx)) pose_str = "{} {} {} {}\n{} {} {} {}\n{} {} {} {}" \ .format(pose_ori_m[0, 0], pose_ori_m[0, 1], pose_ori_m[0, 2], pose_ori_m[0, 3], pose_ori_m[1, 0], pose_ori_m[1, 1], pose_ori_m[1, 2], pose_ori_m[1, 3], pose_ori_m[2, 0], pose_ori_m[2, 1], pose_ori_m[2, 2], pose_ori_m[2, 3]) text_file.write(pose_str) def gen_render_real(): for cls_idx, cls_name in idx2class.items():