def downsample_point_clouds():
    cfg = parse_arguments()
    vox_size = cfg.downsample_voxel_size
    synth_set = cfg.synth_set
    inp_dir = os.path.join(cfg.inp_dir, synth_set)
    files = glob.glob('{}/*.mat'.format(inp_dir))

    out_dir = cfg.out_dir
    out_synthset = os.path.join(out_dir, cfg.synth_set)
    mkdir_if_missing(out_synthset)

    for k, model_file in enumerate(files):
        print("{}/{}".format(k, len(files)))
        file_name = os.path.basename(model_file)
        sample_name, _ = os.path.splitext(file_name)
        obj = scipy.io.loadmat(model_file)

        # skip models that were already downsampled
        out_filename = "{}/{}.mat".format(out_synthset, sample_name)
        if os.path.isfile(out_filename):
            print("already exists:", sample_name)
            continue

        Vgt = obj["points"]

        pcd = open3d.PointCloud()
        pcd.points = open3d.Vector3dVector(Vgt)
        downpcd = open3d.voxel_down_sample(pcd, voxel_size=vox_size)
        down_xyz = np.asarray(downpcd.points)
        scipy.io.savemat(out_filename, {"points": down_xyz})

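# A standalone sketch of the same voxel downsampling against the current
# open3d API (the script above uses the pre-0.8 names open3d.PointCloud /
# open3d.voxel_down_sample, which were later moved). The random points and
# the voxel size here are illustrative stand-ins for obj["points"] / vox_size.
import numpy as np
import open3d as o3d

pts = np.random.rand(10000, 3)              # stand-in for obj["points"]
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(pts)
downpcd = pcd.voxel_down_sample(voxel_size=0.02)
down_xyz = np.asarray(downpcd.points)
print(down_xyz.shape)
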
def train():
    cfg = app_config
    setup_environment(cfg)
    train_dir = cfg.checkpoint_dir
    mkdir_if_missing(train_dir)

    split_name = "val"  # NOTE: switch to "train" for the training split
    dataset_file = os.path.join(cfg.inp_dir, f"{cfg.synth_set}_{split_name}.pkl")
    dataset = Chair_dataset(dataset_file, cfg)
    if cfg.shuffle_dataset:
        torch.manual_seed(7000)

    print("*" * 30)
    print('creating dataloader')
    train_loader = DataLoader(dataset=dataset,
                              batch_size=cfg.batch_size,
                              num_workers=8,
                              shuffle=cfg.shuffle_dataset)

    # Build the model once, outside the loops; constructing it per batch
    # would reinitialize the weights on every step and prevent any learning.
    model = model_pc_pytorch.ModelPointCloud(cfg)

    for epoch in tqdm(range(cfg.max_number_of_steps), desc='Epoch', ncols=100):
        train_size = len(train_loader)
        ts = time.time()
        print_now = 0
        for batch_idx, train_data in tqdm(enumerate(train_loader),
                                          desc='Batch', total=train_size, ncols=100):
            inputs = preprocess(cfg, train_data)
            # forward pass + backward pass + optimizer step
            loss = model.optimize_parameters(inputs)
            if print_now % 200 == 0:
                print("Epoch: %d, Step: %d, Loss: %f" % (epoch, print_now, loss.item()))
            print_now += 1

    print("Training Complete!")

def main(_):
    cfg = app_config
    exp_dir = cfg.checkpoint_dir
    out_dir = os.path.join(exp_dir, 'render')
    mkdir_if_missing(out_dir)
    inp_dir = os.path.join(exp_dir, cfg.save_predictions_dir)

    if cfg.models_list:
        models = parse_lines(cfg.models_list)
    else:
        dataset = Dataset3D(cfg)
        models = [sample.name for sample in dataset.data]

    for model_name in models:
        in_file = "{}/{}_pc.mat".format(inp_dir, model_name)
        if not os.path.isfile(in_file):
            in_file = "{}/{}_pc.npz".format(inp_dir, model_name)
            assert os.path.isfile(in_file), "no input file with saved point cloud"

        out_file = "{}/{}.png".format(out_dir, model_name)
        if os.path.isfile(out_file):
            print("{} already rendered".format(model_name))
            continue

        args = build_command_line_args([["in_file", in_file],
                                        ["out_file", out_file],
                                        ["vis_azimuth", cfg.vis_azimuth],
                                        ["vis_elevation", cfg.vis_elevation],
                                        ["vis_dist", cfg.vis_dist],
                                        ["cycles_samples", cfg.render_cycles_samples],
                                        ["voxels", False],
                                        ["colored_subsets", cfg.render_colored_subsets],
                                        ["image_size", cfg.render_image_size]])
        render_cmd = "{} --background -P {} -- {}".format(blender_exec, python_script, args)
        os.system(render_cmd)

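# For reference, the command assembled above follows Blender's headless CLI
# convention: `blender --background -P script.py -- <script args>`, where
# everything after the bare `--` is passed through to the Python script.
# Assuming build_command_line_args renders each [key, value] pair as
# "--key value" (an assumption; the helper is defined elsewhere), the final
# call would look roughly like this, with illustrative paths and values:
#
#   blender --background -P render_script.py -- \
#       --in_file preds/model_pc.mat --out_file render/model.png \
#       --vis_azimuth 140 --vis_elevation 15 --vis_dist 2.0 \
#       --cycles_samples 100 --voxels False --colored_subsets False \
#       --image_size 256
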
def create_record(synth_set, split_name, models):
    im_size = args.image_size
    num_views = args.num_views
    num_models = len(models)

    mkdir_if_missing(args.out_dir)
    # address to save the data
    train_filename = "{}/{}_{}.pkl".format(args.out_dir, synth_set, split_name)

    render_dir = os.path.join(args.inp_dir_renders, synth_set)
    voxel_dir = os.path.join(args.inp_dir_voxels, synth_set)

    imagel, maskl, namel, voxl, extl, cam_posl, depthl = [], [], [], [], [], [], []
    for j, model in enumerate(models):
        print("{}/{}".format(j, num_models))

        if args.store_voxels:
            voxels_file = os.path.join(voxel_dir, "{}.mat".format(model))
            voxels = loadmat(voxels_file)["Volume"].astype(np.float32)
            # transpose/flip needed to be compatible with the PTN projections
            voxels = np.transpose(voxels, (1, 0, 2))
            voxels = np.flip(voxels, axis=1)

        im_dir = os.path.join(render_dir, model)
        images = sorted(glob.glob("{}/render_*.png".format(im_dir)))

        rgbs = np.zeros((num_views, im_size, im_size, 3), dtype=np.float32)
        masks = np.zeros((num_views, im_size, im_size, 1), dtype=np.float32)
        cameras = np.zeros((num_views, 4, 4), dtype=np.float32)
        cam_pos = np.zeros((num_views, 3), dtype=np.float32)
        depths = np.zeros((num_views, im_size, im_size, 1), dtype=np.float32)

        assert len(images) >= num_views

        for k in range(num_views):
            im_file = images[k]
            img = imread(im_file)
            rgb = img[:, :, 0:3]
            mask = img[:, :, [3]]
            mask = mask / 255.0

            # composite onto a white background using the alpha mask
            mask_fg = np.repeat(mask, 3, 2)
            mask_bg = 1.0 - mask_fg
            rgb = rgb * mask_fg + np.ones(rgb.shape) * 255.0 * mask_bg
            rgb = rgb / 255.0

            actual_size = rgb.shape[0]
            if im_size != actual_size:
                rgb = im_resize(rgb, (im_size, im_size), order=3)
                mask = im_resize(mask, (im_size, im_size), order=3)
            rgbs[k, :, :, :] = rgb
            masks[k, :, :, :] = mask

            fn = os.path.basename(im_file)
            img_idx = int(re.search(r'\d+', fn).group())

            if args.store_camera:
                cam_file = "{}/camera_{}.mat".format(im_dir, img_idx)
                cam_extr, pos = read_camera(cam_file)
                cameras[k, :, :] = cam_extr
                cam_pos[k, :] = pos

            if args.store_depth:
                depth_file = "{}/depth_{}.png".format(im_dir, img_idx)
                depth = loadDepth(depth_file)
                d_max = 10.0
                d_min = 0.0
                depth = (depth - d_min) / d_max
                depth_r = im_resize(depth, (im_size, im_size), order=0)
                depth_r = depth_r * d_max + d_min
                depths[k, :, :] = np.expand_dims(depth_r, -1)

        imagel.append(_dtype_feature(rgbs))
        maskl.append(_dtype_feature(masks))
        namel.append(_string_feature(model))
        if args.store_voxels:
            voxl.append(_dtype_feature(voxels))
        if args.store_camera:
            extl.append(_dtype_feature(cameras))
            cam_posl.append(_dtype_feature(cam_pos))
        if args.store_depth:
            depthl.append(_dtype_feature(depths))

    feature = {"image": imagel,
               "mask": maskl,
               "name": namel,
               "vox": voxl,
               "extrinsic": extl,
               "cam_pos": cam_posl,
               "depth": depthl}

    with open(train_filename, 'wb') as fp:
        pickle.dump(feature, fp)

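# A minimal sketch of reading the pickle written above. The path and split
# name are illustrative. Each key maps to a per-model list, aligned by index,
# so the i-th image/mask/name all describe the same model; note the arrays
# were wrapped by _dtype_feature/_string_feature before being appended, so
# entries may need unwrapping depending on how those helpers are defined.
import pickle

with open("output/chairs_train.pkl", "rb") as fp:
    record = pickle.load(fp)

print(sorted(record.keys()))  # ['cam_pos', 'depth', 'extrinsic', 'image', 'mask', 'name', 'vox']
print(len(record["name"]))    # number of models in the split
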
def create_record(synth_set, split_name, models):
    im_size = FLAGS.image_size
    num_views = FLAGS.num_views
    num_models = len(models)

    mkdir_if_missing(FLAGS.out_dir)

    # unlike the TFRecords variant of this script, this one writes one
    # pickle per model (see feature_file at the bottom of the loop);
    # voxel loading (FLAGS.store_voxels) is disabled in this variant
    render_dir = os.path.join(FLAGS.inp_dir_renders, synth_set)
    voxel_dir = os.path.join(FLAGS.inp_dir_voxels, synth_set)

    for j, model in enumerate(models):
        print("{}/{}".format(j, num_models))

        im_dir = os.path.join(render_dir, model, 'imgs')
        images = sorted(glob.glob("{}/*.jpg".format(im_dir)))

        rgbs = np.zeros((num_views, im_size, im_size, 3), dtype=np.float32)
        masks = np.zeros((num_views, im_size, im_size, 1), dtype=np.float32)
        cameras = np.zeros((num_views, 4, 4), dtype=np.float32)
        cam_pos = np.zeros((num_views, 3), dtype=np.float32)
        depths = np.zeros((num_views, im_size, im_size, 1), dtype=np.float32)

        assert len(images) >= num_views

        for k in range(num_views):
            im_file = images[k]
            img = imread(im_file)
            # masks live in a parallel 'masks' directory
            mask = imread(im_file.replace('imgs', 'masks'))
            rgb = img[:, :, 0:3]
            mask = mask.reshape(mask.shape[0], mask.shape[1], 1)
            mask = mask / 255.0

            # composite onto a white background using the mask
            mask_fg = np.repeat(mask, 3, 2)
            mask_bg = 1.0 - mask_fg
            rgb = rgb * mask_fg + np.ones(rgb.shape) * 255.0 * mask_bg
            rgb = rgb / 255.0

            actual_size = rgb.shape[0]
            if im_size != actual_size:
                rgb = im_resize(rgb, (im_size, im_size), order=3)
                mask = im_resize(mask, (im_size, im_size), order=3)
            rgbs[k, :, :, :] = rgb
            masks[k, :, :, :] = mask

            fn = os.path.basename(im_file)
            img_idx = int(re.search(r'\d+', fn).group())

            if FLAGS.store_camera:
                cam_file = "{}/camera_{}.mat".format(im_dir, img_idx)
                cam_extr, pos = read_camera(cam_file)
                cameras[k, :, :] = cam_extr
                cam_pos[k, :] = pos

            if FLAGS.store_depth:
                depth_file = "{}/depth_{}.png".format(im_dir, img_idx)
                depth = loadDepth(depth_file)
                d_max = 10.0
                d_min = 0.0
                depth = (depth - d_min) / d_max
                depth_r = im_resize(depth, (im_size, im_size), order=0)
                depth_r = depth_r * d_max + d_min
                depths[k, :, :] = np.expand_dims(depth_r, -1)

        # create a feature dict for this model
        feature = {"image": rgbs, "mask": masks, "name": model}
        if FLAGS.store_camera:
            feature["extrinsic"] = cameras
            feature["cam_pos"] = cam_pos
        if FLAGS.store_depth:
            feature["depth"] = depths

        feature_file = "{}/{}_features.p".format(FLAGS.out_dir, model)
        with open(feature_file, 'wb') as f:
            pickle.dump(feature, f)

def compute_predictions():
    cfg = app_config
    setup_environment(cfg)

    exp_dir = cfg.checkpoint_dir

    cfg.batch_size = 1
    cfg.step_size = 1

    pc_num_points = cfg.pc_num_points
    vox_size = cfg.vox_size
    save_pred = cfg.save_predictions
    save_voxels = cfg.save_voxels
    fast_conversion = True

    pose_student = cfg.pose_predictor_student and cfg.predict_pose

    g = tf.Graph()
    with g.as_default():
        model = model_pc.ModelPointCloud(cfg)

        out = build_model(model)
        input_image = out["inputs"]
        cam_matrix = out["camera_extr_src"]
        cam_quaternion = out["cam_quaternion"]
        point_cloud = out["points_1"]
        rgb = out["rgb_1"] if cfg.pc_rgb else tf.no_op()
        projs = out["projs"]
        projs_rgb = out["projs_rgb"]
        projs_depth = out["projs_depth"]
        cam_transform = out["cam_transform"]
        z_latent = out["z_latent"]

        if pose_student:
            proj_student, camera_pose_student = model_student(input_image, model)

        input_pc = tf.placeholder(tf.float32, [cfg.batch_size, None, 3])
        if save_voxels:
            if fast_conversion:
                voxels, _ = pointcloud2voxels3d_fast(cfg, input_pc, None)
                voxels = tf.expand_dims(voxels, axis=-1)
                voxels = smoothen_voxels3d(cfg, voxels, model.gauss_kernel())
            else:
                voxels = pointcloud2voxels(cfg, input_pc, model.gauss_sigma())

        q_inp = tf.placeholder(tf.float32, [1, 4])
        q_matrix = as_rotation_matrix(q_inp)

        input_pc, pred_quat, gt_quat, pc_unrot = model_unrotate_points(cfg)
        pc_rot = quaternion_rotate(input_pc, pred_quat)

        config = tf.ConfigProto(device_count={'GPU': 1})
        config.gpu_options.per_process_gpu_memory_fraction = cfg.per_process_gpu_memory_fraction

        sess = tf.Session(config=config)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        variables_to_restore = slim.get_variables_to_restore(exclude=["meta"])
        restorer = tf.train.Saver(variables_to_restore)
        checkpoint_file = tf.train.latest_checkpoint(exp_dir)
        print("restoring checkpoint", checkpoint_file)
        restorer.restore(sess, checkpoint_file)

        save_dir = os.path.join(exp_dir, '{}_vis_proj'.format(cfg.save_predictions_dir))
        mkdir_if_missing(save_dir)
        save_pred_dir = os.path.join(exp_dir, cfg.save_predictions_dir)
        mkdir_if_missing(save_pred_dir)

        vis_size = cfg.vis_size

        dataset = Dataset3D(cfg)

        pose_num_candidates = cfg.pose_predict_num_candidates
        num_views = cfg.num_views
        plot_h = 4
        plot_w = 6
        num_views = int(min(num_views, plot_h * plot_w / 2))

        if cfg.models_list:
            model_names = parse_lines(cfg.models_list)
        else:
            model_names = [sample.name for sample in dataset.data]
        num_models = len(model_names)

        for k in range(num_models):
            model_name = model_names[k]
            sample = dataset.sample_by_name(model_name)

            images = sample.image
            masks = sample.mask
            if cfg.saved_camera:
                cameras = sample.camera
                cam_pos = sample.cam_pos
            if cfg.vis_depth_projs:
                depths = sample.depth
            if cfg.variable_num_views:
                num_views = sample.num_views

            print("{}/{} {}".format(k, num_models, model_name))

            if pose_num_candidates == 1:
                grid = np.empty((plot_h, plot_w), dtype=object)
            else:
                plot_w = pose_num_candidates + 1
                if pose_student:
                    plot_w += 1
                grid = np.empty((num_views, plot_w), dtype=object)

            if save_pred:
                all_pcs = np.zeros((num_views, pc_num_points, 3))
                all_cameras = np.zeros((num_views, 4))
                all_voxels = np.zeros((num_views, vox_size, vox_size, vox_size))
                all_z_latent = np.zeros((num_views, cfg.fc_dim))

            for view_idx in range(num_views):
                input_image_np = images[[view_idx], :, :, :]
                gt_mask_np = masks[[view_idx], :, :, :]
                if cfg.saved_camera:
                    extr_mtr = cameras[view_idx, :, :]
                    cam_quaternion_np = quaternion_from_campos(cam_pos[view_idx, :])
                    cam_quaternion_np = np.expand_dims(cam_quaternion_np, axis=0)
                else:
                    extr_mtr = np.zeros((4, 4))
                    # dummy quaternion so the feed_dict below stays valid
                    cam_quaternion_np = np.zeros((1, 4))

                if cfg.pc_rgb:
                    proj_tensor = projs_rgb
                elif cfg.vis_depth_projs:
                    proj_tensor = projs_depth
                else:
                    proj_tensor = projs

                (pc_np, rgb_np, proj_np, cam_transf_np, z_latent_np) = sess.run(
                    [point_cloud, rgb, proj_tensor, cam_transform, z_latent],
                    feed_dict={input_image: input_image_np,
                               cam_matrix: extr_mtr,
                               cam_quaternion: cam_quaternion_np})

                if pose_student:
                    (proj_student_np, camera_student_np) = sess.run(
                        [proj_student, camera_pose_student],
                        feed_dict={input_image: input_image_np})
                    predicted_camera = camera_student_np
                else:
                    predicted_camera = cam_transf_np

                if cfg.vis_depth_projs:
                    proj_np = normalise_depthmap(proj_np)
                    if depths is not None:
                        depth_np = depths[view_idx, :, :, :]
                        depth_np = normalise_depthmap(depth_np)
                    else:
                        depth_np = 1.0 - np.squeeze(gt_mask_np)
                    if pose_student:
                        proj_student_np = normalise_depthmap(proj_student_np)

                if cfg.predict_pose:
                    if cfg.save_rotated_points:
                        ref_rot = scipy.io.loadmat("{}/final_reference_rotation.mat".format(exp_dir))
                        ref_rot = ref_rot["rotation"]
                        pc_np_unrot = sess.run(pc_rot,
                                               feed_dict={input_pc: pc_np,
                                                          pred_quat: ref_rot})
                        pc_np = pc_np_unrot

                if cfg.pc_rgb:
                    gt_image = input_image_np
                elif cfg.vis_depth_projs:
                    gt_image = depth_np
                else:
                    gt_image = gt_mask_np

                if pose_num_candidates == 1:
                    view_j = view_idx * 2 // plot_w
                    view_i = view_idx * 2 % plot_w

                    gt_image = np.squeeze(gt_image)
                    grid[view_j, view_i] = mask4vis(cfg, gt_image, vis_size)

                    curr_img = np.squeeze(proj_np)
                    grid[view_j, view_i + 1] = mask4vis(cfg, curr_img, vis_size)

                    if cfg.save_individual_images:
                        curr_dir = os.path.join(save_dir, sample.name)
                        if not os.path.exists(curr_dir):
                            os.makedirs(curr_dir)
                        imageio.imwrite(os.path.join(curr_dir, '{}_{}.png'.format(view_idx, 'rgb_gt')),
                                        mask4vis(cfg, np.squeeze(input_image_np), vis_size))
                        imageio.imwrite(os.path.join(curr_dir, '{}_{}.png'.format(view_idx, 'mask_pred')),
                                        mask4vis(cfg, np.squeeze(proj_np), vis_size))
                else:
                    view_j = view_idx

                    gt_image = np.squeeze(gt_image)
                    grid[view_j, 0] = mask4vis(cfg, gt_image, vis_size)

                    for kk in range(pose_num_candidates):
                        curr_img = np.squeeze(proj_np[kk, :, :, :])
                        grid[view_j, kk + 1] = mask4vis(cfg, curr_img, vis_size)

                        if cfg.save_individual_images:
                            curr_dir = os.path.join(save_dir, sample.name)
                            if not os.path.exists(curr_dir):
                                os.makedirs(curr_dir)
                            imageio.imwrite(os.path.join(curr_dir, '{}_{}_{}.png'.format(view_idx, kk, 'mask_pred')),
                                            mask4vis(cfg, np.squeeze(curr_img), vis_size))

                    if cfg.save_individual_images:
                        imageio.imwrite(os.path.join(curr_dir, '{}_{}.png'.format(view_idx, 'mask_gt')),
                                        mask4vis(cfg, np.squeeze(gt_mask_np), vis_size))

                    if pose_student:
                        grid[view_j, -1] = mask4vis(cfg, np.squeeze(proj_student_np), vis_size)

                if save_pred:
                    all_pcs[view_idx, :, :] = np.squeeze(pc_np)
                    all_z_latent[view_idx] = z_latent_np
                    if cfg.predict_pose:
                        all_cameras[view_idx, :] = predicted_camera

                if save_voxels:
                    # multiplying by two is necessary because
                    # the pc->voxel conversion expects points in [-1, 1] range
                    pc_np_range = pc_np
                    if not fast_conversion:
                        pc_np_range *= 2.0
                    voxels_np = sess.run(voxels, feed_dict={input_pc: pc_np_range})
                    all_voxels[view_idx, :, :, :] = np.squeeze(voxels_np)

                vis_view = view_idx == 0 or cfg.vis_all_views
                if cfg.vis_voxels and vis_view:
                    rgb_np = np.squeeze(rgb_np) if cfg.pc_rgb else None
                    vis_pc(np.squeeze(pc_np), rgb=rgb_np)

            grid_merged = merge_grid(cfg, grid)
            imageio.imwrite("{}/{}_proj.png".format(save_dir, sample.name), grid_merged)

            if save_pred:
                if cfg.save_as_mat:
                    save_dict = {"points": all_pcs,
                                 "z_latent": all_z_latent}
                    if cfg.predict_pose:
                        save_dict["camera_pose"] = all_cameras
                    scipy.io.savemat("{}/{}_pc".format(save_pred_dir, sample.name),
                                     mdict=save_dict)
                else:
                    np.savez("{}/{}_pc".format(save_pred_dir, sample.name), all_pcs)

            if save_voxels:
                np.savez("{}/{}_vox".format(save_pred_dir, sample.name), all_voxels)

        sess.close()

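# A minimal sketch of inspecting one of the saved predictions, assuming
# cfg.save_as_mat was set (the file name and keys follow the savemat call
# above; the path here is illustrative, and scipy appends ".mat" itself).
import scipy.io

pred = scipy.io.loadmat("checkpoint_dir/predictions/some_model_pc.mat")
points = pred["points"]    # (num_views, pc_num_points, 3)
z = pred["z_latent"]       # (num_views, fc_dim)
# "camera_pose" is present only when cfg.predict_pose was enabled.
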
def train():
    cfg = app_config
    setup_environment(cfg)
    train_dir = cfg.checkpoint_dir
    mkdir_if_missing(train_dir)

    tf.logging.set_verbosity(tf.logging.INFO)

    split_name = "train"
    dataset_file = os.path.join(cfg.inp_dir, f"{cfg.synth_set}_{split_name}.tfrecords")

    dataset = tf.data.TFRecordDataset(dataset_file,
                                      compression_type=tf_record_compression(cfg))
    if cfg.shuffle_dataset:
        dataset = dataset.shuffle(7000)
    dataset = dataset.map(lambda rec: parse_tf_records(cfg, rec),
                          num_parallel_calls=3) \
        .batch(cfg.batch_size) \
        .prefetch(buffer_size=100) \
        .repeat()

    iterator = dataset.make_one_shot_iterator()
    train_data = iterator.get_next()

    summary_writer = tfsum.create_file_writer(train_dir, flush_millis=10000)

    with summary_writer.as_default(), tfsum.record_summaries_every_n_global_steps(10):
        global_step = tf.train.get_or_create_global_step()
        print("global step: ", global_step)
        model = model_pc.ModelPointCloud(cfg, global_step)
        inputs = model.preprocess(train_data, cfg.step_size)

        model_fn = model.get_model_fn(is_training=True, reuse=False, run_projection=True)
        outputs = model_fn(inputs)

        # only fine-tune the point-cloud decoder
        train_scopes = ["decoder/point_cloud"]

        # loss
        task_loss = model.get_loss(inputs, outputs)
        reg_loss = regularization_loss(train_scopes, cfg)
        loss = task_loss + reg_loss

        # summary op
        summary_op = tfsum.all_summary_ops()

        # optimizer
        var_list = get_trainable_variables(train_scopes)
        optimizer = tf.train.AdamOptimizer(get_learning_rate(cfg, global_step))
        train_op = optimizer.minimize(loss, global_step, var_list)

    # saver
    max_to_keep = 2
    saver = tf.train.Saver(max_to_keep=max_to_keep)

    session_config = tf.ConfigProto(log_device_placement=False)
    session_config.gpu_options.allow_growth = cfg.gpu_allow_growth
    session_config.gpu_options.per_process_gpu_memory_fraction = cfg.per_process_gpu_memory_fraction

    sess = tf.Session(config=session_config)
    with sess, summary_writer.as_default():
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        tfsum.initialize(graph=tf.get_default_graph())

        # load pretrained weights before fine-tuning
        variables_to_restore = slim.get_variables_to_restore(exclude=["meta"])
        restorer = tf.train.Saver(variables_to_restore)
        checkpoint_file = "model-800000"  # e.g. tf.train.latest_checkpoint(cfg.checkpoint_dir)
        print("restoring checkpoint", checkpoint_file)
        restorer.restore(sess, checkpoint_file)

        global_step_val = 0
        while global_step_val < cfg.max_number_of_steps:
            t0 = time.perf_counter()
            _, loss_val, global_step_val, summary = sess.run(
                [train_op, loss, global_step, summary_op])
            t1 = time.perf_counter()
            dt = t1 - t0
            print(f"step: {global_step_val}, loss = {loss_val:.4f} "
                  f"({dt:.3f} sec/step), lr = {sess.run(optimizer._lr)}")

            if global_step_val % 5000 == 0:
                saver.save(sess, f"{train_dir}/model", global_step=global_step_val)

def compute_predictions():
    cfg = app_config
    setup_environment(cfg)

    exp_dir = cfg.checkpoint_dir

    cfg.batch_size = 1
    cfg.step_size = 1

    pc_num_points = cfg.pc_num_points
    vox_size = cfg.vox_size
    save_pred = cfg.save_predictions
    save_voxels = cfg.save_voxels
    fast_conversion = True

    pose_student = cfg.pose_predictor_student and cfg.predict_pose

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model_pc.ModelPointCloud(cfg)
    model = model.to(device)

    log_dir = '../../dpc/run/model_run_data/'
    learning_rate = 1e-4
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=cfg.weight_decay)

    # restore the model (and optimizer) state at this step
    global_step = 100000
    if global_step > 0:
        checkpoint_path = os.path.join(log_dir, 'model.ckpt_{}.pth'.format(global_step))
        print("Loading from path:", checkpoint_path)
        checkpoint = torch.load(checkpoint_path)
        global_step_val = checkpoint['global_step']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        loss = checkpoint['loss']
        print('Restored checkpoint at {} with loss {}'.format(global_step, loss))
    else:
        global_step_val = global_step

    save_dir = os.path.join(exp_dir, '{}_vis_proj'.format(cfg.save_predictions_dir))
    mkdir_if_missing(save_dir)
    save_pred_dir = os.path.join(exp_dir, cfg.save_predictions_dir)
    mkdir_if_missing(save_pred_dir)

    vis_size = cfg.vis_size

    split_name = "val"
    dataset_folder = cfg.inp_dir
    dataset = ShapeRecords(dataset_folder, cfg, split_name)
    dataset_loader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=cfg.batch_size,
                                                 shuffle=cfg.shuffle_dataset,
                                                 num_workers=4,
                                                 drop_last=True)

    pose_num_candidates = cfg.pose_predict_num_candidates
    num_views = cfg.num_views
    plot_h = 4
    plot_w = 6
    num_views = int(min(num_views, plot_h * plot_w / 2))

    if cfg.models_list:
        model_names = parse_lines(cfg.models_list)
    else:
        model_names = dataset.file_names
    num_models = len(model_names)

    for k in range(num_models):
        model_name = model_names[k]
        sample = dataset[k]

        images = sample['image']
        masks = sample['mask']
        if cfg.saved_camera:
            cameras = sample['extrinsic']
            cam_pos = sample['cam_pos']
        if cfg.vis_depth_projs:
            depths = sample['depth']
        if cfg.variable_num_views:
            num_views = sample['num_views']

        print("{}/{} {}".format(k, num_models, model_name))

        if pose_num_candidates == 1:
            grid = np.empty((plot_h, plot_w), dtype=object)
        else:
            plot_w = pose_num_candidates + 1
            if pose_student:
                plot_w += 1
            grid = np.empty((num_views, plot_w), dtype=object)

        if save_pred:
            all_pcs = np.zeros((num_views, pc_num_points, 3))
            all_cameras = np.zeros((num_views, 4))
            # the all_voxels / all_z_latent buffers of the TF version are
            # unused in this port

        for view_idx in range(num_views):
            input_image_np = images[[view_idx], :, :, :]
            gt_mask_np = masks[[view_idx], :, :, :]
            if cfg.saved_camera:
                extr_mtr = cameras[view_idx, :, :]
                cam_quaternion_np = quaternion_from_campos(cam_pos[view_idx, :])
                cam_quaternion_np = np.expand_dims(cam_quaternion_np, axis=0)
            else:
                extr_mtr = np.zeros((4, 4))
                # dummy quaternion so the input dict below stays valid
                cam_quaternion_np = np.zeros((1, 4))

            code = 'images' if cfg.predict_pose else 'images_1'
            input = {code: input_image_np,
                     'matrices': extr_mtr,
                     'camera_quaternion': cam_quaternion_np}

            out = build_model(model, input, global_step)
            input_image = out["inputs"]
            cam_matrix = out["camera_extr_src"]
            cam_quaternion = out["cam_quaternion"]
            point_cloud = out["points_1"]
            cam_transform = out["cam_transform"]
            # the rgb/projection/z_latent outputs ("rgb_1", "projs",
            # "projs_rgb", "projs_depth", "z_latent"), the depth/voxel
            # handling, and the grid visualization of the TF version are
            # not used in this port

            if pose_student:
                predicted_camera = out["pose_student"]
            else:
                predicted_camera = cam_transform

            if cfg.predict_pose and cfg.save_rotated_points:
                ref_rot = scipy.io.loadmat("{}/final_reference_rotation.mat".format(exp_dir))
                ref_rot = ref_rot["rotation"]
                # un-rotate the predicted points by the reference rotation
                point_cloud = quaternion_rotate(point_cloud, ref_rot)

            if save_pred:
                all_pcs[view_idx, :, :] = np.squeeze(point_cloud.detach().cpu())
                if cfg.predict_pose:
                    all_cameras[view_idx, :] = predicted_camera.detach().cpu()
        if save_pred:
            # save as pickle (a scipy.io.savemat branch existed here but was
            # permanently disabled)
            save_dict = {"points": all_pcs}
            if cfg.predict_pose:
                save_dict["camera_pose"] = all_cameras
            with open("{}/{}_pc.pkl".format(save_pred_dir, model_names[k]), 'wb') as handle:
                pickle.dump(save_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

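# A minimal sketch of loading one of the prediction pickles written above
# (the path is illustrative; the keys follow the save_dict built above).
import pickle

with open("predictions/some_model_pc.pkl", "rb") as handle:
    pred = pickle.load(handle)
points = pred["points"]          # (num_views, pc_num_points, 3)
cams = pred.get("camera_pose")   # present only when cfg.predict_pose was set
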
def train():
    cfg = app_config
    setup_environment(cfg)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    train_dir = cfg.checkpoint_dir
    mkdir_if_missing(train_dir)

    split_name = "train"
    dataset_folder = cfg.inp_dir
    dataset = ShapeRecords(dataset_folder, cfg, split_name)
    dataset_loader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=cfg.batch_size,
                                                 shuffle=cfg.shuffle_dataset,
                                                 num_workers=4,
                                                 drop_last=True)
    summary_writer = SummaryWriter(log_dir=train_dir, flush_secs=10)

    ckpt_count = 1000    # save a checkpoint every ckpt_count steps
    summary_count = 100  # write image summaries every summary_count steps

    # create the model (resumed from a checkpoint below if global_step > 0)
    model = model_pc.ModelPointCloud(cfg)
    model = model.to(device)
    print(model)

    log_dir = '../../dpc/run/model_run_data_lamp/'
    mkdir_if_missing(log_dir)

    learning_rate = 1e-4
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=cfg.weight_decay)

    # training steps
    global_step = 38000
    if global_step > 0:
        checkpoint_path = os.path.join(log_dir, 'model.ckpt_{}.pth'.format(global_step))
        print("Loading from path:", checkpoint_path)
        checkpoint = torch.load(checkpoint_path)
        global_step_val = checkpoint['global_step']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    else:
        global_step_val = global_step

    model.train()
    while global_step_val < cfg.max_number_of_steps:
        step_loss = 0.0
        for i, train_data in enumerate(dataset_loader, 0):
            t9 = time.perf_counter()
            # move the batch to the training device
            for k in train_data.keys():
                try:
                    train_data[k] = train_data[k].to(device)
                except AttributeError:
                    pass

            # get inputs by data processing
            t0 = time.perf_counter()
            inputs = model.preprocess(train_data, cfg.step_size)
            t1 = time.perf_counter()

            # zero the parameter gradients
            optimizer.zero_grad()
            t2 = time.perf_counter()

            if global_step_val % summary_count == 0:
                # forward pass with summaries and image logging
                outputs = model(inputs, global_step_val, is_training=True,
                                run_projection=True, summary_writer=summary_writer)
                loss, min_loss = model.get_loss(inputs, outputs, summary_writer,
                                                add_summary=True,
                                                global_step=global_step_val)
                summary_writer.add_image(
                    'prediction',
                    outputs['projs'].detach().cpu().numpy()[min_loss[0]].transpose(2, 0, 1),
                    global_step_val)
                summary_writer.add_image(
                    'actual',
                    inputs['masks'].detach().cpu().numpy()[0].transpose(2, 0, 1),
                    global_step_val)
            else:
                outputs = model(inputs, global_step_val, is_training=True,
                                run_projection=True)
                loss, _ = model.get_loss(inputs, outputs, add_summary=False)

            loss.backward()
            optimizer.step()

            del inputs
            del outputs
            t3 = time.perf_counter()
            dt = t3 - t9

            step_loss += loss.item()
            loss_avg = step_loss / (i + 1)
            print(f"step: {global_step_val}, loss = {loss.item():.5f}, "
                  f"loss_average = {loss_avg:.4f} ({dt:.3f} sec/step)")

            if global_step_val % ckpt_count == 0:
                # save a training checkpoint
                checkpoint_path = os.path.join(log_dir,
                                               'model.ckpt_{}.pth'.format(global_step_val))
                print("PATH:", checkpoint_path)
                torch.save({'global_step': global_step_val,
                            'model_state_dict': model.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict(),
                            'loss': loss_avg},
                           checkpoint_path)

            global_step_val += 1

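# A quick sketch for inspecting one of the checkpoints written above (the
# path is illustrative; the keys follow the torch.save call in train()).
import torch

ckpt = torch.load("../../dpc/run/model_run_data_lamp/model.ckpt_38000.pth",
                  map_location="cpu")
print(ckpt["global_step"], ckpt["loss"])
print(list(ckpt["model_state_dict"].keys())[:5])  # first few parameter tensors
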
def create_record(synth_set, split_name, models):
    im_size = FLAGS.image_size
    num_views = FLAGS.num_views
    num_models = len(models)

    mkdir_if_missing(FLAGS.out_dir)

    # address to save the TFRecords file
    train_filename = "{}/{}_{}.tfrecords".format(FLAGS.out_dir, synth_set, split_name)
    # open the TFRecords file
    options = tf_record_options(FLAGS)
    writer = tf.python_io.TFRecordWriter(train_filename, options=options)

    render_dir = os.path.join(FLAGS.inp_dir_renders, synth_set)
    voxel_dir = os.path.join(FLAGS.inp_dir_voxels, synth_set)

    for j, model in enumerate(models):
        print("{}/{}".format(j, num_models))

        if FLAGS.store_voxels:
            voxels_file = os.path.join(voxel_dir, "{}.mat".format(model))
            voxels = loadmat(voxels_file)["Volume"].astype(np.float32)
            # transpose/flip needed to be compatible with the PTN projections
            voxels = np.transpose(voxels, (1, 0, 2))
            voxels = np.flip(voxels, axis=1)

        im_dir = os.path.join(render_dir, model)
        images = sorted(glob.glob("{}/render_*.png".format(im_dir)))

        rgbs = np.zeros((num_views, im_size, im_size, 3), dtype=np.float32)
        masks = np.zeros((num_views, im_size, im_size, 1), dtype=np.float32)
        cameras = np.zeros((num_views, 4, 4), dtype=np.float32)
        cam_pos = np.zeros((num_views, 3), dtype=np.float32)
        depths = np.zeros((num_views, im_size, im_size, 1), dtype=np.float32)

        assert len(images) >= num_views

        for k in range(num_views):
            im_file = images[k]
            img = imread(im_file)
            rgb = img[:, :, 0:3]
            mask = img[:, :, [3]]
            mask = mask / 255.0

            # composite onto a white background using the alpha mask
            mask_fg = np.repeat(mask, 3, 2)
            mask_bg = 1.0 - mask_fg
            rgb = rgb * mask_fg + np.ones(rgb.shape) * 255.0 * mask_bg
            rgb = rgb / 255.0

            actual_size = rgb.shape[0]
            if im_size != actual_size:
                rgb = im_resize(rgb, (im_size, im_size), order=3)
                mask = im_resize(mask, (im_size, im_size), order=3)
            rgbs[k, :, :, :] = rgb
            masks[k, :, :, :] = mask

            fn = os.path.basename(im_file)
            img_idx = int(re.search(r'\d+', fn).group())

            if FLAGS.store_camera:
                cam_file = "{}/camera_{}.mat".format(im_dir, img_idx)
                cam_extr, pos = read_camera(cam_file)
                cameras[k, :, :] = cam_extr
                cam_pos[k, :] = pos

            if FLAGS.store_depth:
                depth_file = "{}/depth_{}.png".format(im_dir, img_idx)
                depth = loadDepth(depth_file)
                d_max = 10.0
                d_min = 0.0
                depth = (depth - d_min) / d_max
                depth_r = im_resize(depth, (im_size, im_size), order=0)
                depth_r = depth_r * d_max + d_min
                depths[k, :, :] = np.expand_dims(depth_r, -1)

        # create a feature dict for this model
        feature = {"image": _dtype_feature(rgbs),
                   "mask": _dtype_feature(masks),
                   "name": _string_feature(model)}
        if FLAGS.store_voxels:
            feature["vox"] = _dtype_feature(voxels)
        if FLAGS.store_camera:
            feature["extrinsic"] = _dtype_feature(cameras)
            feature["cam_pos"] = _dtype_feature(cam_pos)
        if FLAGS.store_depth:
            feature["depth"] = _dtype_feature(depths)

        # create an Example protocol buffer, serialize, and write to the file
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(example.SerializeToString())

    writer.close()
    sys.stdout.flush()

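# A minimal sketch of reading one serialized example back from the file
# written above (TF1 API; the path is illustrative, and a compressed file
# would additionally need options matching tf_record_options(FLAGS)).
import tensorflow as tf

record_iter = tf.python_io.tf_record_iterator("output/chairs_train.tfrecords")
example = tf.train.Example()
example.ParseFromString(next(record_iter))
print(example.features.feature["name"])  # the stored model-name feature
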