def save_network(self, dcn, optimizer, iteration, logging_dict=None):
    """
    Saves network and optimizer parameters for `iteration` to the logging
    directory, plus (optionally) the loss-history yaml files.

    :param dcn: network whose state_dict is saved
    :param optimizer: optimizer whose state_dict is saved
    :param iteration: training iteration, used to build the padded filenames
    :param logging_dict: if given, dumped to <iter>_log_history.yaml and
        summarized into loss.yaml
    :return: None
    :rtype: None
    """
    prefix = os.path.join(self._logging_dir,
                          utils.getPaddedString(iteration, width=6))

    torch.save(dcn.state_dict(), prefix + ".pth")
    torch.save(optimizer.state_dict(), prefix + ".pth.opt")

    # also save loss history stuff
    if logging_dict is None:
        return

    utils.saveToYaml(logging_dict, prefix + "_log_history.yaml")

    # rolling summary of the most recent loss values
    current_loss_file = os.path.join(self._logging_dir, 'loss.yaml')
    current_loss_data = self._get_current_loss(logging_dict)
    utils.saveToYaml(current_loss_data, current_loss_file)
def extract_descriptor_images_for_scene(dcn, dataset, scene_name, save_dir, overwrite=False):
    """
    Save the descriptor image (one .npy file per frame) for every image
    in a scene.

    :param dcn: dense correspondence network used to compute descriptors
    :param dataset: dataset providing pose data and RGB images
    :param scene_name: name of the scene to process
    :param save_dir: Absolute path of where to save images
    :param overwrite: if True an existing save_dir is deleted and recreated
    :return: None
    :rtype: None
    :raises ValueError: if save_dir exists and overwrite is False
    """
    pose_data = dataset.get_pose_data(scene_name)
    # BUGFIX: dict.keys() returns a view in Python 3 which has no .sort();
    # sorted() works on both Python 2 and 3
    image_idxs = sorted(pose_data.keys())

    num_images = len(pose_data)
    logging_frequency = 50
    start_time = time.time()

    # make the save dir, refusing to clobber an existing one unless overwrite
    if os.path.exists(save_dir):
        if not overwrite:
            raise ValueError(
                "save_dir %s already exists and overwrite is False" % (save_dir))
        else:
            shutil.rmtree(save_dir)
    os.makedirs(save_dir)

    for counter, img_idx in enumerate(image_idxs):
        if (counter % logging_frequency) == 0:
            print("processing image %d of %d" % (counter, num_images))

        rgb_img = dataset.get_rgb_image_from_scene_name_and_idx(
            scene_name, img_idx)

        # note that this has already been normalized
        rgb_img_tensor = dataset.rgb_image_to_tensor(rgb_img)
        res = dcn.forward_single_image_tensor(rgb_img_tensor).data

        descriptor_image_filename = utils.getPaddedString(
            img_idx, width=SpartanDataset.PADDED_STRING_WIDTH) + "_descriptor.npy"
        full_filepath = os.path.join(save_dir, descriptor_image_filename)
        # move to CPU before saving in case the descriptors live on the GPU
        np.save(full_filepath, res.cpu())

    elapsed_time = time.time() - start_time
    print("computing descriptor images took %d seconds" % (elapsed_time))
def mesh_cells_image_filename(self, img_idx):
    """
    Full path to the cell-labels ("mesh cells") image for the given frame.

    :param img_idx: index of the image/frame
    :return: absolute filename of the <padded idx>_mesh_cells.png image
    :rtype: str
    """
    basename = utils.getPaddedString(img_idx) + "_mesh_cells.png"
    return os.path.join(self.rendered_images_dir, basename)
def render_depth_images(self, output_dir=None, rendered_images_dir=None): """ Note: This is actually rendering against whatever is saved as self.foreground_reconstruction Renders the depth images on the entire scene processed/rendered_images/000000_depth.png :return: """ if output_dir is None: output_dir = os.path.join(self.foreground_reconstruction.data_dir, 'image_masks') if rendered_images_dir is None: rendered_images_dir = os.path.join( self.foreground_reconstruction.data_dir, 'rendered_images') start_time = time.time() start_time = time.time() # read in each image in the log image_dir = self.foreground_reconstruction.image_dir camera_pose_data = self.foreground_reconstruction.kinematics_pose_data img_file_extension = 'png' num_poses = self.foreground_reconstruction.kinematics_pose_data.num_poses( ) logging_rate = 50 counter = 0 for idx, value in camera_pose_data.pose_dict.iteritems(): if (counter % logging_rate) == 0: print "Rendering depth image for pose %d of %d" % (counter + 1, num_poses) camera_to_world = self.foreground_reconstruction.get_camera_to_world( idx) self.setCameraTransform(camera_to_world) depth_img = self.depthScanners[ 'foreground'].getDepthImageAsNumpyArray() depth_img_filename = os.path.join( rendered_images_dir, utils.getPaddedString(idx) + '_depth' + "." + img_file_extension) cv2.imwrite(depth_img_filename, depth_img) counter += 1 end_time = time.time() print "rendering depth images took %d seconds" % (end_time - start_time)
def save_network(self, dcn, optimizer, iteration, logging_dict=None):
    """
    Saves network parameters to logging directory.

    Writes <iter>.pth (network) and <iter>.pth.opt (optimizer); when a
    logging_dict is supplied also writes <iter>_log_history.yaml and a
    loss.yaml summary.

    :param dcn: network whose state_dict is saved
    :param optimizer: optimizer whose state_dict is saved
    :param iteration: training iteration used in the filenames
    :param logging_dict: optional loss-history dict
    :return: None
    :rtype: None
    """
    padded_iter = utils.getPaddedString(iteration, width=6)
    network_param_file = os.path.join(self._logging_dir, padded_iter + ".pth")
    optimizer_param_file = network_param_file + ".opt"

    torch.save(dcn.state_dict(), network_param_file)
    torch.save(optimizer.state_dict(), optimizer_param_file)

    # also save loss history stuff
    if logging_dict is not None:
        log_history_file = os.path.join(self._logging_dir,
                                        padded_iter + "_log_history.yaml")
        utils.saveToYaml(logging_dict, log_history_file)

        current_loss_file = os.path.join(self._logging_dir, 'loss.yaml')
        utils.saveToYaml(self._get_current_loss(logging_dict),
                         current_loss_file)
def load_pretrained(self, model_folder, iteration=None):
    """
    Loads network and optimizer parameters from a previous training run.

    Note: It is up to the user to ensure that the model parameters match,
    e.g. width, height, descriptor dimension etc.

    :param model_folder: folder containing the param files, e.g. 001000.pth.
        Absolute, or relative to pdc/trained_models/
    :param iteration: which iteration to load, e.g. 3500; if None the latest
        checkpoint in the folder is used
    :return: the iteration that was loaded
    :rtype: int
    """
    # resolve a relative folder against pdc/trained_models/
    if not os.path.isdir(model_folder):
        model_folder = os.path.join(utils.getPdcPath(), "trained_models",
                                    model_folder)

    # locate the <idx>.pth and <idx>.pth.opt files
    if iteration is None:
        # newest checkpoint wins; zero-padded names sort lexicographically
        files = os.listdir(model_folder)
        model_param_file = sorted(fnmatch.filter(files, '*.pth'))[-1]
        optim_param_file = sorted(fnmatch.filter(files, '*.pth.opt'))[-1]
        iteration = int(model_param_file.split(".")[0])
    else:
        padded = utils.getPaddedString(iteration, width=6)
        model_param_file = padded + ".pth"
        optim_param_file = padded + ".pth.opt"

    print("model_param_file", model_param_file)

    self._dcn = self.build_network()
    self._dcn.load_state_dict(
        torch.load(os.path.join(model_folder, model_param_file)))
    self._dcn.cuda()
    self._dcn.train()

    self._optimizer = self._construct_optimizer(self._dcn.parameters())
    self._optimizer.load_state_dict(
        torch.load(os.path.join(model_folder, optim_param_file)))

    return iteration
def load_pretrained(self, model_folder, iteration=None): """ Loads network and optimizer parameters from a previous training run. Note: It is up to the user to ensure that the model parameters match. e.g. width, height, descriptor dimension etc. :param model_folder: location of the folder containing the param files 001000.pth. Can be absolute or relative path. If relative then it is relative to pdc/trained_models/ :type model_folder: :param iteration: which index to use, e.g. 3500, if None it loads the latest one :type iteration: :return: iteration :rtype: """ if not os.path.isdir(model_folder): pdc_path = utils.getPdcPath() model_folder = os.path.join(pdc_path, "trained_models", model_folder) # find idx.pth and idx.pth.opt files if iteration is None: files = os.listdir(model_folder) model_param_file = sorted(fnmatch.filter(files, '*.pth'))[-1] iteration = int(model_param_file.split(".")[0]) optim_param_file = sorted(fnmatch.filter(files, '*.pth.opt'))[-1] else: prefix = utils.getPaddedString(iteration, width=6) model_param_file = prefix + ".pth" optim_param_file = prefix + ".pth.opt" print "model_param_file", model_param_file model_param_file = os.path.join(model_folder, model_param_file) optim_param_file = os.path.join(model_folder, optim_param_file) self._dcn = self.build_network() self._dcn.load_state_dict(torch.load(model_param_file)) self._dcn.cuda() self._dcn.train() self._optimizer = self._construct_optimizer(self._dcn.parameters()) self._optimizer.load_state_dict(torch.load(optim_param_file)) return iteration
def mesh_descriptors_filename(self, network_name, img_idx):
    """
    Full path to the .npz file that contains the mesh descriptors for one
    image.

    .npz reference:
    https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.savez.html#numpy.savez

    The archive contains two arrays (D = descriptor dimension):

    - cell_ids: np.array of size N, dtype=np.int64
    - cell_descriptors: np.array with shape [N, D], dtype=np.float64

    :param network_name: name of the network; selects the descriptors dir
    :param img_idx: index of the image/frame
    :return: absolute filename of the <padded idx>_mesh_descriptors.npz file
    :rtype: str
    """
    basename = utils.getPaddedString(img_idx) + "_mesh_descriptors.npz"
    return os.path.join(self.mesh_descriptors_dir(network_name), basename)
def render_depth_images(self, output_dir=None, rendered_images_dir=None): """ Run the mask generation algorithm :return: """ if output_dir is None: output_dir = os.path.join(self.foreground_reconstruction.data_dir, 'image_masks') if rendered_images_dir is None: rendered_images_dir = os.path.join(self.foreground_reconstruction.data_dir, 'rendered_images') start_time = time.time() # read in each image in the log image_dir = self.foreground_reconstruction.image_dir camera_pose_data = self.foreground_reconstruction.kinematics_pose_data img_file_extension = 'png' num_poses = self.foreground_reconstruction.kinematics_pose_data.num_poses() logging_rate = 50 counter = 0 for idx, value in camera_pose_data.pose_dict.iteritems(): if (counter % logging_rate) == 0: print "Rendering depth image for pose %d of %d" % (counter + 1, num_poses) camera_to_world = self.foreground_reconstruction.get_camera_to_world(idx) self.setCameraTransform(camera_to_world) depth_img = self.depthScanners['foreground'].getDepthImageAsNumpyArray() depth_img_filename = os.path.join(rendered_images_dir, utils.getPaddedString(idx) + '_depth' + "." + img_file_extension) cv2.imwrite(depth_img_filename, depth_img) counter += 1 end_time = time.time() print "rendering depth images took %d seconds" % (end_time - start_time)
def run(self, output_dir=None, rendered_images_dir=None): """ Run the mask generation algorithm :return: """ if output_dir is None: output_dir = os.path.join(self.foreground_reconstruction.data_dir, 'image_masks') if rendered_images_dir is None: rendered_images_dir = os.path.join( self.foreground_reconstruction.data_dir, 'rendered_images') if not os.path.isdir(output_dir): os.makedirs(output_dir) if not os.path.isdir(rendered_images_dir): os.makedirs(rendered_images_dir) start_time = time.time() # read in each image in the log image_dir = self.foreground_reconstruction.image_dir camera_pose_data = self.foreground_reconstruction.kinematics_pose_data img_file_extension = 'png' num_poses = self.foreground_reconstruction.kinematics_pose_data.num_poses( ) logging_rate = 50 counter = 0 for idx, value in camera_pose_data.pose_dict.iteritems(): if (counter % logging_rate) == 0: print "Rendering mask for pose %d of %d" % (counter + 1, num_poses) mask_image_filename = utils.getPaddedString( idx) + "_mask" + "." + img_file_extension mask_image_full_filename = os.path.join(output_dir, mask_image_filename) camera_to_world = self.foreground_reconstruction.get_camera_to_world( idx) self.setCameraTransform(camera_to_world) d = self.computeForegroundMaskUsingCropStrategy(visualize=False) mask = d['mask'] visible_mask = mask * 255 visible_mask_filename = os.path.join( output_dir, utils.getPaddedString(idx) + '_visible_mask' + "." + img_file_extension) depth_img_filename = os.path.join( rendered_images_dir, utils.getPaddedString(idx) + '_depth_cropped' + "." + img_file_extension) # save the images cv2.imwrite(mask_image_full_filename, mask) cv2.imwrite(visible_mask_filename, visible_mask) # make sure to save this as uint16 depth_img = d['depth_img_foreground_raw'] cv2.imwrite(depth_img_filename, depth_img) counter += 1 end_time = time.time() print "rendering masks took %d seconds" % (end_time - start_time)
def run(self, output_dir=None, rendered_images_dir=None): """ Run the mask generation algorithm :return: """ if output_dir is None: output_dir = os.path.join(self.foreground_reconstruction.data_dir, 'image_masks') if rendered_images_dir is None: rendered_images_dir = os.path.join(self.foreground_reconstruction.data_dir, 'rendered_images') if not os.path.isdir(output_dir): os.makedirs(output_dir) if not os.path.isdir(rendered_images_dir): os.makedirs(rendered_images_dir) start_time = time.time() # read in each image in the log image_dir = self.foreground_reconstruction.image_dir camera_pose_data = self.foreground_reconstruction.kinematics_pose_data img_file_extension = 'png' num_poses = self.foreground_reconstruction.kinematics_pose_data.num_poses() logging_rate = 50 counter = 0 for idx, value in camera_pose_data.pose_dict.iteritems(): if (counter % logging_rate) == 0: print "Rendering mask for pose %d of %d" %(counter + 1, num_poses) mask_image_filename = utils.getPaddedString(idx) + "_mask" + "." + img_file_extension mask_image_full_filename = os.path.join(output_dir, mask_image_filename) camera_to_world = self.foreground_reconstruction.get_camera_to_world(idx) self.setCameraTransform(camera_to_world) d = self.computeForegroundMaskUsingCropStrategy(visualize=False) mask = d['mask'] visible_mask = mask*255 visible_mask_filename = os.path.join(output_dir, utils.getPaddedString(idx) + '_visible_mask' + "." + img_file_extension) depth_img_filename = os.path.join(rendered_images_dir, utils.getPaddedString(idx) + '_depth_cropped' + "." + img_file_extension) # save the images cv2.imwrite(mask_image_full_filename, mask) cv2.imwrite(visible_mask_filename, visible_mask) # make sure to save this as uint16 depth_img = d['depth_img_foreground_raw'] cv2.imwrite(depth_img_filename, depth_img) counter += 1 end_time = time.time() print "rendering masks took %d seconds" %(end_time - start_time)
def descriptor_image_filename(img_idx):
    """
    Basename (not a full path) of the descriptor image for the given frame.

    :param img_idx: index of the image/frame
    :return: "<padded idx>_descriptor_image.npy"
    :rtype: str
    """
    return utils.getPaddedString(img_idx) + "_descriptor_image.npy"