def _resume_training_state(self, resumed_checkpoint):
    """
    States only for the training pipeline, such as iteration counts, optimizers,
    and lr_schedulers, are resumed in this function.
    """
    self.start_epoch = resumed_checkpoint['epoch'] + 1
    self.monitor_best = resumed_checkpoint['monitor_best']

    # The estimated iteration count is based on the length of the current data
    # loader, which will be wrong if the batch sizes of the two training
    # processes are different.
    self.train_iteration_count = resumed_checkpoint.get(
        'train_iteration_count', 0)
    self.valid_iteration_counts = resumed_checkpoint.get(
        'valid_iteration_counts', [0] * len(self.valid_data_loaders))
    self.valid_iteration_counts = list(self.valid_iteration_counts)

    # Load optimizer states from resumed_checkpoint only when the optimizer
    # type is unchanged.
    optimizers_ckpt = resumed_checkpoint['optimizers']
    for key in global_config['optimizers'].keys():
        if key not in optimizers_ckpt.keys():
            logger.warning(
                f'Optimizer name {key} in config file is not in checkpoint '
                f'(not resumed)')
        elif resumed_checkpoint['config']['optimizers'][key]['type'] != \
                global_config['optimizers'][key]['type']:
            logger.warning(
                f'Optimizer type of "{key}" in config file is different from '
                f'that of checkpoint (not resumed)')
        else:
            self.optimizers[key].load_state_dict(optimizers_ckpt[key])
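# Sketch (keys are taken from what this method and _resume_model_params read;
# the values and optimizer names are made up) of the checkpoint dict expected here:
#
# resumed_checkpoint = {
#     'epoch': 12,                             # last completed epoch
#     'monitor_best': 0.031,                   # best monitored metric so far
#     'train_iteration_count': 48000,          # optional; defaults to 0
#     'valid_iteration_counts': [1200, 1200],  # optional; one count per valid loader
#     'optimizers': {'generator': {...}},      # optimizer state_dicts keyed by name
#     'config': {...},                         # config used when the checkpoint was saved
#     'state_dict': {...},                     # model weights, see _resume_model_params
# }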
def __init__(self,
             dir_name,
             read=True,
             max_length=None,
             sample_period=1,
             evaluate_image=False):
    self.dir_name = dir_name
    self.count = 0
    self.max_length = max_length
    self.filenames = []
    self.sample_period = sample_period
    if read:
        if os.path.exists(dir_name):
            # self.filenames = read_filenames_from_dir(dir_name, self.__class__.__name__)
            # ^^^^^ yields None when reading some videos of the face forensics
            # data (related to 'Too many levels of symbolic links'?)
            self.filenames = sorted(glob(os.path.join(dir_name, '*')))
            self.filenames = [
                f for f in self.filenames if os.path.isfile(f)
            ]
            self.filenames = self.filenames[::sample_period][:max_length]
            if evaluate_image:
                self.files = self.read_files(self.filenames)
        else:
            self.files = []
            logger.warning(f"Directory {dir_name} does not exist!")
    else:
        self.files = []
    self.current_index = 0
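# Usage sketch, assuming this __init__ belongs to the FrameReader used in
# _process_vds below (the directory path is hypothetical): keep every 2nd frame,
# at most 60 of them. self.files is only filled eagerly when evaluate_image=True;
# otherwise frames are fetched later through slicing, as done in _process_vds.
#
# reader = FrameReader('datasets/some_clip/frames', sample_period=2, max_length=60)
# frames = reader[0:16]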
def save_video_to_frames(video_filename, output_dir, max_len, min_h, min_w,
                         prefix=''):
    video_name = prefix + video_filename.split('/')[-1].split('.')[0]
    cap = cv2.VideoCapture(video_filename)
    frame_count = 1
    video_dir = os.path.join(output_dir, video_name)
    while frame_count <= max_len:
        ret, img = cap.read()
        if not ret:
            logger.warning(
                f"{video_filename} len {frame_count} < max_len {max_len}")
            break
        h, w, c = img.shape
        if h < min_h or w < min_w:
            logger.warning(f"h {h} < min_h {min_h} or w {w} < min_w {min_w}")
            break
        make_dirs(video_dir)
        output_filename = os.path.join(video_dir, f"{frame_count:04d}.png")
        logger.debug(f"  Saving {output_filename}")
        cv2.imwrite(output_filename, img)
        frame_count += 1
    # Release the capture handle so the video file is not kept open
    cap.release()
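# Usage sketch (all paths and sizes are hypothetical): dump up to 120 frames of a
# clip into <output_dir>/<prefix + clip name>/0001.png, 0002.png, ..., stopping
# early if a frame is smaller than the given minimum size.
def _example_save_frames():
    save_video_to_frames(
        video_filename='videos/clip_0001.mp4',
        output_dir='frames/',
        max_len=120,
        min_h=240,
        min_w=432,
        prefix='train_',
    )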
def _resume_model_params(self, resumed_checkpoint):
    """
    Load model parameters from the resumed checkpoint.
    """
    # Load architecture params from resumed_checkpoint.
    if resumed_checkpoint['config']['arch'] != global_config['arch']:
        logger.warning(
            'Warning: Architecture config given in config file is different '
            'from that of resumed_checkpoint. This may yield an exception '
            'while state_dict is being loaded.')
    model = self._get_non_parallel_model()
    model.load_state_dict(resumed_checkpoint['state_dict'])
def _process_vds(self, vds):
    sample_period = random.randint(1, self.random_sample_period_max)
    gt_reader = FrameReader(vds.frames_dir, sample_period=sample_period)
    # print(f"s{sample_period}, len{len(gt_reader)}")
    video_length = len(gt_reader)
    start, end = self._get_sample_index_from_video(video_length)
    gt_frames = gt_reader[start:end]
    if len(gt_frames) < self.sample_length:
        logger.warning(
            f"len frames {len(gt_frames)} reader {len(gt_reader)} "
            f"< sample_length {self.sample_length} dir {vds.frames_dir}")

    if self.train:
        masks = create_random_shape_with_random_motion(
            end - start, imageHeight=self.size[1], imageWidth=self.size[0])
    else:
        masks = self._get_masks(self.size, start, end, vds.mask_dir)

    if self.do_augment:
        gt_frames = self._transform(gt_frames)

    # Edge guidance
    guidances = []
    if self.guidance == "edge":
        for frame in gt_frames:
            edge = canny(rgb2gray(np.array(frame)), sigma=self.sigma)
            edge = Image.fromarray(edge.astype(np.uint8))
            guidances.append(edge)
        guidances = self._to_tensors(guidances)
    elif self.guidance == "landmarks":
        from utils.face import get_landmarks_contour
        for frame in gt_frames:
            edge = get_landmarks_contour(np.array(frame))
            edge = Image.fromarray(edge.astype(np.uint8))
            guidances.append(edge)
        guidances = self._to_tensors(guidances)

    # To tensors
    gt_tensors = self._to_tensors(gt_frames)
    mask_tensors = self._to_tensors(masks)[:video_length]

    # Deal with the VOR test set problem: some ground truth videos are longer
    # than their masks
    if gt_tensors.shape[0] != mask_tensors.shape[0]:
        assert gt_tensors.shape[0] > mask_tensors.shape[0]
        gt_tensors = gt_tensors.narrow(0, 0, mask_tensors.shape[0])

    # Mask the input
    input_tensors = gt_tensors * mask_tensors
    return {
        "input_tensors": input_tensors,
        "mask_tensors": mask_tensors,
        "gt_tensors": gt_tensors,
        "guidances": guidances
    }
def extend_config(config, config_B):
    new_config = copy(config)
    for key, value in config_B.items():
        if key in new_config.keys():
            if key == 'name':
                value = f"{new_config[key]}_{value}"
            else:
                logger.warning(f"Overriding '{key}' in config")
            del new_config[key]
        new_config[key] = value
    return new_config
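# Usage sketch (config values are made up): shared keys from the second config
# override the first with a warning, except 'name', whose values are concatenated.
def _example_extend_config():
    base = {'name': 'baseline', 'batch_size': 8}
    override = {'name': 'edge_guidance', 'batch_size': 4, 'sigma': 2}
    merged = extend_config(base, override)
    # merged == {'name': 'baseline_edge_guidance', 'batch_size': 4, 'sigma': 2}
    return merged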
def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance.

    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
        d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).

    Stable version by Dougal J. Sutherland.

    Params:
    -- mu1   : Numpy array containing the activations of a layer of the
               inception net (like returned by the function 'get_predictions')
               for generated samples.
    -- mu2   : The sample mean over activations, precalculated on a
               representative data set.
    -- sigma1: The covariance matrix over activations for generated samples.
    -- sigma2: The covariance matrix over activations, precalculated on a
               representative data set.

    Returns:
    --   : The Frechet Distance.
    """
    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)

    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'

    diff = mu1 - mu2

    # Product might be almost singular
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        logger.warning(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    # Numerical error might give slight imaginary component
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)

    return (diff.dot(diff) + np.trace(sigma1) +  # NOQA
            np.trace(sigma2) - 2 * tr_covmean)
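# Usage sketch (the activation arrays are placeholders): given Inception
# activations of shape (num_samples, feature_dim) for generated and real data,
# compute their sample means and covariances and plug them into
# calculate_frechet_distance to obtain the FID value.
def _example_fid(act_generated, act_real):
    mu1, sigma1 = np.mean(act_generated, axis=0), np.cov(act_generated, rowvar=False)
    mu2, sigma2 = np.mean(act_real, axis=0), np.cov(act_real, rowvar=False)
    return calculate_frechet_distance(mu1, sigma1, mu2, sigma2)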
def _create_saving_dir(self, args):
    saving_dir = os.path.join(global_config['trainer']['save_dir'],
                              args.outputs_subdir, global_config['name'])
    if os.path.exists(saving_dir):
        logger.warning(
            f'The saving directory "{saving_dir}" already exists. '
            f'If continued, some files might be overwritten.')
        response = input('Proceed? [y/N] ')
        if response != 'y':
            logger.info('Exit.')
            exit()
    ensure_dir(saving_dir)
    if args.resume is not None:
        link = os.path.join(saving_dir, 'resumed_ckpt.pth')
        if os.path.exists(link):
            os.remove(link)
        # Mark the used resume path with a symbolic link
        os.symlink(os.path.abspath(args.resume), link)
    return saving_dir
def prepare_device(n_gpu_use):
    """
    Set up the GPU device if available; the model is moved to the configured
    device by the caller.
    """
    n_gpu = torch.cuda.device_count()
    if n_gpu_use > 0 and n_gpu == 0:
        logger.warning(
            "Warning: There's no GPU available on this machine, "
            "training will be performed on CPU.")
        n_gpu_use = 0
    if n_gpu_use > n_gpu:
        msg = (f"Warning: The number of GPUs configured to use is {n_gpu_use} "
               f"but only {n_gpu} are available on this machine.")
        logger.warning(msg)
        n_gpu_use = n_gpu
    device = torch.device('cuda:0' if n_gpu_use > 0 else 'cpu')
    list_ids = list(range(n_gpu_use))
    return device, list_ids
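# Usage sketch (the model argument is assumed to be an nn.Module built elsewhere):
# move the model to the prepared device and wrap it with DataParallel when more
# than one GPU is requested and available.
def _example_prepare_model(model, n_gpu_use=2):
    device, device_ids = prepare_device(n_gpu_use)
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    return model, device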
def _setup_optimizers(self):
    """
    Set up optimizers according to the configuration.

    Each optimizer has its corresponding network(s) to train, specified by
    'target_network' in the configuration. If no 'target_network' is
    specified, all parameters of self.model will be included.
    """
    self.optimizers = {}
    for name, entry in global_config['optimizers'].items():
        model = self._get_non_parallel_model()
        if 'target_network' in entry.keys():
            network = getattr(model, entry['target_network'])
        else:
            network = model
            logger.warning(
                f'Target network of optimizer "{name}" not specified. '
                f'All params of self.model will be included.')
        trainable_params = filter(lambda p: p.requires_grad,
                                  network.parameters())
        self.optimizers[name] = getattr(torch.optim, entry['type'])(
            trainable_params, **entry['args'])
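# Sketch (all values are made up) of a matching entry in global_config['optimizers']:
# 'type' names a class in torch.optim, 'args' holds its keyword arguments, and the
# optional 'target_network' names the attribute of self.model whose parameters are
# trained by this optimizer.
#
# global_config['optimizers'] = {
#     'generator': {
#         'type': 'Adam',
#         'args': {'lr': 1e-4, 'betas': (0.5, 0.999)},
#         'target_network': 'generator',
#     },
# }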
def _check_and_save_best(self, epoch, worker_outputs):
    """
    Evaluate model performance according to the configured metric and save the
    best checkpoint as model_best.
    """
    best = False
    if self.monitor_mode != 'off':
        try:
            metric_value = worker_outputs[self.monitored_loader]['log'][
                self.monitored_metric]
            if (self.monitor_mode == 'min' and metric_value < self.monitor_best) or \
               (self.monitor_mode == 'max' and metric_value > self.monitor_best):
                self.monitor_best = metric_value
                best = True
        except KeyError:
            if epoch == 1:
                msg = (f"Warning: Can't recognize metric "
                       f"'{self.monitored_metric}' in '{self.monitored_loader}' "
                       f"for performance monitoring. The model_best checkpoint "
                       f"won't be updated.")
                logger.warning(msg)
    if epoch % self.save_freq == 0 or best:
        self._save_checkpoint(epoch, save_best=best)
def __init__(
    self,
    rids: RootInputDirectories,
    rods: RootOutputDirectories,
    args: dict,
):
    super().__init__(rids, rods, args)
    self.image_dir = rids.root_videos_dir
    self.mask_dir = rids.root_masks_dir
    self.max_num = args.get('max_num', None)
    self.image_filenames = get_everything_under(
        self.image_dir, pattern="*/*.jpg")[:self.max_num]
    self.mask_filenames = get_everything_under(
        self.mask_dir, pattern="*/*.png")[:self.max_num]

    if len(self.image_filenames) > len(self.mask_filenames):
        logger.warning(
            f"image num {len(self.image_filenames)} "
            f"> mask num {len(self.mask_filenames)}")

    self.data_len = len(self.image_filenames)
    self.mask_len = len(self.mask_filenames)
def _process_vds(self, vds):
    sample_period = random.randint(1, self.random_sample_period_max)
    gt_reader = FrameReader(vds.frames_dir, sample_period=sample_period)
    # print(f"s{sample_period}, len{len(gt_reader)}")
    video_length = len(gt_reader)
    start, end = self._get_sample_index_from_video(video_length)
    gt_frames = gt_reader[start:end]
    if len(gt_frames) < self.sample_length:
        logger.warning(
            f"len frames {len(gt_frames)} reader {len(gt_reader)} "
            f"< sample_length {self.sample_length} dir {vds.frames_dir}")

    # Read in optical flows and flow masks
    flow_reader = FlowReader(vds.flows_dir, sample_period=sample_period)
    flow_forward, flow_backward = flow_reader.get_samples(start, end)
    flowmask_reader = FlowmaskReader(vds.flowmasks_dir,
                                     sample_period=sample_period)
    flowmask_forward, flowmask_backward = flowmask_reader.get_samples(
        start, end)

    masks = self._get_masks(self.size, start, end, vds.mask_dir)

    input_tensor_dict = {}
    input_tensor_dict['gt_frames'] = gt_frames
    input_tensor_dict['flow_forward'] = flow_forward
    input_tensor_dict['flow_backward'] = flow_backward
    input_tensor_dict['flowmask_forward'] = flowmask_forward
    input_tensor_dict['flowmask_backward'] = flowmask_backward

    if self.do_augment:
        input_tensor_dict = self._transform(input_tensor_dict)
    else:
        input_tensor_dict = self._to_flow(input_tensor_dict)

    # Edge guidance
    guidances = []
    if self.guidance == "edge":
        for frame in gt_frames:
            edge = canny(rgb2gray(np.array(frame)), sigma=self.sigma)
            edge = Image.fromarray(edge.astype(np.uint8))
            guidances.append(edge)
        guidances = self._to_tensors(guidances)
    elif self.guidance == "landmarks":
        from utils.face import get_landmarks_contour
        for frame in gt_frames:
            edge = get_landmarks_contour(np.array(frame))
            edge = Image.fromarray(edge.astype(np.uint8))
            guidances.append(edge)
        guidances = self._to_tensors(guidances)

    # To tensors
    input_tensor_dict = self._to_tensors_dict(input_tensor_dict)
    mask_tensors = self._to_tensors(masks)[:video_length]

    gt_tensors = input_tensor_dict['gt_frames']
    forward_flow = input_tensor_dict['flow_forward']
    backward_flow = input_tensor_dict['flow_backward']
    forward_flowmask = input_tensor_dict['flowmask_forward']
    backward_flowmask = input_tensor_dict['flowmask_backward']

    # Deal with the VOR test set problem: some ground truth videos are longer
    # than their masks
    if gt_tensors.shape[0] != mask_tensors.shape[0]:
        assert gt_tensors.shape[0] > mask_tensors.shape[0]
        gt_tensors = gt_tensors.narrow(0, 0, mask_tensors.shape[0])

    # Mask the input
    input_tensors = gt_tensors * mask_tensors
    return {
        "input_tensors": input_tensors,
        "mask_tensors": mask_tensors,
        "gt_tensors": gt_tensors,
        "forward_flow": forward_flow,
        "backward_flow": backward_flow,
        "forward_flowmask": forward_flowmask,
        "backward_flowmask": backward_flowmask,
        "guidances": guidances
    }