def __init__(self, paths, interval, root_path, name=None):
    """Prepare one logger and one output directory per key-path.

    Parameters
    ----------
    paths : list(str)
        List of key-paths to logging outputs. Will be expanded so they
        can be evaluated lazily.
    interval : int
        Interval of training steps before logging.
    root_path : str
        Path at which the logs are stored. A sub-directory is created
        below it for every entry of ``paths``.
    name : str
        Optional name to recognize logging output. When ``None`` the
        logger is requested for the instance itself.
    """
    self.paths = paths
    self.interval = interval
    self.root = root_path

    # Either the explicit name or the instance identifies the main logger.
    logger_target = self if name is None else name
    self.logger = get_logger(logger_target)

    self.loggers = {}
    for key_path in self.paths:
        output_dir = os.path.join(self.root, key_path)
        os.makedirs(output_dir, exist_ok=True)
        self.loggers[key_path] = get_logger(key_path)

    # Maps an output category to the callables that handle it.
    self.handlers = {
        "images": [self.log_images],
        "scalars": [self.log_scalars],
        "figures": [self.log_figures],
    }
def __init__(
    self,
    checkpoint_root,
    filter_cond=lambda c: True,
    interval=5,
    add_sec=5,
    callback=None,
    eval_all=False,
):
    """Store the settings used while waiting for new checkpoints.

    Args:
        checkpoint_root (str): Path to look for checkpoints.
        filter_cond (Callable): A function used to filter files, to only
            get the checkpoints that are wanted.
        interval (float): Number of seconds after which to check for a
            new checkpoint again.
        add_sec (float): Number of seconds to wait, after a checkpoint is
            found, to avoid race conditions, if the checkpoint is still
            being written at the time it's meant to be read.
        callback (Callable): Callback called with path of found
            checkpoint.
        eval_all (bool): Accept all instead of just latest checkpoint.
    """
    self.root, self._fcond = checkpoint_root, filter_cond
    self.sleep_interval, self.additional_wait = interval, add_sec
    self.callback, self.eval_all = callback, eval_all

    self.logger = get_logger(self)

    # Starts empty; nothing has been seen at construction time.
    self.known_checkpoints = set()
def __init__(
    self,
    checkpoint_root,
    metric_template,
    metric_key,
    n_keep=5,
    lower_is_better=True,
):
    """Remember how checkpoints are ranked and how many are kept.

    Parameters
    ----------
    checkpoint_root : str
        Path to look for checkpoints.
    metric_template : str
        Format string to find metric file.
    metric_key : str
        Key to use from metric file.
    n_keep : int
        Maximum number of checkpoints to keep.
    lower_is_better : bool
        Stored unchanged. Presumably selects whether smaller metric
        values rank higher -- TODO confirm against the code using it.
    """
    self.root = checkpoint_root
    self.metric_template, self.metric_key = metric_template, metric_key
    self.n_keep, self.lower_is_better = n_keep, lower_is_better
    self.logger = get_logger(self)
def __init__(
    self,
    scalars=None,
    histograms=None,
    images=None,
    logs=None,
    graph=None,
    interval=100,
    root_path="logs",
    log_images_to_tensorboard=False,
):
    """Build the summary op and the fetch dict used for logging.

    Parameters
    ----------
    scalars : dict
        Scalar ops, keyed by summary name. ``None`` means no scalars.
    histograms : dict
        Histogram ops, keyed by summary name. ``None`` means none.
    images : dict
        Image ops. Note that for these no tensorboard logging is used but
        a custom image saver, unless ``log_images_to_tensorboard`` is set.
        ``None`` means no images.
    logs : dict
        Logs to std out via logger. ``None`` means no logs.
    graph : tf.Graph
        Current graph.
    interval : int
        Interval of training steps before logging.
    root_path : str
        Path at which the logs are stored.
    log_images_to_tensorboard : bool
        If true, an image summary is additionally created for every entry
        of ``images``.
    """
    # The defaults used to be shared ``{}`` literals. ``logs`` and
    # ``images`` are stored on the instance, so every hook built with
    # the defaults shared (and could mutate) the same dict objects.
    scalars = {} if scalars is None else scalars
    histograms = {} if histograms is None else histograms
    images = {} if images is None else images
    logs = {} if logs is None else logs

    scalar_summaries = [tf.summary.scalar(n, s) for n, s in scalars.items()]
    histogram_summaries = [
        tf.summary.histogram(n, h) for n, h in histograms.items()
    ]

    self.log_images_to_tensorboard = log_images_to_tensorboard
    if log_images_to_tensorboard:
        image_summaries = [tf.summary.image(n, i) for n, i in images.items()]
    else:
        image_summaries = []

    all_summaries = scalar_summaries + histogram_summaries + image_summaries
    self._has_summary = len(all_summaries) > 0
    if self._has_summary:
        summary_op = tf.summary.merge(all_summaries)
    else:
        # Nothing to merge; keep the fetch dict well-formed with a no-op.
        summary_op = tf.no_op()

    self.fetch_dict = {
        "summaries": summary_op,
        "logs": logs,
        "images": images,
    }

    self.interval = interval
    self.graph = graph
    self.root = root_path
    self.logger = get_logger(self)
def __init__(
    self,
    variables,
    checkpoint_path,
    filter_cond=lambda c: True,
    global_step_setter=None,
):
    """Create the saver used to restore ``variables`` from a checkpoint.

    Parameters
    ----------
    variables : list
        tf.Variable to be loaded from the checkpoint.
    checkpoint_path : str
        Directory in which the checkpoints are stored or explicit
        checkpoint. Ignored if used as functor.
    filter_cond : Callable
        A function used to filter files, to only get the checkpoints that
        are wanted. Ignored if used as functor.
    global_step_setter : Callable
        Callback to set global_step.
    """
    self.root, self.fcond = checkpoint_path, filter_cond
    self.setstep = global_step_setter

    self.logger = get_logger(self)

    # The saver is restricted to exactly the variables passed in.
    self.saver = tf.train.Saver(variables)
def __init__(self, global_step_getter, save_root, model, config):
    """Store the collaborators and read the plotting switch from config.

    Parameters
    ----------
    global_step_getter : Callable
        Stored as ``get_global_step``; presumably returns the current
        global step -- TODO confirm.
    save_root : str
        Stored as ``_root``; presumably where outputs are written --
        TODO confirm.
    model : object
        Stored unchanged.
    config : dict
        Configuration. The key ``"plot_batches"`` is read and defaults to
        ``False`` when absent.
    """
    self.get_global_step, self._root = global_step_getter, save_root
    self.model, self.config = model, config

    plot_batches = self.config.get("plot_batches", False)
    self.plot_batches = plot_batches

    self.logger = get_logger(self)
def __init__(
    self,
    root_path,
    global_step_getter,
    global_step_setter,
    save,
    restore,
    interval=None,
    ckpt_zero=False,
    modelname="model",
):
    """Set up checkpointing below ``root_path``.

    Parameters
    ----------
    root_path : str
        Directory for the checkpoints; created if it does not exist.
    global_step_getter : Callable
        Stored unchanged; presumably returns the global step -- TODO
        confirm.
    global_step_setter : Callable
        Stored unchanged; presumably sets the global step -- TODO confirm.
    save : Callable
        Stored as ``_save``. Its signature is not visible here.
    restore : Callable
        Stored as ``_restore``. Its signature is not visible here.
    interval : int
        Stored unchanged; presumably the number of steps between
        checkpoints -- TODO confirm.
    ckpt_zero : bool
        Stored unchanged; presumably whether to checkpoint at step zero --
        TODO confirm.
    modelname : str
        Prefix of the checkpoint file name.
    """
    self.root = root_path
    self.logger = get_logger(self)

    self.global_step_getter = global_step_getter
    self.global_step_setter = global_step_setter
    self._save, self._restore = save, restore
    self.interval, self.ckpt_zero = interval, ckpt_zero

    os.makedirs(root_path, exist_ok=True)

    # The doubled braces survive ``format`` as one ``{}`` placeholder, so
    # the resulting name is ``<modelname>-{}.ckpt``.
    filename_pattern = "{}-{{}}.ckpt".format(modelname)
    self.savename = os.path.join(root_path, filename_pattern)

    self._active = False
def __init__(self, root_path, model, modelname="model", interval=None):
    """Set up checkpointing of ``model`` below ``root_path``.

    Parameters
    ----------
    root_path : str
        Path to where the checkpoints are stored. Created if missing.
    model : nn.Module
        Model to checkpoint.
    modelname : str
        Name used in the checkpoint file names.
    interval : int
        Number of iterations after which a checkpoint is saved. In any
        case a checkpoint is saved after each epoch.
    """
    self.root, self.interval, self.model = root_path, interval, model

    self.logger = get_logger(self)

    os.makedirs(root_path, exist_ok=True)

    # Two ``{}`` placeholders remain after formatting, giving
    # ``{}-{}_<modelname>.ckpt``.
    filename_pattern = "{{}}-{{}}_{}.ckpt".format(modelname)
    self.savename = os.path.join(root_path, filename_pattern)

    # Init to save even before first step... More of a debug statement
    self.step, self.epoch = 0, 0
def __init__(self, images=None, interval=100, root_path="logs"):
    """Logs an overview of all image outputs at an interval of training
    steps.

    Parameters
    ----------
    images : dict
        Image ops. Note that for these no tensorboard logging is used but
        a custom image saver. ``None`` means no images.
    interval : int
        Interval of training steps before logging.
    root_path : str
        Path at which the logs are stored.
    """
    # The default used to be a shared ``{}`` literal that ended up inside
    # ``fetch_dict`` of every instance built without ``images``.
    images = {} if images is None else images

    # No summaries are produced by this hook; the no-op keeps the
    # ``"summaries"`` entry of the fetch dict present.
    summary_op = tf.no_op()

    # TODO: actually implement logging of the images to tensorboard.

    self.fetch_dict = {"summaries": summary_op, "images": images}

    self.interval = interval
    self.root = root_path
    self.logger = get_logger(self)
def __init__(self, targets, k=100):
    '''Index the reference poses in a flat L2 nearest-neighbour sampler.

    Arguments
    ---------
    targets : torch.Tensor
        All ``M`` possible reference poses. Shape ``[M, Z]``. Flattened
        to one row per pose before being added to the index.
    k : int
        Stored as ``self.k``. Presumably the number of neighbours to
        query -- TODO confirm against the code using it.

    Notes
    -----
    ``self.transition_probability`` is not an argument; it is always a
    freshly constructed ``TransProb`` instance.
    '''
    self.logger = get_logger(self)

    self.k = k
    self.targets = targets

    n_targets = len(targets)
    self.targets_flat = self.targets.reshape(n_targets, -1)

    feature_dim = self.targets_flat.shape[-1]
    self.nn_sampler = faiss.IndexFlatL2(feature_dim)
    self.nn_sampler.add(self.targets_flat)

    n = self.nn_sampler.ntotal
    nt = ' not' if not self.nn_sampler.is_trained else ''
    self.logger.info(f'NN sampler contains {n} examples and is{nt} trained')

    self.transition_probability = TransProb()
def __init__(
    self,
    model,
    checkpoint_path,
    filter_cond=lambda c: True,
    global_step_setter=None,
):
    """Store what is needed to restore ``model`` from a checkpoint.

    Parameters
    ----------
    model : torch.nn.Module
        Model to initialize.
    checkpoint_path : str
        Directory in which the checkpoints are stored or explicit
        checkpoint. Ignored if used as functor.
    filter_cond : Callable
        A function used to filter files, to only get the checkpoints that
        are wanted. Ignored if used as functor.
    global_step_setter : Callable
        Function, that the retrieved global step can be passed to.
    """
    self.root, self.fcond = checkpoint_path, filter_cond
    self.logger = get_logger(self)
    self.model, self.global_step_setter = model, global_step_setter
def __init__(self, global_step=None):
    """Keep a reference to the step variable and create the logger.

    Args:
        global_step (tf.Variable): Variable tracking the training step.
            Defaults to ``None`` and is stored unchanged.
    """
    step_variable = global_step
    self.global_step = step_variable

    self.logger = get_logger(self)
def __init__(
    self,
    root_path,
    variables,
    modelname="model",
    session=None,
    step=None,
    interval=None,
    max_to_keep=5,
):
    """Create the saver and the checkpoint location.

    Args:
        root_path (str): Path to where the checkpoints are stored.
            Created if it does not exist.
        variables (list): List of all variables to keep track of.
        session (tf.Session): Session instance for saver.
        modelname (str): Used to name the checkpoint.
        step (tf.Tensor or callable): Step op, that can be evaluated
            (i.e. a tf.Tensor or a python callable returning the step as
            an integer). When ``None``, ``tf.train.get_global_step()`` is
            used instead.
        interval (int): Number of iterations after which a checkpoint is
            saved. If None, a checkpoint is saved after each epoch.
        max_to_keep (int): Maximum number of checkpoints to keep on
            disk. Use 0 or None to never delete any checkpoints.
    """
    self.root = root_path
    self.interval = interval

    # Fall back to the graph's global step only when none was supplied.
    if step is None:
        step = tf.train.get_global_step()
    self.step = step

    self.saver = tf.train.Saver(variables, max_to_keep=max_to_keep)
    self.logger = get_logger(self)

    os.makedirs(root_path, exist_ok=True)
    checkpoint_file = "{}.ckpt".format(modelname)
    self.savename = os.path.join(root_path, checkpoint_file)

    self.session = session
def __init__(self, global_step_getter, save_root, model, config):
    """Store the collaborators and read the key lists from config.

    Parameters
    ----------
    global_step_getter : Callable
        Stored as ``get_global_step``; presumably returns the current
        global step -- TODO confirm.
    save_root : str
        Stored as ``_root``; presumably where outputs are written --
        TODO confirm.
    model : object
        Stored unchanged.
    config : dict
        Configuration. The keys ``"batch_store_keys"``,
        ``"batch_input_keys"`` and ``"fetch_output_keys"`` are read; each
        resulting attribute is ``None`` when its key is absent.
    """
    self.get_global_step, self._root = global_step_getter, save_root
    self.model, self.config = model, config

    # Mirror the three config entries onto same-named attributes.
    for option in ("batch_store_keys", "batch_input_keys", "fetch_output_keys"):
        setattr(self, option, self.config.get(option))

    self.logger = get_logger(self)
def __init__(
    self,
    config,
    root,
    model,
    datasets,
    hook_freq=100,
    num_epochs=100,
    hooks=None,
    bar_position=0,
    nogpu=False,
    desc="",
):
    """Constructor.

    Registers ``self._handle_sigterm`` for SIGTERM and SIGINT, stores the
    arguments and resets the step counters.

    Parameters
    ----------
    config : dict
        Stored unchanged as ``self.config``.
    root : str
        Stored unchanged as ``self.root``.
    model : object
        Model class.
    datasets : dict
        Must contain the keys ``"train"`` and ``"validation"``; these are
        additionally exposed as ``self.dataset`` and
        ``self.validation_dataset`` for backwards compatibility.
    hook_freq : int
        Frequency at which hooks are evaluated.
    num_epochs : int
        Number of times to iterate over the data.
    hooks : list
        List containing :class:`Hook` instances. ``None`` (the default)
        gives every instance its own new empty list.
    bar_position : int
        Used by tqdm to place bars at the right position when using
        multiple Iterators in parallel.
    nogpu : bool
        Accepted for interface compatibility; not used by this
        constructor.
    desc : str
        Stored unchanged as ``self.desc``.
    """
    signal.signal(signal.SIGTERM, self._handle_sigterm)
    signal.signal(signal.SIGINT, self._handle_sigterm)

    self.config = config
    self.root = root

    self.model = model
    self.datasets = datasets
    # backwards compatibility
    self.dataset = datasets["train"]
    self.validation_dataset = datasets["validation"]

    self.num_epochs = num_epochs

    # The default used to be a shared ``[]`` literal: hooks added to one
    # instance's list leaked into every other instance built without an
    # explicit ``hooks`` argument.
    self.hooks = [] if hooks is None else hooks
    self.epoch_hooks = list()
    self.hook_freq = hook_freq

    # Doubled relative to the requested position, as in the original.
    self.bar_pos = bar_position * 2
    self.desc = desc

    self.logger = get_logger(type(self).__name__)

    self._global_step = 0
    self._batch_step = 0
    self._epoch_step = 0
    self._split = None
def __init__(self, config, variational):
    """Build the ResNet encoder and optionally load pretrained weights.

    Parameters
    ----------
    config : dict
        Configuration. The following entries are read here:
        ``"resnet_type"`` (default ``"50"``) selects ``models.resnet<type>``;
        ``"pretrained"`` (default ``False``) is forwarded to that
        constructor; ``"load_encoder_pretrained"`` must hold ``"active"``
        and, when active, ``"path"``; ``"encoder_latent_dim"`` is the
        output size of the final linear layer.
    variational : bool
        If truthy, two extra linear layers ``fc1`` and ``fc2`` mapping
        ``encoder_latent_dim`` to ``encoder_latent_dim`` are created.
        NOTE(review): presumably the heads for mean and log-variance --
        confirm against ``forward``.
    """
    super(AnimalEncoder, self).__init__()
    self.variational = variational
    self.logger = get_logger("Encoder")
    # Pick the resnet constructor by name, e.g. ``models.resnet50``.
    self.model = getattr(models, "resnet" + str(config.get("resnet_type", "50")))(
        pretrained=config.get("pretrained", False))
    self.config = config
    if config["load_encoder_pretrained"]["active"]:
        print(
            f"Loading weights for Encoder from {config['load_encoder_pretrained']['path']}."
        )
        state = torch.load(f"{config['load_encoder_pretrained']['path']}")
        try:
            # First attempt: the checkpoint stores the encoder under
            # ``encoder_x1.`` / ``encoder_x2.`` prefixes. Resize the last
            # layer to the stored shape so the weights can be loaded.
            self.model.fc = nn.Linear(
                state["model"]["encoder_x1.model.fc.weight"].shape[1],
                state["model"]["encoder_x1.model.fc.weight"].shape[0])
            new_state_dict = OrderedDict()
            for k, v in state["model"].items():
                if k.startswith("encoder_"):
                    # remove `encoder_{x1,x2}.`
                    name = k.replace("encoder_x1.", "").replace("encoder_x2.", "").replace("model.", "")
                    new_state_dict[name] = v
            self.model.load_state_dict(new_state_dict)
            self.model.fc = nn.Linear(self.model.fc.in_features,
                                      config["encoder_latent_dim"])
        except Exception as exc:
            # NOTE(review): every exception of the first attempt is only
            # printed; the fallback below then assumes plain ``model.``
            # prefixed keys.
            print(exc)
            new_state_dict = OrderedDict()
            for k, v in state["model"].items():
                name = k.replace("model.", "")  # remove `model.`
                new_state_dict[name] = v
            # Overrides default last layer with the shape of the pretrained
            # This layer is just adapted so we can load the weights without problems
            # It will be overwritten in the next step anyways.
            in_features = new_state_dict["fc.weight"].shape[1]
            classes = new_state_dict["fc.weight"].shape[0]
            self.model.fc = nn.Linear(in_features, classes)
            self.model.load_state_dict(new_state_dict)
    # save fc layer dimensions
    in_features = self.model.fc.in_features
    # Both branches replace the final layer identically; only the
    # variational one adds ``fc1`` and ``fc2``.
    if self.variational:
        # self.model = nn.Sequential(*list(self.model.children())[:-1])
        self.model.fc = nn.Linear(in_features, config["encoder_latent_dim"])
        self.fc1 = nn.Linear(config["encoder_latent_dim"],
                             config["encoder_latent_dim"])
        self.fc2 = nn.Linear(config["encoder_latent_dim"],
                             config["encoder_latent_dim"])
    else:
        self.model.fc = nn.Linear(in_features, config["encoder_latent_dim"])
def __init__(self, global_step=None):
    """Keep a reference to the step variable and create the logger.

    Parameters
    ----------
    global_step : tf.Variable
        Variable tracking the training step. Defaults to ``None`` and is
        stored unchanged.
    """
    step_variable = global_step
    self.global_step = step_variable

    self.logger = get_logger(self)
def __init__(
    self,
    dataset,
    sub_dir_keys=None,
    label_key=None,
    callbacks=None,
    meta=None,
    step_getter=None,
    keypath="step_ops",
):
    """
    .. warning::
        To work with ``edeval`` you **must** specify ``meta=config`` when
        instantiating the EvalHook.

    Parameters
    ----------
    dataset : DatasetMixin
        The Dataset used for creating the new data.
    sub_dir_keys : list(str)
        Keys found in :attr:`example`, which will be used to make a
        subdirectory for the stored example. Subdirectories are made in a
        nested fashion in the order of the list. The keys will be removed
        from the example dict and not be stored explicitly. ``None`` (the
        default) means no such keys.
    label_key : list(str)
        Keys found in :attr:`example`, which will be stored in one large
        array and later loaded as labels.
    callbacks : list(Callable)
        Called at the end of the epoch. Must accept root as argument as
        well as the generating dataset and the generated dataset (in that
        order). ``None`` (the default) means no callbacks.
    meta : object, dict
        An object containing metadata. Must be dumpable by ``yaml``.
        Usually the ``edflow`` config.
    step_getter : Callable
        Function which returns the global step as ``int``.
    keypath : str
        Path in result which will be stored.
    """
    # Both defaults used to be shared ``[]`` literals stored directly on
    # the instance, so all hooks built with the defaults shared them.
    sub_dir_keys = [] if sub_dir_keys is None else sub_dir_keys
    callbacks = [] if callbacks is None else callbacks

    self.logger = get_logger(self)

    self.cbacks = callbacks
    self.logger.info("{}".format(self.cbacks))
    # Name of the module each callback was defined in.
    self.cb_names = [inspect.getmodule(c).__name__ for c in self.cbacks]

    self.sdks = sub_dir_keys
    self.lk = label_key
    self.data_in = dataset

    self.meta = meta

    self.gs = step_getter
    self.keypath = keypath
def __init__(self, checkpoint_root, max_n, interval=5):
    """Store the limits used while waiting for checkpoints to disappear.

    Args:
        checkpoint_root (str): Path to look for checkpoints.
        max_n (int): Wait as long as there are more than max_n ckpts.
        interval (float): Number of seconds after which to check for
            number of checkpoints again.
    """
    self.root, self.max_n = checkpoint_root, max_n
    self.sleep_interval = interval
    self.logger = get_logger(self)
def __init__(self, global_step_getter, save_root, model, config):
    """Store the collaborators and read the hook options from config.

    Parameters
    ----------
    global_step_getter : Callable
        Stored as ``get_global_step``; presumably returns the current
        global step -- TODO confirm.
    save_root : str
        Stored as ``_root``; presumably where outputs are written --
        TODO confirm.
    model : object
        Stored unchanged.
    config : dict
        Configuration. ``"plot_batches"`` is optional (default
        ``False``). ``"infer_logits_hook_options"`` must be present and
        hold ``"view0_key"``, ``"view0_mask_rgb_key"`` and
        ``"part_logit_key"``.
    """
    self.get_global_step, self._root = global_step_getter, save_root
    self.model, self.config = model, config

    self.plot_batches = self.config.get("plot_batches", False)

    # Looked up once; subscripting fails as before when the entry is
    # missing or lacks one of the keys.
    hook_options = self.config.get("infer_logits_hook_options")
    self.view0_key = hook_options["view0_key"]
    self.view0_mask_rgb_key = hook_options["view0_mask_rgb_key"]
    self.part_logit_key = hook_options["part_logit_key"]

    self.logger = get_logger(self)
def _checkpoint_variable_name(variable_name, model_name):
    """Translate a graph variable name into its name inside the checkpoint."""
    name = variable_name
    # Remove the model scope as a real prefix. ``str.strip(chars)`` treats
    # its argument as a set of characters and would also eat matching
    # characters that belong to the variable name itself.
    if model_name and name.startswith(model_name):
        name = name[len(model_name):]
    name = name.strip('/')
    # Remove the output suffix as a real suffix; ``strip(':0')`` would
    # turn e.g. ``weights_10:0`` into ``weights_1``.
    if name.endswith(':0'):
        name = name[:-len(':0')]
    return name


def initialize_model(model, checkpoint, session=None):
    '''Loads weights from a checkpointfile and initializes the model.
    This function is just for the case of restoring the market-1501
    pretrained model because we have to map variable names correctly. For
    newly written checkpoints use the RestoreCheckpointHook.

    =======> THIS FUNCTION IS ONLY INTENDED TO BE USED HERE <=======

    Args:
        model: Object providing ``variables`` (iterable of variables with
            a ``name``) and ``model_name`` (scope prefix removed from the
            variable names before the lookup in the checkpoint).
        checkpoint (str): Checkpoint to restore from. Must not be None.
        session (tf.Session): Session to restore into. If not given, a
            new ``tf.Session`` is created.

    Raises:
        ValueError: If ``checkpoint`` is None. Raised before any session
            is created or initialized.
    '''
    # Fail fast: previously a session was created and all variables were
    # initialized before the missing checkpoint was noticed.
    if checkpoint is None:
        raise ValueError('The reIdEvaluator needs a checkpoint from which '
                         'to initialize the model.')

    sess = session or tf.Session()
    sess.run(tf.global_variables_initializer())

    # Checkpoint name -> graph variable.
    var_map = {
        _checkpoint_variable_name(v.name, model.model_name): v
        for v in model.variables
    }

    tf.train.Saver(var_map).restore(sess, checkpoint)
    get_logger("initialize_model").info(
        "Restored model from {}".format(checkpoint))
def __init__(
    self,
    log_ops=None,
    scalar_keys=None,
    histogram_keys=None,
    image_keys=None,
    log_keys=None,
    graph=None,
    interval=100,
    root_path="logs",
):
    """Store the logging configuration and open the summary writer.

    Parameters
    ----------
    log_ops : list
        Ops to run at logging time. ``None`` means none.
    scalar_keys : list
        Keys of the scalar outputs. Stored unchanged; how they are
        resolved is not visible here. ``None`` means none.
    histogram_keys : list
        Keys of the histogram outputs. ``None`` means none.
    image_keys : list
        Keys of the image outputs. ``None`` means none.
    log_keys : list
        Keys of the outputs logged to std out via logger. ``None`` means
        none.
    graph : tf.Graph
        Current graph.
    interval : int
        Interval of training steps before logging.
    root_path : str
        Path at which the logs are stored. Also passed to
        ``SummaryWriter``.
    """
    # The defaults used to be shared ``[]`` literals stored directly on
    # the instance; every instance now gets its own lists.
    self.log_ops = [] if log_ops is None else log_ops
    self.scalar_keys = [] if scalar_keys is None else scalar_keys
    self.histogram_keys = [] if histogram_keys is None else histogram_keys
    self.image_keys = [] if image_keys is None else image_keys
    self.log_keys = [] if log_keys is None else log_keys

    self.interval = interval

    self.tb_logger = SummaryWriter(root_path)

    self.graph = graph
    self.root = root_path
    self.logger = get_logger(self)
def __init__(self, checkpoint_root, max_n, interval=5):
    """Store the limits used while waiting for checkpoints to disappear.

    Parameters
    ----------
    checkpoint_root : str
        Path to look for checkpoints.
    max_n : int
        Wait as long as there are more than max_n ckpts.
    interval : float
        Number of seconds after which to check for number of checkpoints
        again.
    """
    self.root, self.max_n = checkpoint_root, max_n
    self.sleep_interval = interval
    self.logger = get_logger(self)
def __init__(
    self,
    config,
    root,
    model,
    dataset,
    hook_freq=100,
    num_epochs=100,
    hooks=None,
    bar_position=0,
    nogpu=False,
    desc="",
):
    """Constructor.

    Registers ``self._handle_sigterm`` for SIGTERM, stores the arguments
    and resets the step counters.

    Args:
        config (dict): Stored unchanged as ``self.config``.
        root (str): Stored unchanged as ``self.root``.
        model (object): Model class.
        dataset (object): Stored unchanged as ``self.dataset``.
        hook_freq (int): Frequency at which hooks are evaluated.
        num_epochs (int): Number of times to iterate over the data.
        hooks (list): List containing :class:`Hook` instances. ``None``
            (the default) gives every instance its own new empty list.
        bar_position (int): Used by tqdm to place bars at the right
            position when using multiple Iterators in parallel.
        nogpu (bool): Accepted for interface compatibility; not used by
            this constructor.
        desc (str): Stored unchanged as ``self.desc``.
    """
    signal.signal(signal.SIGTERM, self._handle_sigterm)

    self.config = config
    self.root = root

    self.model = model
    self.dataset = dataset
    self.num_epochs = num_epochs

    # The default used to be a shared ``[]`` literal: hooks added to one
    # instance's list leaked into every other instance built without an
    # explicit ``hooks`` argument.
    self.hooks = [] if hooks is None else hooks
    self.hook_freq = hook_freq

    # Doubled relative to the requested position, as in the original.
    self.bar_pos = bar_position * 2
    self.desc = desc

    self.logger = get_logger(type(self).__name__)

    self._global_step = 0
    self._batch_step = 0
    self._epoch_step = 0
def __init__(self, paths, interval, root_path):
    """Store the logging settings and register the output handlers.

    Parameters
    ----------
    paths : list(str)
        List of key-paths to logging outputs. Will be expanded so they
        can be evaluated lazily.
    interval : int
        Interval of training steps before logging.
    root_path : str
        Path at which the logs are stored.
    """
    self.paths, self.interval, self.root = paths, interval, root_path

    self.logger = get_logger(self)

    # Maps an output category to the callables that handle it.
    handlers = {}
    handlers["images"] = [self.log_images]
    handlers["scalars"] = [self.log_scalars]
    self.handlers = handlers
def __init__(self, update_file, callback):
    """Remember the watched file and create it if it is missing.

    Args:
        update_file (str): path/to/yaml-file containing the parameters of
            interest. If it does not exist it is created containing a
            single explanatory comment line.
        callback (Callable): Each time something changes in the
            update_file this function is called with the content of the
            file as argument.
    """
    self.logger = get_logger(self)

    self.ufile, self.callback = update_file, callback
    self.last_updates = None

    file_is_missing = not os.path.exists(self.ufile)
    if file_is_missing:
        header = ("# Automatically created file. Changes made in here will "
                  "be recognized during runtime.")
        with open(self.ufile, "w+") as handle:
            handle.write(header)
def __init__(
    self,
    checkpoint_root,
    metric_template,
    metric_key,
    n_keep=5,
    lower_is_better=True,
):
    """Remember how checkpoints are ranked and how many are kept.

    Args:
        checkpoint_root (str): Path to look for checkpoints.
        metric_template (str): Format string to find metric file.
        metric_key (str): Key to use from metric file.
        n_keep (int): Maximum number of checkpoints to keep.
        lower_is_better (bool): Stored unchanged. Presumably selects
            whether smaller metric values rank higher -- TODO confirm
            against the code using it.
    """
    self.root = checkpoint_root
    self.metric_template, self.metric_key = metric_template, metric_key
    self.n_keep, self.lower_is_better = n_keep, lower_is_better
    self.logger = get_logger(self)
def __init__(self, root, keys, names=None, time_axis=None):
    '''Extracts the keys from the results, and plots the resulting
    tensor.

    Args:
        root (str): path/to/where the images are saved.
        keys (list of str): key/to/image_tensor.
        names (list of str): names for the image batches for saving. If
            None, the last element of keys.split('/') is used. When
            given, it must have as many entries as ``keys``.
        time_axis (int): If given, this axis is used to split the image
            batches into single frame batches. These must then have a
            rank of 4.
    '''
    self.root, self.keys, self.names = root, keys, names

    # Checked only when names are supplied.
    assert self.names is None or len(self.names) == len(self.keys)

    self.time_axis = time_axis
    self.logger = get_logger(self)
def __init__(
    self,
    log_ops=None,
    scalar_keys=None,
    histogram_keys=None,
    image_keys=None,
    log_keys=None,
    graph=None,
    interval=100,
    root_path="logs",
):
    """Store the logging configuration and open the summary writer.

    Args:
        log_ops (list): Ops to run at logging time. ``None`` means none.
        scalar_keys (list): Keys of the scalar outputs. Stored unchanged;
            how they are resolved is not visible here. ``None`` means
            none.
        histogram_keys (list): Keys of the histogram outputs. ``None``
            means none.
        image_keys (list): Keys of the image outputs. ``None`` means
            none.
        log_keys (list): Keys of the outputs logged to std out via
            logger. ``None`` means none.
        graph (tf.Graph): Current graph.
        interval (int): Interval of training steps before logging.
        root_path (str): Path at which the logs are stored. Also passed
            to ``SummaryWriter``.
    """
    # The defaults used to be shared ``[]`` literals stored directly on
    # the instance; every instance now gets its own lists.
    self.log_ops = [] if log_ops is None else log_ops
    self.scalar_keys = [] if scalar_keys is None else scalar_keys
    self.histogram_keys = [] if histogram_keys is None else histogram_keys
    self.image_keys = [] if image_keys is None else image_keys
    self.log_keys = [] if log_keys is None else log_keys

    self.interval = interval

    self.tb_logger = SummaryWriter(root_path)

    self.graph = graph
    self.root = root_path
    self.logger = get_logger(self)
def log_scalars(self, results, step, path):
    """Write every entry of ``results`` to the logger belonging to ``path``.

    Entries are emitted in sorted key order, one ``"<name>: <value>"``
    message per entry. The logger for ``path`` is created on demand the
    first time an entry is written; nothing is created when ``results``
    is empty.

    Parameters
    ----------
    results : dict
        Scalar values keyed by name.
    step : int
        Not used by this method.
    path : str
        Key selecting the logger in ``self.loggers``.
    """
    for key in sorted(results.keys()):
        if path not in self.loggers:
            self.loggers[path] = get_logger(path)
        message = "{}: {}".format(key, results[key])
        self.loggers[path].info(message)