def after_create_session(self, session, coord):
    """Report initial per-variable statistics once the session exists.

    Every global variable that passes the whitelist/blacklist filters is
    evaluated in ``session``; its average norm and zero count (as computed
    by ``_average_norm`` and ``_num_zeros``) are grouped under the name
    returned by ``_get_name``. Groups are reduced (mean of norms, sum of
    zero counts), logged, and sent to MLflow when ``self.use_mlflow`` is
    truthy.

    Args:
        session: Session used to evaluate each variable's current value.
        coord: Forwarded unchanged to the parent implementation.
    """
    super().after_create_session(session, coord)

    # Several variables may share one reported name (presumably the shards
    # of a partitioned variable), so collect per-name lists first.
    norms_by_key = defaultdict(list)
    zeros_by_key = defaultdict(list)

    for variable in tf.global_variables():
        # Whitelist: when set, at least one entry must be a substring of
        # the lower-cased variable name.
        if self.whitelist is not None:
            lowered = variable.name.lower()
            if not any(pattern in lowered for pattern in self.whitelist):
                continue

        # Blacklist: when set, no entry may be a substring of the name.
        if self.blacklist is not None:
            lowered = variable.name.lower()
            if any(pattern in lowered for pattern in self.blacklist):
                continue

        current = session.run(variable)

        norm_key = f"{_get_name(variable)}_init_average_norm"
        norms_by_key[norm_key].append(_average_norm(current))

        zeros_key = f"{_get_name(variable)}_init_num_zeros"
        zeros_by_key[zeros_key].append(_num_zeros(current))

    # Reduce each group: norms are averaged, zero counts are summed.
    metrics = {}
    for key, norm_samples in norms_by_key.items():
        metrics[key] = np.mean(norm_samples)
    for key, zero_counts in zeros_by_key.items():
        metrics[key] = sum(zero_counts)

    for key in metrics:
        name, value = key, metrics[key]
        LOGGER.info(f"{name} = {value}")

    if self.use_mlflow:
        mlflow.log_metrics(metrics)
def after_create_session(self, session, coord):
    """Log the global and trainable parameter counts after session creation.

    The counts come from ``get_num_params()``, which is unpacked as a
    ``(global, trainable)`` pair. Both are written to the logger and, when
    ``self.use_mlflow`` is truthy, also recorded as MLflow metrics.

    Args:
        session: Forwarded unchanged to the parent implementation.
        coord: Forwarded unchanged to the parent implementation.
    """
    super().after_create_session(session, coord)

    global_count, trainable_count = get_num_params()
    LOGGER.info(f"Number of parameters (global) = {global_count}")
    LOGGER.info(f"Number of parameters (trainable) = {trainable_count}")

    # Nothing more to do unless MLflow tracking is enabled.
    if not self.use_mlflow:
        return

    payload = {
        "num_params_global": global_count,
        "num_params_trainable": trainable_count,
    }
    mlflow.log_metrics(payload)