Example #1
0
    def after_create_session(self, session, coord):
        """Report initial statistics of the graph's global variables.

        After delegating to the parent hook, every variable returned by
        ``tf.global_variables()`` that passes the optional
        ``self.whitelist`` / ``self.blacklist`` substring filters (matched
        against the lower-cased variable name) is fetched with
        ``session.run``. Its ``_average_norm`` and ``_num_zeros`` are stored
        under keys derived from ``_get_name``. Variables that map to the same
        key have their norms averaged and their zero counts summed. Each
        resulting metric is written to ``LOGGER`` and, when
        ``self.use_mlflow`` is truthy, sent to MLflow in a single call.

        Parameters
        ----------
        session
            Session used to evaluate the variables; forwarded to the parent.
        coord
            Forwarded unchanged to the parent implementation.
        """
        super().after_create_session(session, coord)

        def _mentions(patterns, lowered_name):
            # True when at least one pattern is a substring of the name.
            return any(pattern in lowered_name for pattern in patterns)

        # Gather one entry per variable, grouped by reported metric name.
        norms_by_key = defaultdict(list)
        zeros_by_key = defaultdict(list)
        for variable in tf.global_variables():
            lowered = variable.name.lower()
            if self.whitelist is not None and not _mentions(
                    self.whitelist, lowered):
                continue
            if self.blacklist is not None and _mentions(
                    self.blacklist, lowered):
                continue
            fetched = session.run(variable)
            base_name = _get_name(variable)
            norms_by_key[f"{base_name}_init_average_norm"].append(
                _average_norm(fetched))
            zeros_by_key[f"{base_name}_init_num_zeros"].append(
                _num_zeros(fetched))

        # Collapse grouped entries (presumably the partitions of a single
        # variable): norms are averaged first, then zero counts are summed.
        metrics = {}
        for key, norms in norms_by_key.items():
            metrics[key] = np.mean(norms)
        for key, zero_counts in zeros_by_key.items():
            metrics[key] = sum(zero_counts)

        # Emit the results
        for key, result in metrics.items():
            LOGGER.info(f"{key} = {result}")
        if not self.use_mlflow:
            return
        mlflow.log_metrics(metrics)
Example #2
0
 def after_create_session(self, session, coord):
     """Log the global and trainable parameter counts.

     Delegates to the parent hook, obtains both counts from
     ``get_num_params()``, writes them to ``LOGGER`` and, when
     ``self.use_mlflow`` is truthy, records them as MLflow metrics.
     """
     super().after_create_session(session, coord)
     global_count, trainable_count = get_num_params()
     LOGGER.info(f"Number of parameters (global) = {global_count}")
     LOGGER.info(f"Number of parameters (trainable) = {trainable_count}")
     if not self.use_mlflow:
         return
     param_metrics = {
         "num_params_global": global_count,
         "num_params_trainable": trainable_count,
     }
     mlflow.log_metrics(param_metrics)