# NOTE(review): this function is redefined (and extended) further down in this
# file; as written, the later definition shadows this one. Confirm which
# version is intended and delete the other.
def normalize_configuration(cfg: Namespace, train_mode: bool) -> None:
    """Given a configuration namespace, normalize the values it contains.

    Arguments:
        cfg: The namespace object returned by `Configuration.make_namespace`
        train_mode: Boolean flag controlling normalization of parameters only
            used during training.
    """
    if train_mode:
        _normalize_train_cfg(cfg)

    # Fall back to a default TF manager when the config does not provide one.
    if cfg.tf_manager is None:
        cfg.tf_manager = get_default_tf_manager()

    # Expand 2-tuples (series, evaluator) into the canonical 3-tuple form
    # (series, series, evaluator); 3-tuples are kept as-is.
    cfg.evaluation = [(e[0], e[0], e[1]) if len(e) == 2 else e
                      for e in cfg.evaluation]

    if cfg.evaluation:
        # The last evaluator in the list determines the main metric.
        cfg.main_metric = "{}/{}".format(cfg.evaluation[-1][0],
                                         cfg.evaluation[-1][-1].name)
    else:
        # No evaluators configured: use the last runner's loss as the metric.
        cfg.main_metric = "{}/{}".format(cfg.runners[-1].decoder_data_id,
                                         cfg.runners[-1].loss_names[0])

        # A loss is minimized, so the TF manager must be set up accordingly.
        if not cfg.tf_manager.minimize_metric:
            raise ValueError("minimize_metric must be set to True in "
                             "TensorFlowManager when using loss as "
                             "the main metric")
def normalize_configuration(cfg: Namespace, train_mode: bool) -> None:
    """Given a configuration namespace, normalize the values it contains.

    Arguments:
        cfg: The namespace object returned by `Configuration.make_namespace`
        train_mode: Boolean flag controlling normalization of parameters only
            used during training.

    Raises:
        ValueError: If neither or both of `batch_size` and `batching_scheme`
            are given, or if the main metric is a loss but the TF manager is
            not set to minimize it.
    """
    if train_mode:
        _normalize_train_cfg(cfg)

    # Fall back to a default TF manager when the config does not provide one.
    if cfg.tf_manager is None:
        cfg.tf_manager = get_default_tf_manager()

    # Exactly one of batch_size / batching_scheme must be specified.
    if (cfg.batch_size is None) == (cfg.batching_scheme is None):
        raise ValueError("You must specify either batch_size or "
                         "batching_scheme (not both).")

    # Normalize so that both cfg.batch_size and cfg.batching_scheme are set,
    # whichever one the user provided.
    if cfg.batch_size is not None:
        assert cfg.batching_scheme is None
        cfg.batching_scheme = BatchingScheme(batch_size=cfg.batch_size)
    else:
        assert cfg.batching_scheme is not None
        cfg.batch_size = cfg.batching_scheme.batch_size

    # Runners default to the training batch size unless overridden.
    if cfg.runners_batch_size is None:
        cfg.runners_batch_size = cfg.batching_scheme.batch_size

    # Runners get their own scheme; leftover buckets are always used so no
    # data is dropped at inference time — TODO confirm against BatchingScheme.
    cfg.runners_batching_scheme = BatchingScheme(
        batch_size=cfg.runners_batch_size,
        token_level_batching=cfg.batching_scheme.token_level_batching,
        use_leftover_buckets=True)

    # Expand 2-tuples (series, evaluator) into the canonical 3-tuple form
    # (series, series, evaluator); 3-tuples are kept as-is.
    cfg.evaluation = [(e[0], e[0], e[1]) if len(e) == 2 else e
                      for e in cfg.evaluation]

    if cfg.evaluation:
        # The last evaluator in the list determines the main metric.
        cfg.main_metric = "{}/{}".format(cfg.evaluation[-1][0],
                                         cfg.evaluation[-1][-1].name)
    else:
        # No evaluators configured: use the last runner's loss as the metric.
        cfg.main_metric = "{}/{}".format(cfg.runners[-1].decoder_data_id,
                                         cfg.runners[-1].loss_names[0])

        # A loss is minimized, so the TF manager must be set up accordingly.
        if not cfg.tf_manager.minimize_metric:
            raise ValueError("minimize_metric must be set to True in "
                             "TensorFlowManager when using loss as "
                             "the main metric")
def build_model(self) -> None:
    """Build the model parts of this experiment inside its TF graph.

    Seeds all relevant RNGs from the configured random seed, instantiates
    the model from the configuration, fills in derived defaults
    (runners batch size, TF manager), and — in train mode — validates the
    datasets against the runners' coders.

    Raises:
        RuntimeError: If the model has already been built.
    """
    # Guard against double initialization; building twice would re-create
    # graph variables.
    if self._model_built:
        raise RuntimeError("build_model() called twice")

    random.seed(self.config.args.random_seed)
    np.random.seed(self.config.args.random_seed)

    # NOTE(review): the extent of this `with` block is reconstructed from a
    # flattened source line — everything below is assumed to run under the
    # experiment graph's context; confirm against the original layout.
    with self.graph.as_default():
        tf.set_random_seed(self.config.args.random_seed)

        # Enable the created model parts to find this experiment while the
        # configuration is being built; reset afterwards.
        type(self)._current_experiment = self  # type: ignore
        self.config.build_model(warn_unused=self.train_mode)
        type(self)._current_experiment = None

        self._model = self.config.model
        self._model_built = True

        # Runners default to the training batch size unless overridden.
        if self.model.runners_batch_size is None:
            self.model.runners_batch_size = self.model.batch_size

        # Fall back to a default TF manager when none is configured.
        if self.model.tf_manager is None:
            self.model.tf_manager = get_default_tf_manager()

        if self.train_mode:
            check_dataset_and_coders(self.model.train_dataset,
                                     self.model.runners)
            # val_dataset may be a single Dataset or an iterable of them.
            if isinstance(self.model.val_dataset, Dataset):
                check_dataset_and_coders(self.model.val_dataset,
                                         self.model.runners)
            else:
                for val_dataset in self.model.val_dataset:
                    check_dataset_and_coders(val_dataset,
                                             self.model.runners)

        if self.train_mode and self.model.visualize_embeddings:
            visualize_embeddings(self.model.visualize_embeddings,
                                 self.model.output)

        self._check_unused_initializers()