Example #1
def _set_checkpointer(self, model):
    # A checkpointer is created when checkpointing is enabled, and also when
    # the lr_scheduler is reduce_on_plateau (which monitors a metric).
    if (
        self.config["checkpoint"]
        or self.config["lr_scheduler"] == "reduce_on_plateau"
    ):
        self._validate_checkpoint_metric(model)
        # Default checkpoint_dir to log_dir/checkpoints/
        if self.writer:
            if not self.config["checkpoint_config"]["checkpoint_dir"]:
                self.config["checkpoint_config"]["checkpoint_dir"] = os.path.join(
                    self.writer.log_subdir, "checkpoints"
                )
            else:
                # A hardcoded checkpoint_dir means checkpoints from
                # concurrent runs may overwrite each other.
                msg = (
                    "You have provided checkpoint_dir, overriding the default "
                    "of using log_dir/run_dir/run_name/checkpoints. Be careful: "
                    "multiple concurrent runs may overwrite each other."
                )
                warnings.warn(msg)
        else:
            self.config["checkpoint_config"]["checkpoint_dir"] = "checkpoints"
        # Create the Checkpointer from the (possibly updated) config
        self.checkpointer = Checkpointer(
            self.config["checkpoint_config"], verbose=self.config["verbose"]
        )
    else:
        self.checkpointer = None
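
The directory-defaulting logic above is easy to exercise in isolation. Below is a minimal standalone sketch of the same idea; resolve_checkpoint_dir, the log_subdir argument, and the config layout are hypothetical stand-ins for this demo, not part of the library.

import os

def resolve_checkpoint_dir(checkpoint_config, log_subdir=None):
    # Hypothetical helper mirroring the defaulting logic in Example #1
    if log_subdir:
        if not checkpoint_config["checkpoint_dir"]:
            # Default: nest checkpoints under the run's log directory
            checkpoint_config["checkpoint_dir"] = os.path.join(log_subdir, "checkpoints")
    else:
        # No writer/log directory: fall back to a local ./checkpoints
        checkpoint_config["checkpoint_dir"] = "checkpoints"
    return checkpoint_config

# e.g. {"checkpoint_dir": None} with log_subdir="logs/run_0"
# resolves to {"checkpoint_dir": "logs/run_0/checkpoints"}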
Example #2
def _set_checkpointer(self, train_config):
    # Simplest variant: build a Checkpointer only if checkpointing is enabled
    if train_config["checkpoint"]:
        self.checkpointer = Checkpointer(
            train_config["checkpoint_config"], verbose=self.config["verbose"]
        )
    else:
        self.checkpointer = None
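
As a quick illustration of how this simplest variant behaves, here is a hedged, self-contained sketch. StubCheckpointer and StubTrainer are invented stand-ins for the demo; only the config keys follow the example above.

class StubCheckpointer:
    # Stand-in for the real Checkpointer, for illustration only
    def __init__(self, checkpoint_config, verbose=True):
        self.checkpoint_config = checkpoint_config
        self.verbose = verbose

class StubTrainer:
    def __init__(self, config):
        self.config = config

    def _set_checkpointer(self, train_config):
        if train_config["checkpoint"]:
            self.checkpointer = StubCheckpointer(
                train_config["checkpoint_config"], verbose=self.config["verbose"]
            )
        else:
            self.checkpointer = None

trainer = StubTrainer({"verbose": True})
trainer._set_checkpointer(
    {"checkpoint": True, "checkpoint_config": {"checkpoint_dir": "checkpoints"}}
)
assert trainer.checkpointer is not None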
Example #3
def _set_checkpointer(self, train_config):
    if train_config["checkpoint"]:
        # Default to the valid split for the checkpoint metric
        checkpoint_config = train_config["checkpoint_config"]
        checkpoint_metric = checkpoint_config["checkpoint_metric"]
        if checkpoint_metric.count("/") == 0:
            checkpoint_config["checkpoint_metric"] = f"valid/{checkpoint_metric}"
        self.checkpointer = Checkpointer(
            checkpoint_config, verbose=self.config["verbose"]
        )
    else:
        self.checkpointer = None
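
The split-prefixing rule in Example #3 (a bare metric name defaults to the valid split) can be shown on its own. prefix_metric below is a hypothetical helper written for this sketch, not library API.

def prefix_metric(checkpoint_metric, default_split="valid"):
    # A metric name with no "/" is assumed to come from the default split
    if checkpoint_metric.count("/") == 0:
        return f"{default_split}/{checkpoint_metric}"
    return checkpoint_metric

assert prefix_metric("accuracy") == "valid/accuracy"
assert prefix_metric("train/loss") == "train/loss"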
Example #4
def _set_checkpointer(self, model):
    if (
        self.config["checkpoint"]
        or self.config["lr_scheduler"] == "reduce_on_plateau"
    ):
        self._validate_checkpoint_metric(model)
        # Default checkpoint_dir to log_dir/checkpoints/
        if self.writer:
            if not self.config["checkpoint_config"]["checkpoint_dir"]:
                self.config["checkpoint_config"]["checkpoint_dir"] = os.path.join(
                    self.writer.log_subdir, "checkpoints"
                )
            else:
                # A hardcoded checkpoint_dir means checkpoints from
                # concurrent runs may overwrite each other.
                msg = (
                    "You have provided checkpoint_dir, overriding the default "
                    "of using log_dir/run_dir/run_name/checkpoints. Be careful: "
                    "multiple concurrent runs may overwrite each other."
                )
                warnings.warn(msg)
        else:
            self.config["checkpoint_config"]["checkpoint_dir"] = "checkpoints"
        # Create the Checkpointer
        self.checkpointer = Checkpointer(
            self.config["checkpoint_config"], verbose=self.config["verbose"]
        )
    else:
        self.checkpointer = None

    # EXPERIMENTAL: Optionally add task-specific checkpointers
    # HACK: This is hard-coded in a way specific to GLUE!
    self.task_checkpointers = []
    if self.config["checkpoint_tasks"]:
        msg = (
            "The checkpoint_tasks setting does not have the same thorough "
            "error checking as the normal checkpoint operation, so be careful: "
            "you may be checkpointing metrics that never appear in the "
            "metrics_dict."
        )
        warnings.warn(msg)
        for task_name in self.task_names:
            # Only make task-specific checkpointers for the GLUE tasks

            # HACK: allow checkpointing on slice tasks ("<task>:<slice>")
            using_slice = ":" in task_name
            orig_task_name = task_name.split(":")[0] if using_slice else None

            if task_name not in GLUE_METRICS and orig_task_name not in GLUE_METRICS:
                continue
            checkpoint_config = copy.deepcopy(self.config["checkpoint_config"])
            checkpoint_config["checkpoint_dir"] += f"/{task_name}"
            checkpoint_config["checkpoint_best"] = True

            # Score slice tasks against their base task's valid-split metric
            if using_slice:
                checkpoint_metric = (
                    f"{task_name}/{orig_task_name}_valid/"
                    f"{GLUE_METRICS[orig_task_name]}"
                )
            else:
                checkpoint_metric = (
                    f"{task_name}/{task_name}_valid/{GLUE_METRICS[task_name]}"
                )
            checkpoint_config["checkpoint_metric"] = checkpoint_metric
            checkpoint_config["checkpoint_metric_mode"] = "max"
            task_checkpointer = Checkpointer(
                checkpoint_config, verbose=self.config["verbose"]
            )
            self.task_checkpointers.append(task_checkpointer)
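
The metric-name construction for slice tasks is the subtlest part of Example #4. The sketch below reproduces just that string logic; the GLUE_METRICS entry and the helper are hypothetical, written only to demonstrate the naming scheme.

GLUE_METRICS = {"RTE": "accuracy"}  # hypothetical single entry for the demo

def checkpoint_metric_for(task_name):
    # Slice tasks are named "<task>:<slice>"; they are scored against the
    # base task's valid-split metric
    using_slice = ":" in task_name
    orig_task_name = task_name.split(":")[0] if using_slice else task_name
    return f"{task_name}/{orig_task_name}_valid/{GLUE_METRICS[orig_task_name]}"

assert checkpoint_metric_for("RTE") == "RTE/RTE_valid/accuracy"
assert checkpoint_metric_for("RTE:question") == "RTE:question/RTE_valid/accuracy"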