Example #1
def report_timing(level=Logging.VERBOSE):
    Logging(level).log("Time consumed:")
    # Report each recorded timer in alphabetical order.
    for name, seconds in sorted(_TimingHelperClass.time_records.items()):
        Logging(level).log(f"> {name}: {seconds:f}")
    Logging(level).log("------")
    # Reset the records so the next report starts fresh.
    _TimingHelperClass.time_records = {}
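A minimal usage sketch (illustrative values; `report_timing` only assumes that
timings have already been accumulated into `_TimingHelperClass.time_records`):

    _TimingHelperClass.time_records = {'backward': 2.34, 'forward': 1.23}
    report_timing()  # logs each entry at Logging.VERBOSE, then resets the dict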
Example #2
def progress(iterable=None, verbose=True, **kwargs):  # type: ignore
    # Fallback used when the `tqdm` package is unavailable.
    if not verbose:
        # Suppress output: return a no-op stand-in for the tqdm interface.
        return _DummyTqdm(iterable)
    Logging.warn(
        "`tqdm` package is not installed, no progress bar is shown.",
        category=ImportWarning)
    if isinstance(iterable, int):
        # Mirror `tqdm.trange`: an integer argument means a range of that size.
        return range(iterable)
    return iterable
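A hypothetical usage of this fallback, assuming `_DummyTqdm` is an iterable
stand-in that mimics the `tqdm` interface (`items` is a placeholder name):

    for i in progress(100):                   # warns, then acts like range(100)
        ...
    for x in progress(items, verbose=False):  # silent tqdm-compatible wrapper
        ...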
Example #3
    def train(self) -> None:
        # TODO: Incorporate `torchutils.prevent_oom`
        Logging(1).log("Training start.", timestamp=self.timestamp)
        while self.max_epochs == -1 or self.epoch < self.max_epochs:
            iterator = self.create_data_iterator()

            for batch in iterator:
                self.iterations += 1

                for hook in self.before_iteration_hooks:
                    hook(self)

                record_values = self.train_step(batch)

                for name, value in record_values.items():
                    if isinstance(value, tuple):
                        self.records[name].record.add(*value)
                    else:
                        self.records[name].record.add(value)

                for hook in self.after_iteration_hooks:
                    hook(self)

                if self.iterations % self.log_iters == 0:
                    self._print_summary(period='log')

                if self.iterations % self.valid_iters == 0:
                    self._print_summary(period='validate')

                    metric = self.validate()
                    # A new best must improve on the entire history so far.
                    if len(self.validation_history) == 0 or \
                            (self.metric_higher_better and metric > max(self.validation_history)) or \
                            (not self.metric_higher_better and metric < min(self.validation_history)):
                        self.bad_counter = 0
                        self.save_model()
                    else:
                        self.bad_counter += 1
                        Logging(1).log(
                            f"{utils.ordinal(self.bad_counter)} time degradation "
                            f"(threshold={self.decay_threshold}).")
                        if self.bad_counter >= self.decay_threshold:
                            self.decay_times += 1
                            if self.decay_times > self.patience:
                                Logging(1).log("Early stop!", color='red')
                                return
                            self.bad_counter = 0
                            self.decay()

                    self.validation_history.append(metric)

            self.epoch += 1
            self._print_summary(period='epoch')
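The loop above only requires each hook to be a callable that takes the trainer
instance; `log_progress` below is a hypothetical example:

    def log_progress(trainer):
        if trainer.iterations % 1000 == 0:
            Logging(1).log(f"Reached iteration {trainer.iterations}.")

    trainer.before_iteration_hooks.append(log_progress)
    trainer.train()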
Example #4
def wrapped(model, *args, **kwargs):
    # TODO: is there a better way to inspect training loop stats?
    nonlocal first_time
    try:
        result = func(model, *args, **kwargs)
        first_time = False
        return result
    except RuntimeError as e:
        if 'out of memory' not in str(e):
            raise  # not an OOM error; re-raise unchanged
        # This is an OOM error: release cached memory and try once more.
        Logging.warn("CUDA out of memory error caught. " + _cuda_memory_str())
        gc.collect()
        torch.cuda.empty_cache()
        Logging.warn("Forced GC complete. " + _cuda_memory_str())
        try:
            result = func(model, *args, **kwargs)
            first_time = False
            return result
        except RuntimeError as e:
            if 'out of memory' not in str(e):
                raise  # a different error surfaced on the retry
            if first_time:
                # OOM on the very first run is likely caused by other
                # factors (e.g. batch size), so saving would not help.
                raise RuntimeError(
                    "CUDA out of memory error caught at first run. "
                    "Something else must be wrong.")
            # OOM again: this can't be fixed, so save the whole model
            # and optimizer states before bailing out.
            filename = (f"{model.__class__.__name__}_"
                        f"{time.strftime('%Y-%m-%d %H:%M:%S')}.pt")
            save_checkpoint(model, optim, filename=filename)
            raise RuntimeError(
                "CUDA out of memory error caught after forced GC. "
                f"Model & optimizer saved to {filename}. " +
                _cuda_memory_str())
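The closed-over names `func`, `optim`, and `first_time` imply an enclosing
decorator roughly like the following (a hypothetical reconstruction, not the
library's actual signature; `train_step`, `optimizer`, and `batch` are
placeholder names):

    def prevent_oom(func, optim):
        first_time = True

        def wrapped(model, *args, **kwargs):
            ...  # body as shown above

        return wrapped

    safe_step = prevent_oom(train_step, optimizer)
    loss = safe_step(model, batch)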
Example #5
def _print_summary(self, period: str = 'log') -> None:
    summary = []
    for record in self.records.values():
        if record.period != period:
            continue
        value = record.record.value()
        if record.post_compute is not None:
            value = record.post_compute(value)
        if isinstance(value, float):
            value = f'{value:.{record.precision}f}'
        summary.append((record.display, value))
        record.record.clear()  # reset so the next period starts fresh
    if not summary:
        return
    records = ', '.join(f'{name}={value}' for name, value in summary)
    log_message = self.LOG_MESSAGE.format(
        epoch=self.epoch, iter=self.iterations, records=records)
    Logging(1).log(log_message, timestamp=self.timestamp)
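The `format` call implies a class-level template with `epoch`, `iter`, and
`records` fields, along these lines (hypothetical wording):

    LOG_MESSAGE = "Epoch {epoch}, Iteration {iter}: {records}"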
Example #6
@contextlib.contextmanager  # assumed: needed for use in a `with` block
def work_in_progress(msg):
    # Log a message, run the body, then log the elapsed time.
    begin_time = time.time()
    Logging.verbose(msg + "... ", end='')
    yield
    Logging.verbose(f"done. ({time.time() - begin_time:.2f}s)")
Example #7
    def __init__(self,
                 num_layers: int,
                 input_dim: int,
                 output_dim: int,
                 hidden_dims: Optional[List[int]] = None,
                 activation: MaybeList[Union[str, Activation]] = 'id',
                 bias: MaybeList[bool] = True,
                 bias_init: Optional[MaybeList[Optional[float]]] = None,
                 dropout: Optional[MaybeList[float]] = None):
        # validate num_layers
        if not isinstance(num_layers, int) or num_layers < 1:
            raise ValueError("`num_layers` should be a positive integer.")
        # validate hidden_dims
        hidden_dims = hidden_dims or []
        if len(hidden_dims) != num_layers - 1:
            raise ValueError("Length of `hidden_dim` should be `layers` - 1.")
        # validate bias
        if isinstance(bias, (bool, int)):
            bias = [bias] * num_layers
        if not (len(bias) == num_layers
                and all(isinstance(b, (bool, int)) for b in bias)):
            raise ValueError(
                "`bias` should be either a boolean, or a list of booleans "
                "of length `num_layers`.")
        # validate bias_init
        if bias_init is not None:
            if isinstance(bias_init, (float, int)):
                bias_init = [bias_init] * num_layers
            if not (len(bias_init) == num_layers
                    and all(b is None or isinstance(b, (float, int))
                            for b in bias_init)):
                raise ValueError(
                    "`bias_init` should be either a float, or a list of "
                    "floats of length `num_layers`.")
        else:
            bias_init = [None] * num_layers
        # validate dropout
        if dropout is not None:
            if isinstance(dropout, float):
                dropout = [dropout] * num_layers
            if not (len(dropout) == num_layers and all(
                    isinstance(d, float) and 0 <= d < 1 for d in dropout)):
                raise ValueError(
                    "`dropout` should be either a float in range [0, 1), "
                    "or a list of floats of length `num_layers`.")
        else:
            dropout = [0.0] * num_layers
        # validate activation
        if isinstance(activation, str) or callable(activation):
            if activation == 'id' and num_layers > 1:
                is_bottleneck = (num_layers == 2
                                 and input_dim > hidden_dims[0]
                                 and output_dim > hidden_dims[0])
                if not is_bottleneck:
                    Logging.warn(
                        "Using identity transform for non-bottleneck MLPs with more than one layer. "
                        "This is likely an incorrect setting.")
            if num_layers == 1:
                activation = [activation]
            else:
                activation = [activation] * (num_layers - 1) + ['id']
        elif len(activation) == num_layers - 1:
            activation = activation + ['id']
        if not (isinstance(activation, list) and len(activation) == num_layers
                and all((isinstance(f, str) and f in self.activation_func)
                        or callable(f) for f in activation)):
            raise ValueError(
                "Format of `activation` is incorrect. "
                "Refer to the docstring for details.")

        super().__init__()

        dims = [input_dim] + hidden_dims + [output_dim]
        self.layers = nn.ModuleList([
            nn.Linear(in_features=in_dim, out_features=out_dim, bias=b)
            for in_dim, out_dim, b in zip(dims[:-1], dims[1:], bias)
        ])
        self.activations = [
            f if callable(f) else self.activation_func[f] for f in activation
        ]
        self.dropouts = [float(d) for d in dropout]
        self.bias_init = [
            float(b) if b is not None else None for b in bias_init
        ]

        self.reset_parameters()
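A hypothetical construction, assuming the class is exposed as `MLP` and that
`'relu'` is a key of `activation_func`; per the rules above, the activations
expand to `['relu', 'relu', 'id']` so the output layer stays linear:

    mlp = MLP(num_layers=3, input_dim=128, output_dim=10,
              hidden_dims=[64, 32], activation='relu', dropout=0.5)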
Example #8
@contextlib.contextmanager  # assumed, as in Example #6
def work_in_progress(msg: str):
    # Variant of Example #6: the timer starts after the message is printed,
    # so printing time is excluded from the measurement.
    Logging.verbose(msg + "... ", end='')
    begin_time = time.time()
    yield
    time_consumed = time.time() - begin_time
    Logging.verbose(f"done. ({time_consumed:.2f}s)")
Example #9
def wrapped(*args, **kwargs):
    # Emit a deprecation warning, then delegate to the wrapped function.
    Logging.warn(warn_msg, category=DeprecationWarning)
    return func(*args, **kwargs)
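The free variables `warn_msg` and `func` point to an enclosing decorator
factory along these lines (a hypothetical reconstruction; assumes
`import functools`):

    def deprecated(warn_msg):
        def decorator(func):
            @functools.wraps(func)
            def wrapped(*args, **kwargs):
                Logging.warn(warn_msg, category=DeprecationWarning)
                return func(*args, **kwargs)
            return wrapped
        return decorator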