Ejemplo n.º 1
0
def setup_callbacks(cfg, callbacks, validation):
    """Append the callbacks enabled in `cfg` to `callbacks`.

    Parameters:
    ----------
    cfg: configuration object
        Must expose the sections `ModelCheckpoint`, `EarlyStopping`,
        `TensorBoard` and `TerminateOnNaN`, each with an `enabled` flag.
        The checkpoint and early-stopping sections are modified in place.
    callbacks: list-like supporting `append`
        Collection that receives the created callbacks.
    validation: bool
        Whether validation data is used during training.

    Return:
    ----------
    The tuple `(cfg, callbacks)`; both are the objects passed in.
    """
    checkpoint = cfg.ModelCheckpoint
    early_stop = cfg.EarlyStopping
    tensorboard = cfg.TensorBoard

    if not validation:
        # A "val_*" metric cannot be monitored without validation data:
        # the section is switched off and its monitor loses the prefix.
        for section in (checkpoint, early_stop):
            if section.enabled and section.monitor.startswith("val_"):
                section.enabled = False
                section.monitor = section.monitor[len("val_"):]

    if early_stop.enabled:
        stopper = EarlyStopping(
            monitor=early_stop.monitor,
            patience=early_stop.patience,
            mode=early_stop.mode,
            verbose=early_stop.verbose,
            baseline=early_stop.baseline,
            restore_best_weights=early_stop.restore_best_weights,
        )
        callbacks.append(stopper)

    if checkpoint.enabled:
        # Make sure the checkpoint path carries the expected file extension.
        if not checkpoint.path.endswith(gg.file_ext()):
            checkpoint.path = checkpoint.path + gg.file_ext()
        makedirs_from_filepath(checkpoint.path)
        saver_callback = ModelCheckpoint(
            checkpoint.path,
            monitor=checkpoint.monitor,
            save_best_only=checkpoint.save_best_only,
            save_weights_only=checkpoint.save_weights_only,
            verbose=checkpoint.vervose,
        )
        callbacks.append(saver_callback)

    if cfg.TerminateOnNaN.enabled:
        callbacks.append(TerminateOnNaN())

    if tensorboard.enabled:
        callbacks.append(
            tf.keras.callbacks.TensorBoard(
                tensorboard.log_dir,
                write_graph=tensorboard.write_graph,
                update_freq=tensorboard.update_freq,
                histogram_freq=tensorboard.histogram_freq,
                write_images=tensorboard.write_images,
            ))

    return cfg, callbacks
Ejemplo n.º 2
0
    def save(self,
             path=None,
             as_model=False,
             overwrite=True,
             save_format=None,
             **kwargs):
        """Save the whole model or only its weights to `path`.

        Parameters:
        ----------
        path: String or None
            Destination file; a falsy value falls back to `self.ckpt_path`.
        as_model: bool
            If true, the whole model is saved, otherwise only the weights.
        overwrite: bool
            Forwarded unchanged to the underlying `saver` function.
        save_format: forwarded unchanged to the underlying `saver` function.
        kwargs: extra keyword arguments, forwarded only when `as_model` is
            true.
        """
        target = path if path else self.ckpt_path

        makedirs_from_filepath(target)

        # Pick the writer matching both the save mode and the backend; any
        # backend other than "tensorflow" goes through the torch writers.
        if as_model:
            if self.backend == "tensorflow":
                writer = saver.save_tf_model
            else:
                writer = saver.save_torch_model
            extra = kwargs
        else:
            if self.backend == "tensorflow":
                writer = saver.save_tf_weights
            else:
                writer = saver.save_torch_weights
            # The weight writers do not receive the extra keyword arguments.
            extra = {}

        writer(self.model,
               target,
               overwrite=overwrite,
               save_format=save_format,
               **extra)
Ejemplo n.º 3
0
    def train(self, train_data, val_data=None, **kwargs):
        """Train the model on `train_data`, optionally validating on `val_data`.

        Parameters:
        ----------
        train_data: `Sequence` or raw input
            Anything that is not already a `Sequence` is converted with
            `self.train_sequence`.
        val_data: `Sequence`, raw input or None
            Anything that is not already a `Sequence` is converted with
            `self.test_sequence`. `None` disables validation.
        kwargs: training options merged into `self.cfg.train` through
            `merge_from_dict` before training starts.

        Return:
        ----------
        The `History` callback instance registered for this run.

        Raises:
        ----------
        RuntimeError: if `self.model` is `None`.
        """
        cache = self.cache
        cfg = self.cfg.train
        cfg.merge_from_dict(kwargs)
        ckpt_cfg = cfg.ModelCheckpoint
        es_cfg = cfg.EarlyStopping
        pb_cfg = cfg.Progbar

        model = self.model
        if model is None:
            raise RuntimeError(
                'You must compile your model before training/testing/predicting. Use `trainer.build()`.'
            )

        if not isinstance(train_data, Sequence):
            train_data = self.train_sequence(train_data)

        cache.train_data = train_data

        validation = val_data is not None

        if validation:
            if not isinstance(val_data, Sequence):
                val_data = self.test_sequence(val_data)
            cache.val_data = val_data
        elif ckpt_cfg.enabled and ckpt_cfg.monitor.startswith("val_"):
            # Without validation there is no "val_*" metric to monitor, so
            # fall back to the training metric of the same name.
            ckpt_cfg.monitor = ckpt_cfg.monitor[4:]
            warnings.warn(
                f"The metric 'val_{ckpt_cfg.monitor}' is invalid without validation "
                f"and has been automatically replaced with '{ckpt_cfg.monitor}'.",
                UserWarning)

        callbacks = callbacks_module.CallbackList()

        history = History()
        callbacks.append(history)

        if es_cfg.enabled:
            assert es_cfg.monitor.startswith("val")
            # `patience` must be the configured patience, not the metric name.
            es_callback = EarlyStopping(monitor=es_cfg.monitor,
                                        patience=es_cfg.patience,
                                        mode=es_cfg.mode,
                                        verbose=es_cfg.verbose)
            callbacks.append(es_callback)

        if ckpt_cfg.enabled:
            if not ckpt_cfg.path.endswith(gg.file_ext()):
                ckpt_cfg.path += gg.file_ext()
            makedirs_from_filepath(ckpt_cfg.path)

            # NOTE(review): `vervose` looks like a misspelling of `verbose`;
            # it is kept because the config schema is defined outside this
            # view -- confirm against the config defaults.
            mc_callback = ModelCheckpoint(
                ckpt_cfg.path,
                monitor=ckpt_cfg.monitor,
                save_best_only=ckpt_cfg.save_best_only,
                save_weights_only=ckpt_cfg.save_weights_only,
                verbose=ckpt_cfg.vervose)
            callbacks.append(mc_callback)

        callbacks.set_model(model)
        model.stop_training = False

        verbose = cfg.verbose
        epochs = cfg.epochs
        if verbose:
            if verbose <= 2:
                # One progress bar for the whole run, advanced once per epoch.
                progbar = Progbar(target=epochs,
                                  width=pb_cfg.width,
                                  verbose=verbose)
            print("Training...")

        logs = gf.BunchDict()
        callbacks.on_train_begin()
        try:
            for epoch in range(epochs):
                if verbose > 2:
                    # A fresh progress bar for every epoch.
                    progbar = Progbar(target=len(train_data),
                                      width=pb_cfg.width,
                                      verbose=verbose - 2)

                callbacks.on_epoch_begin(epoch)
                callbacks.on_train_batch_begin(0)
                train_logs = self.train_step(train_data)
                train_data.on_epoch_end()
                logs.update(train_logs)

                if validation:
                    valid_logs = self.test_step(val_data)
                    logs.update({("val_" + k): v
                                 for k, v in valid_logs.items()})
                    val_data.on_epoch_end()

                callbacks.on_train_batch_end(len(train_data), logs)
                callbacks.on_epoch_end(epoch, logs)

                if verbose > 2:
                    print(f"Epoch {epoch+1}/{epochs}")
                    progbar.update(len(train_data), logs.items())
                elif verbose:
                    progbar.update(epoch + 1, logs.items())

                if model.stop_training:
                    print(f"Early Stopping at Epoch {epoch}", file=sys.stderr)
                    break

            callbacks.on_train_end()
            if ckpt_cfg.enabled:
                # Reload what the checkpoint callback wrote to disk.
                if ckpt_cfg.save_weights_only:
                    model.load_weights(ckpt_cfg.path)
                else:
                    self.model = model.load(ckpt_cfg.path)

        finally:
            # Runs even when training is interrupted by an exception.
            if ckpt_cfg.enabled and ckpt_cfg.remove_weights:
                self.remove_weights()

        return history
Ejemplo n.º 4
0
    def train(self,
              train_data,
              val_data=None,
              epochs=200,
              early_stopping=None,
              verbose=1,
              save_best=True,
              ckpt_path=None,
              as_model=False,
              monitor='val_accuracy',
              early_stop_metric='val_loss',
              callbacks=None,
              **kwargs):
        """Train the model for the input `train_data` of nodes or `sequence`.

        Note:
        ----------
        You must compile your model before training/testing/predicting. Use `model.build()`.

        Parameters:
        ----------
        train_data: Numpy array-like, `list`, Integer scalar or `graphgallery.Sequence`
            The index of objects (or sequence) that will be used during training.
        val_data: Numpy array-like, `list`, Integer scalar or
            `graphgallery.Sequence`, optional
            The index of objects (or sequence) that will be used for validation.
            (default :obj: `None`, i.e., do not use validation during training)
        epochs: Positive integer
            The number of epochs of training. (default :obj: `200`)
        early_stopping: Positive integer or None
            The number of early stopping patience during training.
            (default :obj: `None`, i.e., do not use early stopping during training)
        verbose: int in {0, 1, 2, 3, 4}
            'verbose=0': not verbose;
            'verbose=1': Progbar (one line, detailed);
            'verbose=2': Progbar (one line, omitted);
            'verbose=3': Progbar (multi line, detailed);
            'verbose=4': Progbar (multi line, omitted);
            (default :obj: 1)
        save_best: bool
            Whether to save the best weights (accuracy or loss, depending on
            `monitor`) and load them back once training has finished.
            (default :bool: `True`)
        ckpt_path: String or None
            The path of saved weights/model. When given it also replaces
            `self.ckpt_path`; otherwise `self.ckpt_path` is used.
        as_model: bool
            Whether to save the whole model or weights only, if `True`, the `self.custom_objects`
            must be specified if you are using custom `layer` or `loss` and so on.
        monitor: String
            One of evaluation metrics, e.g., val_loss, val_accuracy, loss, accuracy,
            it determines which metric will be used for `save_best`.
            If it is not among the known metric names, the last known metric
            name is used instead and a `UserWarning` is issued.
            (default :obj: `val_accuracy`)
        early_stop_metric: String
            One of evaluation metrics, e.g., val_loss, val_accuracy, loss, accuracy,
            it determines which metric will be used for early stopping.
            (default :obj: `val_loss`)
        callbacks: a `CallbackList`, or something accepted by the
            `CallbackList` constructor. (default :obj: `None`)
        kwargs: other keyword Parameters, handed to `raise_if_kwargs` first.

        Return:
        ----------
        The `History` callback instance registered for this run.

        Raises:
        ----------
        ValueError: if `verbose` is not an integer in [0, 4].
        RuntimeError: if the model has not been built, or if it exposes no
            `metrics_names`.

        """
        raise_if_kwargs(kwargs)
        if not (isinstance(verbose, int) and 0 <= verbose <= 4):
            raise ValueError("'verbose=0': not verbose, "
                             "'verbose=1': Progbar(one line, detailed), "
                             "'verbose=2': Progbar(one line, omitted), "
                             "'verbose=3': Progbar(multi line, detailed), "
                             "'verbose=4': Progbar(multi line, omitted), "
                             f"but got {verbose}")
        model = self.model
        # Check if model has been built
        if model is None:
            raise RuntimeError(
                'You must compile your model before training/testing/predicting. Use `model.build()`.'
            )

        metrics_names = getattr(model, "metrics_names", None)
        # FIXME: This would return '[]' for tensorflow>=2.2.0
        # See <https://github.com/tensorflow/tensorflow/issues/37990>
        # metrics_names = ['loss', 'accuracy']
        if not metrics_names:
            raise RuntimeError("Please specify the attribute 'metrics_names' for the model.")
        if not isinstance(train_data, Sequence):
            train_data = self.train_sequence(train_data)

        self.train_data = train_data

        validation = val_data is not None

        if validation:
            if not isinstance(val_data, Sequence):
                val_data = self.test_sequence(val_data)
            self.val_data = val_data
            metrics_names = metrics_names + ["val_" + metric for metric in metrics_names]

        if not isinstance(callbacks, callbacks_module.CallbackList):
            callbacks = callbacks_module.CallbackList(callbacks)

        history = History()
        callbacks.append(history)

        if early_stopping:
            es_callback = EarlyStopping(monitor=early_stop_metric,
                                        patience=early_stopping,
                                        mode='auto',
                                        verbose=kwargs.pop('es_verbose', 1))
            callbacks.append(es_callback)

        if save_best:
            if not ckpt_path:
                ckpt_path = self.ckpt_path
            else:
                self.ckpt_path = ckpt_path

            makedirs_from_filepath(ckpt_path)

            if not ckpt_path.endswith(gg.file_ext()):
                ckpt_path = ckpt_path + gg.file_ext()

            if monitor not in metrics_names:
                # Report the rejected name before it is replaced; otherwise
                # the warning would show the fallback name twice.
                fallback = metrics_names[-1]
                warnings.warn(f"'{monitor}' are not included in the metrics names. default to '{fallback}'.",
                              UserWarning)
                monitor = fallback

            mc_callback = ModelCheckpoint(ckpt_path,
                                          monitor=monitor,
                                          save_best_only=True,
                                          save_weights_only=not as_model,
                                          verbose=0)
            callbacks.append(mc_callback)

        callbacks.set_model(model)
        model.stop_training = False

        if verbose:
            if verbose <= 2:
                # One progress bar for the whole run, advanced once per epoch.
                progbar = Progbar(target=epochs,
                                  width=20,
                                  verbose=verbose)
            print("Training...")

        logs = BunchDict()
        callbacks.on_train_begin()
        try:
            for epoch in range(epochs):
                if verbose > 2:
                    # A fresh progress bar for every epoch.
                    progbar = Progbar(target=len(train_data),
                                      width=20,
                                      verbose=verbose - 2)

                callbacks.on_epoch_begin(epoch)
                callbacks.on_train_batch_begin(0)
                train_logs = self.train_step(train_data)
                train_data.on_epoch_end()

                logs.update(train_logs)

                if validation:
                    valid_logs = self.test_step(val_data)
                    logs.update({("val_" + k): v for k, v in valid_logs.items()})
                    val_data.on_epoch_end()

                callbacks.on_train_batch_end(len(train_data), logs)
                callbacks.on_epoch_end(epoch, logs)

                if verbose > 2:
                    print(f"Epoch {epoch+1}/{epochs}")
                    progbar.update(len(train_data), logs.items())
                elif verbose:
                    progbar.update(epoch + 1, logs.items())

                if model.stop_training:
                    print(f"Early Stopping at Epoch {epoch}", file=sys.stderr)
                    break

            callbacks.on_train_end()
            if save_best:
                # A checkpoint is only written when `save_best` is set; with
                # it off there is nothing from this run to load back.
                self.load(ckpt_path, as_model=as_model)
        finally:
            # Runs even when training is interrupted by an exception.
            self.remove_weights()

        return history