Example #1
    def __init__(
            self,
            filepath: str = './callback/val_mae_{epoch:05d}_{val_mae:.6f}.hdf5',
            monitor: str = 'val_mae',
            verbose: int = 0,
            save_best_only: bool = True,
            save_weights_only: bool = False,
            val_gen: Iterable = None,
            steps_per_val: int = None,
            target_scaler: Scaler = None,
            period: int = 1,
            mode: str = 'auto'):
        super().__init__()
        if val_gen is None:
            raise ValueError('No validation data is provided!')
        self.verbose = verbose
        if self.verbose > 0:
            logging.basicConfig(level=logging.INFO)
        self.filepath = filepath
        self.save_best_only = save_best_only
        self.save_weights_only = save_weights_only
        self.period = period
        self.epochs_since_last_save = 0
        self.val_gen = val_gen
        self.steps_per_val = steps_per_val
        self.target_scaler = target_scaler
        if self.target_scaler is None:
            self.target_scaler = DummyScaler()

        if monitor == 'val_mae':
            self.metric = mae
            self.monitor = 'val_mae'
        elif monitor == 'val_acc':
            self.metric = accuracy
            self.filepath = self.filepath.replace('val_mae', 'val_acc')
            self.monitor = 'val_acc'

        if mode == 'min':
            self.monitor_op = np.less
            self.best = np.Inf
        elif mode == 'max':
            self.monitor_op = np.greater
            self.best = -np.Inf
        else:
            if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
                self.monitor_op = np.greater
                self.best = -np.Inf
            else:
                self.monitor_op = np.less
                self.best = np.Inf
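
A minimal usage sketch (hypothetical `train_generator`, `val_generator`, and `model`; the import path `megnet.callbacks` is assumed). The callback evaluates `val_gen` itself, so the monitored metric does not rely on Keras' built-in validation:

from megnet.callbacks import ModelCheckpointMAE

# `train_generator`/`val_generator` are placeholder Keras-style sequences of
# (inputs, targets) batches; `model` is a compiled MEGNet Keras model.
checkpoint = ModelCheckpointMAE(val_gen=val_generator,
                                steps_per_val=len(val_generator),
                                save_best_only=True)
model.fit_generator(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=100,
                    callbacks=[checkpoint])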
Example #2
    def __init__(self,
                 model,
                 graph_converter,
                 target_scaler=DummyScaler(),
                 metadata=None,
                 **kwargs):
        self.model = model
        self.graph_converter = graph_converter
        self.target_scaler = target_scaler
        self.metadata = metadata or {}
Example #3
    def __init__(self,
                 train_gen,
                 steps_per_train=None,
                 val_gen=None,
                 steps_per_val=None,
                 y_scaler=None,
                 n_every=5,
                 val_names=None,
                 val_units=None,
                 is_pa=False):
        super().__init__()
        self.train_gen = train_gen
        self.val_gen = val_gen
        self.steps_per_train = steps_per_train
        self.steps_per_val = steps_per_val
        self.yscaler = y_scaler
        self.epochs = []
        self.total_epoch = 0
        self.n_every = n_every
        self.val_names = val_names
        self.val_units = val_units
        self.is_pa = is_pa
        if self.yscaler is None:
            self.yscaler = DummyScaler()
Example #4
    def __init__(self,
                 train_gen: Iterable,
                 steps_per_train: int = None,
                 val_gen: Iterable = None,
                 steps_per_val: int = None,
                 y_scaler: Scaler = None,
                 n_every: int = 5,
                 val_names: List[str] = None,
                 val_units: List[str] = None,
                 is_pa: bool = False):
        super().__init__()
        self.train_gen = train_gen
        self.val_gen = val_gen
        self.steps_per_train = steps_per_train
        self.steps_per_val = steps_per_val
        self.yscaler = y_scaler
        self.epochs = []
        self.total_epoch = 0
        self.n_every = n_every
        self.val_names = val_names
        self.val_units = val_units
        self.is_pa = is_pa
        if self.yscaler is None:
            self.yscaler = DummyScaler()
Example #5
    def __init__(self,
                 model: Model,
                 graph_converter: StructureGraph,
                 target_scaler: Scaler = DummyScaler(),
                 metadata: Dict = None,
                 **kwargs):
        """
        Args:
            model: (keras model)
            graph_converter: (object) an object that turns a structure into a graph;
                see `megnet.data.crystal`
            target_scaler: (object) a scaler object for converting targets; see
                `megnet.utils.preprocessing`
            metadata: (dict) an optional dict of metadata associated with the model.
                It is recommended to include basic information such as units,
                MAE performance, etc.
        """
        self.model = model
        self.graph_converter = graph_converter
        self.target_scaler = target_scaler
        self.metadata = metadata or {}
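
The wrapper only stores its arguments. A sketch of wiring it up, assuming the enclosing class is the library's `GraphModel` wrapper and `keras_model` is an already-compiled `keras.Model`:

from megnet.data.crystal import CrystalGraph
from megnet.utils.preprocessing import DummyScaler

wrapper = GraphModel(model=keras_model,             # placeholder compiled model
                     graph_converter=CrystalGraph(),
                     target_scaler=DummyScaler(),
                     metadata={'unit': 'eV/atom'})   # example metadata only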
Example #6
    def __init__(self,
                 nfeat_edge: int = None,
                 nfeat_global: int = None,
                 nfeat_node: int = None,
                 nblocks: int = 3,
                 lr: float = 1e-3,
                 n1: int = 64,
                 n2: int = 32,
                 n3: int = 16,
                 nvocal: int = 95,
                 embedding_dim: int = 16,
                 nbvocal: int = None,
                 bond_embedding_dim: int = None,
                 ngvocal: int = None,
                 global_embedding_dim: int = None,
                 npass: int = 3,
                 ntarget: int = 1,
                 act: Callable = softplus2,
                 is_classification: bool = False,
                 loss: str = "mse",
                 metrics: List[str] = None,
                 l2_coef: float = None,
                 dropout: float = None,
                 graph_converter: StructureGraph = None,
                 target_scaler: Scaler = DummyScaler(),
                 optimizer_kwargs: Dict = None,
                 dropout_on_predict: bool = False):
        """
        Args:
            nfeat_edge: (int) number of bond features
            nfeat_global: (int) number of state features
            nfeat_node: (int) number of atom features
            nblocks: (int) number of MEGNetLayer blocks
            lr: (float) learning rate
            n1: (int) number of hidden units in layer 1 in MEGNetLayer
            n2: (int) number of hidden units in layer 2 in MEGNetLayer
            n3: (int) number of hidden units in layer 3 in MEGNetLayer
            nvocal: (int) total number of elements
            embedding_dim: (int) dimension of the element embedding
            nbvocal: (int) number of bond types, if bond attributes are types
            bond_embedding_dim: (int) dimension of the bond embedding
            ngvocal: (int) number of global types, if global attributes are types
            global_embedding_dim: (int) dimension of the global embedding
            npass: (int) number of recurrent steps in Set2Set layer
            ntarget: (int) number of output targets
            act: (object) activation function
            l2_coef: (float or None) l2 regularization parameter
            is_classification: (bool) whether it is a classification task
            loss: (object or str) loss function
            metrics: (list or dict) List or dictionary of Keras metrics to be evaluated by the model during training
                and testing
            dropout: (float) dropout rate
            graph_converter: (object) object that exposes a "convert" method for structure-to-graph conversion
            target_scaler: (object) object that exposes "transform" and "inverse_transform" methods for transforming
                the target values
            optimizer_kwargs: (dict) extra keyword arguments for the optimizer, e.g. clipnorm and clipvalue
            dropout_on_predict: (bool) whether to keep dropout active at prediction time
        """

        # Build the MEG Model
        model = make_megnet_model(nfeat_edge=nfeat_edge,
                                  nfeat_global=nfeat_global,
                                  nfeat_node=nfeat_node,
                                  nblocks=nblocks,
                                  n1=n1,
                                  n2=n2,
                                  n3=n3,
                                  nvocal=nvocal,
                                  embedding_dim=embedding_dim,
                                  nbvocal=nbvocal,
                                  bond_embedding_dim=bond_embedding_dim,
                                  ngvocal=ngvocal,
                                  global_embedding_dim=global_embedding_dim,
                                  npass=npass,
                                  ntarget=ntarget,
                                  act=act,
                                  is_classification=is_classification,
                                  l2_coef=l2_coef,
                                  dropout=dropout,
                                  dropout_on_predict=dropout_on_predict)

        # Compile the model with the optimizer
        loss = 'binary_crossentropy' if is_classification else loss

        opt_params = {'lr': lr}
        if optimizer_kwargs is not None:
            opt_params.update(optimizer_kwargs)
        model.compile(Adam(**opt_params), loss, metrics=metrics)

        if graph_converter is None:
            graph_converter = CrystalGraph(cutoff=4,
                                           bond_converter=GaussianDistance(
                                               np.linspace(0, 5, 100), 0.5))

        super().__init__(model=model,
                         target_scaler=target_scaler,
                         graph_converter=graph_converter)
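
With the defaults above (a `CrystalGraph` converter using a 100-center `GaussianDistance` expansion), only the feature dimensions are needed to build a model. A sketch, assuming this constructor is `megnet.models.MEGNetModel` and that `structures`/`targets` are placeholder pymatgen structures and target values:

from megnet.models import MEGNetModel

# nfeat_edge=100 matches the default GaussianDistance centers above;
# nfeat_global=2 is just an example state-vector size.
model = MEGNetModel(nfeat_edge=100, nfeat_global=2, ntarget=1)
model.train(structures, targets, epochs=10)  # placeholder data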
Example #7
    def __init__(
            self,
            filepath: str = './callback/val_mae_{epoch:05d}_{val_mae:.6f}.hdf5',
            monitor: str = 'val_mae',
            verbose: int = 0,
            save_best_only: bool = True,
            save_weights_only: bool = False,
            val_gen: Sequence = None,
            steps_per_val: int = None,
            target_scaler: Scaler = None,
            period: int = 1,
            mode: str = 'auto'):
        """
        Args:
            filepath (string): path to save the model file with format. For example
                `weights.{epoch:02d}-{val_mae:.6f}.hdf5` will save the corresponding epoch and
                val_mae in the filename
            monitor (string): quantity to monitor, default to "val_mae"
            verbose (int): 0 for no training log, 1 for only epoch-level log and 2 for batch-level log
            save_best_only (bool): whether to save only the best model
            save_weights_only (bool): whether to save the weights only excluding model structure
            val_gen (generator): validation generator
            steps_per_val (int): steps per epoch for validation generator
            target_scaler (object): exposing inverse_transform method to scale the output
            period (int): number of epoch interval for this callback
            mode: (string) choose from "min", "max" or "auto"
        """
        super().__init__()
        if val_gen is None:
            raise ValueError('No validation data is provided!')
        self.verbose = verbose
        if self.verbose > 0:
            logging.basicConfig(level=logging.INFO)
        self.filepath = filepath
        self.save_best_only = save_best_only
        self.save_weights_only = save_weights_only
        self.period = period
        self.epochs_since_last_save = 0
        self.val_gen = val_gen
        self.steps_per_val = steps_per_val or len(val_gen)
        self.target_scaler = target_scaler or DummyScaler()

        if monitor == 'val_mae':
            self.metric = mae
            self.monitor = 'val_mae'
        elif monitor == 'val_acc':
            self.metric = accuracy
            self.filepath = self.filepath.replace('val_mae', 'val_acc')
            self.monitor = 'val_acc'

        if mode == 'min':
            self.monitor_op = np.less
            self.best = np.Inf
        elif mode == 'max':
            self.monitor_op = np.greater
            self.best = -np.Inf
        else:
            if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
                self.monitor_op = np.greater
                self.best = -np.Inf
            else:
                self.monitor_op = np.less
                self.best = np.Inf
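
At save time (see `on_epoch_end` in Example #9) the `filepath` template is resolved with `str.format`, so the default pattern produces names like:

# Epoch and the monitored metric are substituted via str.format.
path = './callback/val_mae_{epoch:05d}_{val_mae:.6f}.hdf5'
print(path.format(epoch=12, val_mae=0.031872))
# ./callback/val_mae_00012_0.031872.hdf5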
Example #8
    def __init__(self,
                 nfeat_edge=None,
                 nfeat_global=None,
                 nfeat_node=None,
                 nblocks=3,
                 lr=1e-3,
                 n1=64,
                 n2=32,
                 n3=16,
                 nvocal=95,
                 embedding_dim=16,
                 nbvocal=None,
                 bond_embedding_dim=None,
                 ngvocal=None,
                 global_embedding_dim=None,
                 npass=3,
                 ntarget=1,
                 act=softplus2,
                 is_classification=False,
                 loss="mse",
                 metrics=None,
                 l2_coef=None,
                 dropout=None,
                 graph_converter=None,
                 target_scaler=DummyScaler(),
                 optimizer_kwargs=None,
                 dropout_on_predict=False):

        # Build the MEG Model
        model = make_megnet_model(nfeat_edge=nfeat_edge,
                                  nfeat_global=nfeat_global,
                                  nfeat_node=nfeat_node,
                                  nblocks=nblocks,
                                  n1=n1,
                                  n2=n2,
                                  n3=n3,
                                  nvocal=nvocal,
                                  embedding_dim=embedding_dim,
                                  nbvocal=nbvocal,
                                  bond_embedding_dim=bond_embedding_dim,
                                  ngvocal=ngvocal,
                                  global_embedding_dim=global_embedding_dim,
                                  npass=npass,
                                  ntarget=ntarget,
                                  act=act,
                                  is_classification=is_classification,
                                  l2_coef=l2_coef,
                                  dropout=dropout,
                                  dropout_on_predict=dropout_on_predict)

        # Compile the model with the optimizer
        loss = 'binary_crossentropy' if is_classification else loss

        opt_params = {'lr': lr}
        if optimizer_kwargs is not None:
            opt_params.update(optimizer_kwargs)
        model.compile(Adam(**opt_params), loss, metrics=metrics)

        if graph_converter is None:
            graph_converter = CrystalGraph(cutoff=4,
                                           bond_converter=GaussianDistance(
                                               np.linspace(0, 5, 100), 0.5))

        super().__init__(model=model,
                         target_scaler=target_scaler,
                         graph_converter=graph_converter)
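
Because `opt_params.update(optimizer_kwargs)` above merges the extra keywords into the Adam constructor arguments, gradient clipping can be passed straight through. A sketch, assuming this constructor belongs to a class named `MEGNetModel` as in Example #6:

# clipnorm is a standard Keras optimizer argument; it reaches Adam via the
# opt_params.update(optimizer_kwargs) line above.
model = MEGNetModel(nfeat_edge=100, nfeat_global=2,
                    lr=5e-4,
                    optimizer_kwargs={'clipnorm': 1.0})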
Example #9
class ModelCheckpointMAE(Callback):
    """
    Save the model with the best MAE, together with its target scaler

    Args:
        filepath (string): path template for saving the model file. For example,
            `weights.{epoch:02d}-{val_mae:.6f}.hdf5` embeds the corresponding epoch and
            val_mae in the filename
        monitor (string): quantity to monitor, defaults to "val_mae"
        verbose (int): 0 for no training log, 1 for epoch-level log only, and 2 for batch-level log
        save_best_only (bool): whether to save only the best model
        save_weights_only (bool): whether to save only the weights, excluding the model structure
        val_gen (generator): validation generator
        steps_per_val (int): steps per epoch for the validation generator
        target_scaler (object): object exposing an inverse_transform method to rescale the output
        period (int): interval, in epochs, between invocations of this callback
        mode (string): one of "min", "max" or "auto"
    """
    def __init__(
            self,
            filepath: str = './callback/val_mae_{epoch:05d}_{val_mae:.6f}.hdf5',
            monitor: str = 'val_mae',
            verbose: int = 0,
            save_best_only: bool = True,
            save_weights_only: bool = False,
            val_gen: Iterable = None,
            steps_per_val: int = None,
            target_scaler: Scaler = None,
            period: int = 1,
            mode: str = 'auto'):
        super().__init__()
        if val_gen is None:
            raise ValueError('No validation data is provided!')
        self.verbose = verbose
        if self.verbose > 0:
            logging.basicConfig(level=logging.INFO)
        self.filepath = filepath
        self.save_best_only = save_best_only
        self.save_weights_only = save_weights_only
        self.period = period
        self.epochs_since_last_save = 0
        self.val_gen = val_gen
        self.steps_per_val = steps_per_val
        self.target_scaler = target_scaler
        if self.target_scaler is None:
            self.target_scaler = DummyScaler()

        if monitor == 'val_mae':
            self.metric = mae
            self.monitor = 'val_mae'
        elif monitor == 'val_acc':
            self.metric = accuracy
            self.filepath = self.filepath.replace('val_mae', 'val_acc')
            self.monitor = 'val_acc'

        if mode == 'min':
            self.monitor_op = np.less
            self.best = np.Inf
        elif mode == 'max':
            self.monitor_op = np.greater
            self.best = -np.Inf
        else:
            if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
                self.monitor_op = np.greater
                self.best = -np.Inf
            else:
                self.monitor_op = np.less
                self.best = np.Inf

    def on_epoch_end(self, epoch: int, logs: Dict = None) -> None:
        """
        Codes called by the callback at the end of epoch
        Args:
            epoch (int): epoch id
            logs (dict): logs of training

        Returns:
            None
        """
        self.epochs_since_last_save += 1
        if self.epochs_since_last_save >= self.period:
            self.epochs_since_last_save = 0
            val_pred = []
            val_y = []
            for i in range(self.steps_per_val):
                val_data = self.val_gen[i]
                nb_atom = _count(np.array(val_data[0][-2]))
                stop_training = self.model.stop_training  # save stop_training state
                pred_ = self.model.predict(val_data[0])
                self.model.stop_training = stop_training
                val_pred.append(
                    self.target_scaler.inverse_transform(
                        pred_[0, :, :], nb_atom[:, None]))
                val_y.append(
                    self.target_scaler.inverse_transform(
                        val_data[1][0, :, :], nb_atom[:, None]))
            current = self.metric(np.concatenate(val_y, axis=0),
                                  np.concatenate(val_pred, axis=0))
            filepath = self.filepath.format(**{
                "epoch": epoch + 1,
                self.monitor: current
            })

            if self.save_best_only:
                if current is None:
                    warnings.warn(
                        'Can save best model only with %s available, '
                        'skipping.' % self.monitor, RuntimeWarning)
                else:
                    if self.monitor_op(current, self.best):
                        logger.info(
                            '\nEpoch %05d: %s improved from %0.5f to %0.5f,'
                            ' saving model to %s' %
                            (epoch + 1, self.monitor, self.best, current,
                             filepath))
                        self.best = current
                        if self.save_weights_only:
                            self.model.save_weights(filepath, overwrite=True)
                        else:
                            self.model.save(filepath, overwrite=True)
                    else:
                        if self.verbose > 0:
                            logger.info(
                                '\nEpoch %05d: %s did not improve from %0.5f' %
                                (epoch + 1, self.monitor, self.best))
            else:
                logger.info('\nEpoch %05d: saving model to %s' %
                            (epoch + 1, filepath))
                if self.save_weights_only:
                    self.model.save_weights(filepath, overwrite=True)
                else:
                    self.model.save(filepath, overwrite=True)
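
`on_epoch_end` above hands the per-structure atom counts to `inverse_transform` as a second argument, so any scaler used here must accept `(values, n)`. A minimal sketch of a compatible scaler, hypothetical and mirroring `DummyScaler`'s two-argument interface rather than any library implementation:

import numpy as np

class PerAtomScaler:
    """Hypothetical scaler: stores targets per atom, recovers totals."""

    def transform(self, target, n=1):
        # total target -> per-atom target
        return np.asarray(target) / n

    def inverse_transform(self, transformed_target, n=1):
        # per-atom target -> total target
        return np.asarray(transformed_target) * n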
Example #10
class GeneratorLog(Callback):
    """
    This callback logger.info out the MAE for train_generator and validation_generator every n_every steps.
    The default keras training log does not contain method to rescale the results, thus is not physically
    intuitive.

    Args:
        train_gen: (generator), yield (x, y) pairs for training
        steps_per_train: (int) number of generator steps per training epoch
        val_gen: (generator), yield (x, y) pairs for validation.
        steps_per_val: (int) number of generator steps per epoch for validation data
        y_scaler: (object) y_scaler.inverse_transform is used to convert the predicted values to its original scale
        n_every: (int) epoch interval for showing the log
        val_names: (list of string) variable names
        val_units: (list of string) variable units
        is_pa: (bool) whether it is a per-atom quantity
    """
    def __init__(self,
                 train_gen,
                 steps_per_train=None,
                 val_gen=None,
                 steps_per_val=None,
                 y_scaler=None,
                 n_every=5,
                 val_names=None,
                 val_units=None,
                 is_pa=False):
        super().__init__()
        self.train_gen = train_gen
        self.val_gen = val_gen
        self.steps_per_train = steps_per_train
        self.steps_per_val = steps_per_val
        self.yscaler = y_scaler
        self.epochs = []
        self.total_epoch = 0
        self.n_every = n_every
        self.val_names = val_names
        self.val_units = val_units
        self.is_pa = is_pa
        if self.yscaler is None:
            self.yscaler = DummyScaler()

    def on_epoch_end(self, epoch, logs=None):
        """
        Standard Keras callback hook, executed at the end of each epoch
        """
        self.total_epoch += 1
        if self.total_epoch % self.n_every == 0:
            train_pred = []
            train_y = []
            for i in range(self.steps_per_train):
                train_data = self.train_gen[i]
                nb_atom = _count(np.array(train_data[0][-2]))
                if not self.is_pa:
                    nb_atom = np.ones_like(nb_atom)
                pred_ = self.model.predict(train_data[0])
                train_pred.append(
                    self.yscaler.inverse_transform(pred_[0, :, :]) *
                    nb_atom[:, None])
                train_y.append(
                    self.yscaler.inverse_transform(train_data[1][0, :, :]) *
                    nb_atom[:, None])
            train_mae = np.mean(np.abs(
                np.concatenate(train_pred, axis=0) -
                np.concatenate(train_y, axis=0)),
                                axis=0)
            logger.info("Train MAE")
            _print_mae(self.val_names, train_mae, self.val_units)
            val_pred = []
            val_y = []
            for i in range(self.steps_per_val):
                val_data = self.val_gen[i]
                nb_atom = _count(np.array(val_data[0][-2]))
                if not self.is_pa:
                    nb_atom = np.ones_like(nb_atom)
                pred_ = self.model.predict(val_data[0])
                val_pred.append(
                    self.yscaler.inverse_transform(pred_[0, :, :]) *
                    nb_atom[:, None])
                val_y.append(
                    self.yscaler.inverse_transform(val_data[1][0, :, :]) *
                    nb_atom[:, None])
            val_mae = np.mean(np.abs(
                np.concatenate(val_pred, axis=0) -
                np.concatenate(val_y, axis=0)),
                              axis=0)
            logger.info("Test MAE")
            _print_mae(self.val_names, val_mae, self.val_units)
            self.model.history.history.setdefault("train_mae",
                                                  []).append(train_mae)
            self.model.history.history.setdefault("val_mae",
                                                  []).append(val_mae)