def __init__(self,
             filepath: str = './callback/val_mae_{epoch:05d}_{val_mae:.6f}.hdf5',
             monitor: str = 'val_mae',
             verbose: int = 0,
             save_best_only: bool = True,
             save_weights_only: bool = False,
             val_gen: Iterable = None,
             steps_per_val: int = None,
             target_scaler: Scaler = None,
             period: int = 1,
             mode: str = 'auto'):
    """
    Configure the checkpoint callback.

    Args:
        filepath (string): format template of the file to save to; when
            monitor is "val_acc" every "val_mae" in it is replaced by "val_acc"
        monitor (string): "val_mae" or "val_acc"
        verbose (int): any value above 0 configures logging at INFO level
        save_best_only (bool): stored as-is for later use by the callback
        save_weights_only (bool): stored as-is for later use by the callback
        val_gen (generator): validation data, required
        steps_per_val (int): number of validation steps; when omitted,
            len(val_gen) is used
        target_scaler (object): scaler object, a DummyScaler is used when omitted
        period (int): stored as-is for later use by the callback
        mode (string): "min", "max" or anything else for automatic selection
            ("max" when the monitor name contains "acc" or starts with
            "fmeasure", "min" otherwise)

    Raises:
        ValueError: if val_gen is None or monitor is not supported
    """
    super().__init__()
    if val_gen is None:
        raise ValueError('No validation data is provided!')
    self.verbose = verbose
    if self.verbose > 0:
        logging.basicConfig(level=logging.INFO)
    self.filepath = filepath
    self.save_best_only = save_best_only
    self.save_weights_only = save_weights_only
    self.period = period
    self.epochs_since_last_save = 0
    self.val_gen = val_gen
    # A missing step count would only fail later, fall back to the
    # length of the validation data instead.
    self.steps_per_val = steps_per_val or len(val_gen)
    self.target_scaler = DummyScaler() if target_scaler is None else target_scaler

    if monitor == 'val_mae':
        self.metric = mae
        self.monitor = 'val_mae'
    elif monitor == 'val_acc':
        self.metric = accuracy
        self.filepath = self.filepath.replace('val_mae', 'val_acc')
        self.monitor = 'val_acc'
    else:
        # Without this, self.metric and self.monitor would stay undefined.
        raise ValueError(
            "monitor must be 'val_mae' or 'val_acc', got %r" % (monitor,))

    # np.inf is used instead of the np.Inf alias, which NumPy 2.0 removed.
    if mode == 'min':
        self.monitor_op = np.less
        self.best = np.inf
    elif mode == 'max':
        self.monitor_op = np.greater
        self.best = -np.inf
    else:
        if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
            self.monitor_op = np.greater
            self.best = -np.inf
        else:
            self.monitor_op = np.less
            self.best = np.inf
def __init__(self, model, graph_converter, target_scaler=DummyScaler(), metadata=None, **kwargs):
    """
    Keep references to the pieces that make up the model wrapper.

    Args:
        model: model object, stored as ``self.model``
        graph_converter: converter object, stored as ``self.graph_converter``
        target_scaler: scaler object, stored as ``self.target_scaler``; the
            default is a single DummyScaler instance created at definition time
        metadata: optional metadata; a falsy value is replaced by a new
            empty dict
        **kwargs: accepted for call compatibility, not used here
    """
    self.model, self.graph_converter = model, graph_converter
    self.target_scaler = target_scaler
    # Never store None (or another falsy value) as metadata.
    self.metadata = metadata if metadata else {}
def __init__(self, train_gen, steps_per_train=None, val_gen=None, steps_per_val=None, y_scaler=None, n_every=5, val_names=None, val_units=None, is_pa=False):
    """
    Record the generators and reporting options of the logger callback.

    Args:
        train_gen: training data generator
        steps_per_train: step count stored for the training generator
        val_gen: validation data generator
        steps_per_val: step count stored for the validation generator
        y_scaler: scaler object; a DummyScaler is used when None
        n_every: interval stored as ``self.n_every``
        val_names: names stored as ``self.val_names``
        val_units: units stored as ``self.val_units``
        is_pa: flag stored as ``self.is_pa``
    """
    super().__init__()
    # data sources
    self.train_gen, self.val_gen = train_gen, val_gen
    self.steps_per_train, self.steps_per_val = steps_per_train, steps_per_val
    # fall back to the no-op scaler when none is supplied
    self.yscaler = DummyScaler() if y_scaler is None else y_scaler
    # bookkeeping
    self.epochs = []
    self.total_epoch = 0
    # reporting options
    self.n_every = n_every
    self.val_names, self.val_units = val_names, val_units
    self.is_pa = is_pa
def __init__(self,
             train_gen: Iterable,
             steps_per_train: int = None,
             val_gen: Iterable = None,
             steps_per_val: int = None,
             y_scaler: Scaler = None,
             n_every: int = 5,
             val_names: List[str] = None,
             val_units: List[str] = None,
             is_pa: bool = False):
    """
    Record the generators and reporting options of the logger callback.

    Args:
        train_gen: training data generator
        steps_per_train: step count stored for the training generator
        val_gen: validation data generator
        steps_per_val: step count stored for the validation generator
        y_scaler: scaler object; a DummyScaler is used when None
        n_every: interval stored as ``self.n_every``
        val_names: names stored as ``self.val_names``
        val_units: units stored as ``self.val_units``
        is_pa: flag stored as ``self.is_pa``
    """
    super().__init__()
    # data sources
    self.train_gen, self.val_gen = train_gen, val_gen
    self.steps_per_train, self.steps_per_val = steps_per_train, steps_per_val
    # fall back to the no-op scaler when none is supplied
    self.yscaler = DummyScaler() if y_scaler is None else y_scaler
    # bookkeeping
    self.epochs = []
    self.total_epoch = 0
    # reporting options
    self.n_every = n_every
    self.val_names, self.val_units = val_names, val_units
    self.is_pa = is_pa
def __init__(self,
             model: Model,
             graph_converter: StructureGraph,
             target_scaler: Scaler = DummyScaler(),
             metadata: Dict = None,
             **kwargs):
    """
    Keep references to the pieces that make up the model wrapper.

    Args:
        model: (keras model) the underlying model
        graph_converter: (object) an object that turns a structure into a
            graph, check `megnet.data.crystal`
        target_scaler: (object) a scaler object for converting targets,
            check `megnet.utils.preprocessing`
        metadata: (dict) an optional dict of metadata associated with the
            model, e.g. units or MAE performance; a falsy value is replaced
            by a new empty dict
        **kwargs: accepted for call compatibility, not used here
    """
    self.model, self.graph_converter = model, graph_converter
    self.target_scaler = target_scaler
    # Never store None (or another falsy value) as metadata.
    self.metadata = metadata if metadata else {}
def __init__(self,
             nfeat_edge: int = None,
             nfeat_global: int = None,
             nfeat_node: int = None,
             nblocks: int = 3,
             lr: float = 1e-3,
             n1: int = 64,
             n2: int = 32,
             n3: int = 16,
             nvocal: int = 95,
             embedding_dim: int = 16,
             nbvocal: int = None,
             bond_embedding_dim: int = None,
             ngvocal: int = None,
             global_embedding_dim: int = None,
             npass: int = 3,
             ntarget: int = 1,
             act: Callable = softplus2,
             is_classification: bool = False,
             loss: str = "mse",
             metrics: List[str] = None,
             l2_coef: float = None,
             dropout: float = None,
             graph_converter: StructureGraph = None,
             target_scaler: Scaler = DummyScaler(),
             optimizer_kwargs: Dict = None,
             dropout_on_predict: bool = False):
    """
    Build the network, compile it with Adam and hand it to the parent
    constructor together with the converter and the scaler.

    Args:
        nfeat_edge: (int) number of bond features
        nfeat_global: (int) number of state features
        nfeat_node: (int) number of atom features
        nblocks: (int) number of MEGNetLayer blocks
        lr: (float) learning rate
        n1: (int) number of hidden units in layer 1 in MEGNetLayer
        n2: (int) number of hidden units in layer 2 in MEGNetLayer
        n3: (int) number of hidden units in layer 3 in MEGNetLayer
        nvocal: (int) number of total element
        embedding_dim: (int) number of embedding dimension
        nbvocal: (int) number of bond types if bond attributes are types
        bond_embedding_dim: (int) number of bond embedding dimension
        ngvocal: (int) number of global types if global attributes are types
        global_embedding_dim: (int) number of global embedding dimension
        npass: (int) number of recurrent steps in Set2Set layer
        ntarget: (int) number of output targets
        act: (object) activation function
        is_classification: (bool) whether it is a classification task; when
            true the loss is forced to 'binary_crossentropy'
        loss: (object or str) loss function, ignored for classification
        metrics: (list or dict) List or dictionary of Keras metrics to be
            evaluated by the model during training and testing
        l2_coef: (float or None) l2 regularization parameter
        dropout: (float) dropout rate
        graph_converter: (object) object that exposes a "convert" method for
            structure to graph conversion; when None a CrystalGraph with
            cutoff 4 and a Gaussian distance expansion is created
        target_scaler: (object) object that exposes "transform" and
            "inverse_transform" methods for transforming the target values
        optimizer_kwargs (dict): extra keywords for the optimizer, for
            example clipnorm and clipvalue; they override the 'lr' entry
            when they contain one
        dropout_on_predict: (bool) forwarded unchanged to make_megnet_model
    """
    # Collect the network options once, then build the MEG model from them
    network_options = dict(
        nfeat_edge=nfeat_edge,
        nfeat_global=nfeat_global,
        nfeat_node=nfeat_node,
        nblocks=nblocks,
        n1=n1,
        n2=n2,
        n3=n3,
        nvocal=nvocal,
        embedding_dim=embedding_dim,
        nbvocal=nbvocal,
        bond_embedding_dim=bond_embedding_dim,
        ngvocal=ngvocal,
        global_embedding_dim=global_embedding_dim,
        npass=npass,
        ntarget=ntarget,
        act=act,
        is_classification=is_classification,
        l2_coef=l2_coef,
        dropout=dropout,
        dropout_on_predict=dropout_on_predict,
    )
    keras_model = make_megnet_model(**network_options)

    # Classification always trains against binary cross entropy
    if is_classification:
        loss = 'binary_crossentropy'

    # Caller-supplied optimizer keywords take precedence over the defaults
    adam_options = {'lr': lr}
    if optimizer_kwargs is not None:
        adam_options.update(optimizer_kwargs)
    keras_model.compile(Adam(**adam_options), loss, metrics=metrics)

    if graph_converter is None:
        distance_centers = np.linspace(0, 5, 100)
        graph_converter = CrystalGraph(
            cutoff=4,
            bond_converter=GaussianDistance(distance_centers, 0.5))

    super().__init__(model=keras_model,
                     target_scaler=target_scaler,
                     graph_converter=graph_converter)
def __init__(self,
             filepath: str = './callback/val_mae_{epoch:05d}_{val_mae:.6f}.hdf5',
             monitor: str = 'val_mae',
             verbose: int = 0,
             save_best_only: bool = True,
             save_weights_only: bool = False,
             val_gen: Sequence = None,
             steps_per_val: int = None,
             target_scaler: Scaler = None,
             period: int = 1,
             mode: str = 'auto'):
    """
    Args:
        filepath (string): path to save the model file with format. For example
            `weights.{epoch:02d}-{val_mae:.6f}.hdf5` will save the corresponding epoch and
            val_mae in the filename
        monitor (string): quantity to monitor, "val_mae" (default) or "val_acc"
        verbose (int): 0 for no training log, 1 for only epoch-level log and 2 for batch-level log
        save_best_only (bool): whether to save only the best model
        save_weights_only (bool): whether to save the weights only excluding model structure
        val_gen (generator): validation generator
        steps_per_val (int): steps per epoch for validation generator, defaults to
            len(val_gen)
        target_scaler (object): exposing inverse_transform method to scale the output
        period (int): number of epoch interval for this callback
        mode: (string) choose from "min", "max" or "auto"

    Raises:
        ValueError: if val_gen is None or monitor is not supported
    """
    super().__init__()
    if val_gen is None:
        raise ValueError('No validation data is provided!')
    self.verbose = verbose
    if self.verbose > 0:
        logging.basicConfig(level=logging.INFO)
    self.filepath = filepath
    self.save_best_only = save_best_only
    self.save_weights_only = save_weights_only
    self.period = period
    self.epochs_since_last_save = 0
    self.val_gen = val_gen
    self.steps_per_val = steps_per_val or len(val_gen)
    self.target_scaler = target_scaler or DummyScaler()

    if monitor == 'val_mae':
        self.metric = mae
        self.monitor = 'val_mae'
    elif monitor == 'val_acc':
        self.metric = accuracy
        self.filepath = self.filepath.replace('val_mae', 'val_acc')
        self.monitor = 'val_acc'
    else:
        # Without this, self.metric and self.monitor would stay undefined.
        raise ValueError(
            "monitor must be 'val_mae' or 'val_acc', got %r" % (monitor,))

    # np.inf is used instead of the np.Inf alias, which NumPy 2.0 removed.
    if mode == 'min':
        self.monitor_op = np.less
        self.best = np.inf
    elif mode == 'max':
        self.monitor_op = np.greater
        self.best = -np.inf
    else:
        if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
            self.monitor_op = np.greater
            self.best = -np.inf
        else:
            self.monitor_op = np.less
            self.best = np.inf
def __init__(self, nfeat_edge=None, nfeat_global=None, nfeat_node=None, nblocks=3, lr=1e-3, n1=64, n2=32, n3=16, nvocal=95, embedding_dim=16, nbvocal=None, bond_embedding_dim=None, ngvocal=None, global_embedding_dim=None, npass=3, ntarget=1, act=softplus2, is_classification=False, loss="mse", metrics=None, l2_coef=None, dropout=None, graph_converter=None, target_scaler=DummyScaler(), optimizer_kwargs=None, dropout_on_predict=False):
    """
    Build the network, compile it with Adam and hand it to the parent
    constructor together with the converter and the scaler.

    All network-shape arguments (nfeat_*, nblocks, n1-n3, the vocabulary and
    embedding sizes, npass, ntarget, act, l2_coef, dropout,
    dropout_on_predict and is_classification) are forwarded unchanged to
    make_megnet_model.

    Args:
        lr: learning rate given to Adam as 'lr'
        is_classification: when true the loss is forced to
            'binary_crossentropy'
        loss: loss passed to compile, ignored for classification
        metrics: metrics passed to compile
        graph_converter: converter object; when None a CrystalGraph with
            cutoff 4 and a Gaussian distance expansion is created
        target_scaler: scaler object forwarded to the parent constructor
        optimizer_kwargs: optional extra Adam keywords; they override the
            'lr' entry when they contain one
    """
    # Collect the network options once, then build the MEG model from them
    network_options = dict(
        nfeat_edge=nfeat_edge,
        nfeat_global=nfeat_global,
        nfeat_node=nfeat_node,
        nblocks=nblocks,
        n1=n1,
        n2=n2,
        n3=n3,
        nvocal=nvocal,
        embedding_dim=embedding_dim,
        nbvocal=nbvocal,
        bond_embedding_dim=bond_embedding_dim,
        ngvocal=ngvocal,
        global_embedding_dim=global_embedding_dim,
        npass=npass,
        ntarget=ntarget,
        act=act,
        is_classification=is_classification,
        l2_coef=l2_coef,
        dropout=dropout,
        dropout_on_predict=dropout_on_predict,
    )
    keras_model = make_megnet_model(**network_options)

    # Classification always trains against binary cross entropy
    if is_classification:
        loss = 'binary_crossentropy'

    # Caller-supplied optimizer keywords take precedence over the defaults
    adam_options = {'lr': lr}
    if optimizer_kwargs is not None:
        adam_options.update(optimizer_kwargs)
    keras_model.compile(Adam(**adam_options), loss, metrics=metrics)

    if graph_converter is None:
        distance_centers = np.linspace(0, 5, 100)
        graph_converter = CrystalGraph(
            cutoff=4,
            bond_converter=GaussianDistance(distance_centers, 0.5))

    super().__init__(model=keras_model,
                     target_scaler=target_scaler,
                     graph_converter=graph_converter)
class ModelCheckpointMAE(Callback):
    """
    Save the best MAE model with target scaler

    Args:
        filepath (string): path to save the model file with format. For example
            `weights.{epoch:02d}-{val_mae:.6f}.hdf5` will save the corresponding epoch and
            val_mae in the filename
        monitor (string): quantity to monitor, "val_mae" (default) or "val_acc"
        verbose (int): 0 for no training log, 1 for only epoch-level log and 2 for batch-level log
        save_best_only (bool): whether to save only the best model
        save_weights_only (bool): whether to save the weights only excluding model structure
        val_gen (generator): validation generator, indexed as val_gen[i]
        steps_per_val (int): steps per epoch for validation generator, defaults to
            len(val_gen)
        target_scaler (object): exposing inverse_transform method to scale the output
        period (int): number of epoch interval for this callback
        mode: (string) choose from "min", "max" or "auto"

    Raises:
        ValueError: if val_gen is None or monitor is not supported
    """

    def __init__(self,
                 filepath: str = './callback/val_mae_{epoch:05d}_{val_mae:.6f}.hdf5',
                 monitor: str = 'val_mae',
                 verbose: int = 0,
                 save_best_only: bool = True,
                 save_weights_only: bool = False,
                 val_gen: Iterable = None,
                 steps_per_val: int = None,
                 target_scaler: Scaler = None,
                 period: int = 1,
                 mode: str = 'auto'):
        super().__init__()
        if val_gen is None:
            raise ValueError('No validation data is provided!')
        self.verbose = verbose
        if self.verbose > 0:
            logging.basicConfig(level=logging.INFO)
        self.filepath = filepath
        self.save_best_only = save_best_only
        self.save_weights_only = save_weights_only
        self.period = period
        self.epochs_since_last_save = 0
        self.val_gen = val_gen
        # on_epoch_end iterates range(steps_per_val), so None must not be stored
        self.steps_per_val = steps_per_val or len(val_gen)
        self.target_scaler = DummyScaler() if target_scaler is None else target_scaler

        if monitor == 'val_mae':
            self.metric = mae
            self.monitor = 'val_mae'
        elif monitor == 'val_acc':
            self.metric = accuracy
            self.filepath = self.filepath.replace('val_mae', 'val_acc')
            self.monitor = 'val_acc'
        else:
            # Without this, self.metric and self.monitor would stay undefined.
            raise ValueError(
                "monitor must be 'val_mae' or 'val_acc', got %r" % (monitor,))

        # np.inf is used instead of the np.Inf alias, which NumPy 2.0 removed.
        if mode == 'min':
            self.monitor_op = np.less
            self.best = np.inf
        elif mode == 'max':
            self.monitor_op = np.greater
            self.best = -np.inf
        else:
            if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
                self.monitor_op = np.greater
                self.best = -np.inf
            else:
                self.monitor_op = np.less
                self.best = np.inf

    def _save_model(self, filepath: str) -> None:
        """Write the weights or the whole model to filepath, overwriting it."""
        if self.save_weights_only:
            self.model.save_weights(filepath, overwrite=True)
        else:
            self.model.save(filepath, overwrite=True)

    def on_epoch_end(self, epoch: int, logs: Dict = None) -> None:
        """
        Codes called by the callback at the end of epoch

        Every `period` epochs the validation generator is evaluated with the
        configured metric on inverse-transformed predictions and targets, and
        the model is saved according to `save_best_only`.

        Args:
            epoch (int): epoch id
            logs (dict): logs of training, not used

        Returns:
            None
        """
        self.epochs_since_last_save += 1
        if self.epochs_since_last_save < self.period:
            return
        self.epochs_since_last_save = 0

        val_pred = []
        val_y = []
        for i in range(self.steps_per_val):
            val_data = self.val_gen[i]
            nb_atom = _count(np.array(val_data[0][-2]))
            # keep the stop_training flag unchanged across predict()
            stop_training = self.model.stop_training
            pred_ = self.model.predict(val_data[0])
            self.model.stop_training = stop_training
            val_pred.append(
                self.target_scaler.inverse_transform(
                    pred_[0, :, :], nb_atom[:, None]))
            val_y.append(
                self.target_scaler.inverse_transform(
                    val_data[1][0, :, :], nb_atom[:, None]))

        current = self.metric(np.concatenate(val_y, axis=0),
                              np.concatenate(val_pred, axis=0))
        filepath = self.filepath.format(**{
            "epoch": epoch + 1,
            self.monitor: current
        })

        if not self.save_best_only:
            logger.info('\nEpoch %05d: saving model to %s' %
                        (epoch + 1, filepath))
            self._save_model(filepath)
            return

        if current is None:
            warnings.warn(
                'Can save best model only with %s available, '
                'skipping.' % self.monitor, RuntimeWarning)
            return

        if self.monitor_op(current, self.best):
            logger.info(
                '\nEpoch %05d: %s improved from %0.5f to %0.5f,'
                ' saving model to %s' %
                (epoch + 1, self.monitor, self.best, current, filepath))
            self.best = current
            self._save_model(filepath)
        elif self.verbose > 0:
            logger.info(
                '\nEpoch %05d: %s did not improve from %0.5f' %
                (epoch + 1, self.monitor, self.best))
class GeneratorLog(Callback):
    """
    This callback logs the MAE for train_generator and validation_generator
    every n_every epochs. The default keras training log does not contain a
    method to rescale the results, thus is not physically intuitive.

    Args:
        train_gen: (generator), yield (x, y) pairs for training
        steps_per_train: (int) number of generator steps per training epoch,
            len(train_gen) is used when omitted
        val_gen: (generator), yield (x, y) pairs for validation. When
            omitted, only the training MAE is reported.
        steps_per_val: (int) number of generator steps per epoch for
            validation data, len(val_gen) is used when omitted
        y_scaler: (object) y_scaler.inverse_transform is used to convert the
            predicted values to its original scale
        n_every: (int) epoch interval for showing the log
        val_names: (list of string) variable names
        val_units: (list of string) variable units
        is_pa: (bool) whether it is a per-atom quantity
    """

    def __init__(self, train_gen, steps_per_train=None, val_gen=None, steps_per_val=None, y_scaler=None, n_every=5, val_names=None, val_units=None, is_pa=False):
        super().__init__()
        self.train_gen = train_gen
        self.val_gen = val_gen
        self.steps_per_train = steps_per_train
        self.steps_per_val = steps_per_val
        self.yscaler = y_scaler
        self.epochs = []
        self.total_epoch = 0
        self.n_every = n_every
        self.val_names = val_names
        self.val_units = val_units
        self.is_pa = is_pa
        if self.yscaler is None:
            self.yscaler = DummyScaler()

    def _generator_mae(self, generator, steps):
        """
        Mean absolute error per target column over `steps` batches.

        Args:
            generator: batch provider indexed as generator[i], each batch
                being read as (inputs, targets)
            steps: (int or None) number of batches, len(generator) when None

        Returns:
            Array of MAE values averaged over axis 0
        """
        if steps is None:
            # the default step count cannot be used with range()
            steps = len(generator)
        predictions = []
        targets = []
        for i in range(steps):
            batch = generator[i]
            nb_atom = _count(np.array(batch[0][-2]))
            if not self.is_pa:
                # not a per-atom quantity: scale by one instead of atom count
                nb_atom = np.ones_like(nb_atom)
            pred_ = self.model.predict(batch[0])
            predictions.append(
                self.yscaler.inverse_transform(pred_[0, :, :]) * nb_atom[:, None])
            targets.append(
                self.yscaler.inverse_transform(batch[1][0, :, :]) * nb_atom[:, None])
        return np.mean(np.abs(
            np.concatenate(predictions, axis=0) -
            np.concatenate(targets, axis=0)), axis=0)

    def on_epoch_end(self, epoch, logs=None):
        """
        Standard keras callback interface, executed at the end of epoch

        Every n_every calls, the train (and, if available, validation) MAE is
        logged and appended to the model history under "train_mae" and
        "val_mae".
        """
        self.total_epoch += 1
        if self.total_epoch % self.n_every != 0:
            return

        train_mae = self._generator_mae(self.train_gen, self.steps_per_train)
        logger.info("Train MAE")
        _print_mae(self.val_names, train_mae, self.val_units)

        val_mae = None
        if self.val_gen is not None:
            val_mae = self._generator_mae(self.val_gen, self.steps_per_val)
            logger.info("Test MAE")
            _print_mae(self.val_names, val_mae, self.val_units)

        history = self.model.history.history
        history.setdefault("train_mae", []).append(train_mae)
        if val_mae is not None:
            history.setdefault("val_mae", []).append(val_mae)