Example #1
    def test_reduce_lr_upon_nan(self):
        with ScratchDir('.'):
            callbacks = [ReduceLRUponNan(patience=100)]
            self.assertAlmostEqual(float(kb.get_value(self.model.optimizer.lr)), 1e-3)
            gen = Generator(self.x, np.array([1, np.nan]).reshape((1, 2, 1)))
            self.model.fit_generator(gen, steps_per_epoch=1, epochs=1, callbacks=callbacks, verbose=0)
            self.assertAlmostEqual(float(kb.get_value(self.model.optimizer.lr)), 0.5e-3)

            inp = [
                Input(shape=(None, self.n_feature)),
                Input(shape=(None, self.n_bond_features)),
                Input(shape=(None, self.n_global_features)),
                Input(shape=(None,), dtype='int32'),
                Input(shape=(None,), dtype='int32'),
                Input(shape=(None,), dtype='int32'),
                Input(shape=(None,), dtype='int32'),
            ]
            units_v = [2, 2]
            units_e = [2, 2]
            units_u = [2, ]
            layer = MEGNetLayer(units_v, units_e, units_u)
            out = layer(inp)
            out = Dense(1)(out[2])
            model = Model(inputs=inp, outputs=out)
            model.compile(loss='mse', optimizer='adam')
            x = [np.random.normal(size=(1, 4, self.n_feature)),
                 np.random.normal(size=(1, 6, self.n_bond_features)),
                 np.random.normal(size=(1, 2, self.n_global_features)),
                 np.array([[0, 0, 1, 1, 2, 3]]),
                 np.array([[1, 1, 0, 0, 3, 2]]),
                 np.array([[0, 0, 1, 1]]),
                 np.array([[0, 0, 0, 0, 1, 1]]),
                 ]
            y = np.random.normal(size=(1, 2, 1))
            train_gen = Generator(x, y)

            callbacks = [ReduceLRUponNan(filepath='./val_mae_{epoch:05d}_{val_mae:.6f}.hdf5', patience=100),
                         ModelCheckpointMAE(filepath='./val_mae_{epoch:05d}_{val_mae:.6f}.hdf5', val_gen=train_gen,
                                            steps_per_val=1)
                         ]
            # 1. normal training with checkpoint saving
            model.fit_generator(train_gen, steps_per_epoch=1, epochs=2, callbacks=callbacks, verbose=1)
            # 2. NaN loss triggers ReduceLRUponNan, halving the learning rate
            model.fit_generator(gen, steps_per_epoch=1, epochs=2, callbacks=callbacks, verbose=1)
            # 3. normal training again; the model saved in step 1 is recovered
            model.fit_generator(train_gen, steps_per_epoch=1, epochs=2, callbacks=callbacks, verbose=1)
            self.assertAlmostEqual(float(kb.get_value(model.optimizer.lr)), 0.25e-3)
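
The test above pins down the callback's contract: every NaN loss scales the learning rate by 0.5 (1e-3 to 0.5e-3 to 0.25e-3 across the two NaN-producing runs). Below is a minimal sketch of that mechanism with a hypothetical NaNLossLRHalver name, assuming a tf.keras backend; the real ReduceLRUponNan also restores weights from the last checkpoint, which is omitted here:

import numpy as np
from tensorflow.keras import backend as kb
from tensorflow.keras.callbacks import Callback

class NaNLossLRHalver(Callback):
    """Hypothetical stand-in for ReduceLRUponNan: scale the learning
    rate by `factor` whenever the reported loss is NaN."""
    def __init__(self, factor=0.5):
        super().__init__()
        self.factor = factor

    def on_epoch_end(self, epoch, logs=None):
        loss = (logs or {}).get('loss')
        if loss is not None and np.isnan(loss):
            lr = float(kb.get_value(self.model.optimizer.lr))
            kb.set_value(self.model.optimizer.lr, lr * self.factor)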
Example #2
File: models.py Project: wangvei/megnet
    def train_from_graphs(self,
                          train_graphs,
                          train_targets,
                          validation_graphs=None,
                          validation_targets=None,
                          epochs=1000,
                          batch_size=128,
                          verbose=1,
                          callbacks=None,
                          prev_model=None,
                          **kwargs
                          ):

        # load from saved model
        if prev_model:
            self.load_weights(prev_model)
        is_classification = 'entropy' in self.model.loss
        monitor = 'val_acc' if is_classification else 'val_mae'
        mode = 'max' if is_classification else 'min'
        dirname = kwargs.pop('dirname', 'callback')
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        if callbacks is None:
            # with this callback you can stop model training by running `touch STOP`
            callbacks = [ManualStop()]
        callbacks.append(ReduceLRUponNan())
        train_nb_atoms = [len(i['atom']) for i in train_graphs]
        train_targets = [self.target_scaler.transform(i, j) for i, j in zip(train_targets, train_nb_atoms)]
        train_targets = np.array(train_targets).ravel()

        if validation_graphs is not None:
            filepath = pjoin(dirname, 'val_mae_{epoch:05d}_{%s:.6f}.hdf5' % monitor)
            val_nb_atoms = [len(i['atom']) for i in validation_graphs]
            validation_targets = [self.target_scaler.transform(i, j) for i, j in zip(validation_targets, val_nb_atoms)]
            validation_targets = np.array(validation_targets).ravel()
            val_inputs = self.graph_convertor.get_flat_data(validation_graphs, validation_targets)

            val_generator = self._create_generator(*val_inputs,
                                                   batch_size=batch_size)
            steps_per_val = int(np.ceil(len(validation_graphs) / batch_size))
            callbacks.extend([ModelCheckpointMAE(filepath=filepath,
                                                 monitor=monitor,
                                                 mode=mode,
                                                 save_best_only=True,
                                                 save_weights_only=False,
                                                 val_gen=val_generator,
                                                 steps_per_val=steps_per_val,
                                                 y_scaler=None)])
        else:
            val_generator = None
            steps_per_val = None
        train_inputs = self.graph_convertor.get_flat_data(train_graphs, train_targets)
        train_generator = self._create_generator(*train_inputs, batch_size=batch_size)
        steps_per_train = int(np.ceil(len(train_graphs) / batch_size))
        self.fit_generator(train_generator, steps_per_epoch=steps_per_train,
                           validation_data=val_generator, validation_steps=steps_per_val,
                           epochs=epochs, verbose=verbose, callbacks=callbacks, **kwargs)
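
A hedged usage sketch for the method above; the hyperparameters and the graph/target lists are placeholders, and the MEGNetModel constructor arguments follow the megnet README:

from megnet.models import MEGNetModel

model = MEGNetModel(nfeat_edge=100, nfeat_global=2)  # placeholder hyperparameters
model.train_from_graphs(train_graphs, train_targets,
                        validation_graphs=val_graphs,
                        validation_targets=val_targets,
                        epochs=100, batch_size=32,
                        dirname='callback')  # checkpoints are written under ./callback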
Example #3
    def test_reduce_lr_upon_nan(self):
        callbacks = [ReduceLRUponNan(patience=100)]
        self.assertAlmostEqual(float(kb.get_value(self.model.optimizer.lr)),
                               1e-3)
        gen = Generator(self.x, np.array([1, np.nan]).reshape((1, 2, 1)))
        self.model.fit_generator(gen,
                                 steps_per_epoch=1,
                                 epochs=1,
                                 callbacks=callbacks,
                                 verbose=0)
        self.assertAlmostEqual(float(kb.get_value(self.model.optimizer.lr)),
                               0.5e-3)
Example #4
File: base.py Project: dgaines2/megnet
    def train_from_graphs(self,
                          train_graphs: List[Dict],
                          train_targets: List[float],
                          validation_graphs: List[Dict] = None,
                          validation_targets: List[float] = None,
                          sample_weights: List[float] = None,
                          epochs: int = 1000,
                          batch_size: int = 128,
                          verbose: int = 1,
                          pad_string: str = None,
                          callbacks: List[Callback] = None,
                          prev_model: str = None,
                          lr_scaling_factor: float = 0.5,
                          patience: int = 500,
                          save_checkpoint: bool = True,
                          automatic_correction: bool = True,
                          **kwargs) -> "GraphModel":
        """
        Args:
            train_graphs: (list) list of graph dictionaries
            train_targets: (list) list of target values
            validation_graphs: (list) list of graphs as validation
            validation_targets: (list) list of validation targets
            sample_weights: (list) list of sample weights
            epochs: (int) number of epochs
            batch_size: (int) training batch size
            verbose: (int) keras fit verbosity: 0 = silent, 1 = progress bar, 2 = one line per epoch
            pad_string: (str) string to add in front of callback filepath
            callbacks: (list) megnet or keras callback functions for training
            prev_model: (str) file name for previously saved model
            lr_scaling_factor: (float, less than 1) factor by which the learning rate is scaled down when a NaN loss is encountered
            patience: (int) patience for early stopping
            save_checkpoint: (bool) whether to save checkpoints
            automatic_correction: (bool) whether to add ReduceLRUponNan to correct NaN losses automatically
            **kwargs: additional keyword arguments passed to keras fit
        """
        # load from saved model
        if prev_model:
            self.load_weights(prev_model)
        is_classification = 'entropy' in str(self.model.loss)
        monitor = 'val_acc' if is_classification else 'val_mae'
        mode = 'max' if is_classification else 'min'
        dirname = kwargs.pop('dirname', 'callback')
        has_sample_weights = sample_weights is not None
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        if callbacks is None:
            # with this callback you can stop model training by running `touch STOP`
            callbacks = [ManualStop()]
        train_nb_atoms = [len(i['atom']) for i in train_graphs]
        train_targets = [
            self.target_scaler.transform(i, j)
            for i, j in zip(train_targets, train_nb_atoms)
        ]
        if (validation_graphs is not None) and (validation_targets is not None):
            filepath = os.path.join(dirname, '%s_{epoch:05d}_{%s:.6f}.hdf5' % (monitor, monitor))
            val_nb_atoms = [len(i['atom']) for i in validation_graphs]
            validation_targets = [
                self.target_scaler.transform(i, j)
                for i, j in zip(validation_targets, val_nb_atoms)
            ]
            val_inputs = self.graph_converter.get_flat_data(
                validation_graphs, validation_targets)

            val_generator = self._create_generator(*val_inputs,
                                                   batch_size=batch_size)
            steps_per_val = int(np.ceil(len(validation_graphs) / batch_size))
            if save_checkpoint:
                callbacks.extend([
                    ModelCheckpointMAE(pad_string=pad_string,
                                       filepath=filepath,
                                       monitor=monitor,
                                       mode=mode,
                                       save_best_only=True,
                                       save_weights_only=False,
                                       val_gen=val_generator,
                                       steps_per_val=steps_per_val,
                                       target_scaler=self.target_scaler)
                ])
                # avoid running validation twice in an epoch
                # val_generator = None  # type: ignore
                # steps_per_val = None  # type: ignore

            if automatic_correction:
                callbacks.extend([
                    ReduceLRUponNan(filepath=filepath,
                                    monitor=monitor,
                                    mode=mode,
                                    factor=lr_scaling_factor,
                                    patience=patience,
                                    has_sample_weights=has_sample_weights)
                ])
        else:
            val_generator = None  # type: ignore
            steps_per_val = None  # type: ignore

        train_inputs = self.graph_converter.get_flat_data(
            train_graphs, train_targets)
        # check dimension match
        self.check_dimension(train_graphs[0])
        train_generator = self._create_generator(*train_inputs,
                                                 sample_weights=sample_weights,
                                                 batch_size=batch_size)
        steps_per_train = int(np.ceil(len(train_graphs) / batch_size))
        self.fit(train_generator,
                 steps_per_epoch=steps_per_train,
                 validation_data=val_generator,
                 validation_steps=steps_per_val,
                 epochs=epochs,
                 verbose=verbose,
                 callbacks=callbacks,
                 **kwargs)
        return self
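
The train_targets list comprehension above rescales each target by its structure's atom count through target_scaler.transform(target, n_atoms). A minimal sketch of that interface, assuming a simple per-atom normalization (the scalers actually shipped with megnet may also standardize):

class PerAtomScaler:
    """Illustrative scaler with the transform(target, n_atoms)
    interface used above: store extensive targets per atom."""
    def transform(self, target, n_atoms):
        return target / n_atoms

    def inverse_transform(self, scaled, n_atoms):
        return scaled * n_atoms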
Example #5
def get_graphs_targets(ids):
    """
    Args:
        ids (List): list of ids

    Returns:
        list of graphs and list of target values
    """
    ids = [i for i in ids if i in final_graphs]
    return [final_graphs[i] for i in ids], [final_targets[i] for i in ids]


train_graphs, train_targets = get_graphs_targets(train_ids)
val_graphs, val_targets = get_graphs_targets(val_ids)

#  5. Model training
callbacks = [ReduceLRUponNan(patience=500), ManualStop()]
model.train_from_graphs(train_graphs,
                        train_targets,
                        val_graphs,
                        val_targets,
                        epochs=EPOCHS,
                        verbose=2,
                        initial_epoch=0,
                        callbacks=callbacks)

#  6. Model testing

##  load the best model: with save_best_only checkpoints, the most recently created file has the lowest validation error
files = glob("./callback/*.hdf5")
best_model = sorted(files, key=os.path.getctime)[-1]
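
Sorting by ctime works here because save_best_only checkpoints are written only on improvement, so the newest file is also the best. An alternative sketch that parses the metric straight from the val_mae_{epoch:05d}_{val_mae:.6f}.hdf5 naming convention instead of relying on file timestamps:

import re

def best_by_val_mae(files):
    # pull the trailing metric out of e.g. 'val_mae_00042_0.123456.hdf5'
    def mae(fname):
        match = re.search(r'_(\d+\.\d+)\.hdf5$', fname)
        return float(match.group(1)) if match else float('inf')
    return min(files, key=mae)

best_model = best_by_val_mae(files)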
Example #6
File: models.py Project: xue-smile/megnet
    def train_from_graphs(self,
                          train_graphs: List[Dict],
                          train_targets: List[float],
                          validation_graphs: List[Dict] = None,
                          validation_targets: List[float] = None,
                          epochs: int = 1000,
                          batch_size: int = 128,
                          verbose: int = 1,
                          callbacks: List[Callback] = None,
                          prev_model: str = None,
                          lr_scaling_factor: float = 0.5,
                          patience: int = 500,
                          save_checkpoint: bool = True,
                          automatic_correction: bool = True,
                          **kwargs) -> None:
        """
        # TODO write doc...
        :param train_graphs:
        :param train_targets:
        :param validation_graphs:
        :param validation_targets:
        :param epochs:
        :param batch_size:
        :param verbose:
        :param callbacks:
        :param prev_model:
        :param lr_scaling_factor:
        :param patience:
        :param save_checkpoint:
        :param automatic_correction:
        :param kwargs:
        :return:
        """
        # load from saved model
        if prev_model:
            self.load_weights(prev_model)
        is_classification = 'entropy' in self.model.loss
        monitor = 'val_acc' if is_classification else 'val_mae'
        mode = 'max' if is_classification else 'min'
        dirname = kwargs.pop('dirname', 'callback')
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        if callbacks is None:
            # with this callback you can stop model training by running `touch STOP`
            callbacks = [ManualStop()]
        train_nb_atoms = [len(i['atom']) for i in train_graphs]
        train_targets = [
            self.target_scaler.transform(i, j)
            for i, j in zip(train_targets, train_nb_atoms)
        ]

        if validation_graphs is not None:
            filepath = os.path.join(
                dirname, '%s_{epoch:05d}_{%s:.6f}.hdf5' % (monitor, monitor))
            val_nb_atoms = [len(i['atom']) for i in validation_graphs]
            validation_targets = [
                self.target_scaler.transform(i, j)
                for i, j in zip(validation_targets, val_nb_atoms)
            ]
            val_inputs = self.graph_converter.get_flat_data(
                validation_graphs, validation_targets)

            val_generator = self._create_generator(*val_inputs,
                                                   batch_size=batch_size)
            steps_per_val = int(np.ceil(len(validation_graphs) / batch_size))
            if automatic_correction:
                callbacks.extend([
                    ReduceLRUponNan(
                        filepath=filepath,
                        monitor=monitor,
                        mode=mode,
                        factor=lr_scaling_factor,
                        patience=patience,
                    )
                ])
            if save_checkpoint:
                callbacks.extend([
                    ModelCheckpointMAE(filepath=filepath,
                                       monitor=monitor,
                                       mode=mode,
                                       save_best_only=True,
                                       save_weights_only=False,
                                       val_gen=val_generator,
                                       steps_per_val=steps_per_val,
                                       target_scaler=self.target_scaler)
                ])
        else:
            val_generator = None
            steps_per_val = None
        train_inputs = self.graph_converter.get_flat_data(
            train_graphs, train_targets)
        # check dimension match
        self.check_dimension(train_graphs[0])
        train_generator = self._create_generator(*train_inputs,
                                                 batch_size=batch_size)
        steps_per_train = int(np.ceil(len(train_graphs) / batch_size))
        self.fit_generator(train_generator,
                           steps_per_epoch=steps_per_train,
                           validation_data=val_generator,
                           validation_steps=steps_per_val,
                           epochs=epochs,
                           verbose=verbose,
                           callbacks=callbacks,
                           **kwargs)
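
Every train_from_graphs variant above derives the monitored metric from the loss name. The same branch, isolated as a standalone sketch:

def pick_monitor(loss_name):
    # keras classification losses contain 'entropy',
    # e.g. 'binary_crossentropy' or 'categorical_crossentropy'
    is_classification = 'entropy' in loss_name
    return ('val_acc', 'max') if is_classification else ('val_mae', 'min')

assert pick_monitor('mse') == ('val_mae', 'min')
assert pick_monitor('categorical_crossentropy') == ('val_acc', 'max')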