Example #1
    def load(Cls, model=None, params=None, preprocessor=None, **kwargs):
        """Load a model

        Parameters
        ----------
        model : str
            Path to the .ml4c file from which the model is loaded for
            inference.
        params : str
            Path to the .params file with the user's inputs.
        preprocessor : str
            Path to the file with the sklearn preprocessor object.
        """
        kwargs["ml4chem_path"] = model
        kwargs["preprocessor"] = preprocessor

        with open(params) as ml4chem_params:
            ml4chem_params = json.load(ml4chem_params)
            model_type = ml4chem_params["model"].get("type")

            if model_type == "svm":
                model_params = ml4chem_params["model"]
                del model_params["name"]  # delete unneeded key, value
                del model_params["type"]  # delete unneeded key, value
                from ml4chem.models.kernelridge import KernelRidge

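                # NOTE: `load` below is assumed to be the module-level
                # serialization helper (it restores the weights stored in the
                # .ml4c file), not this classmethod of the same name.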
                weights = load(model)
                # TODO remove after de/serialization is fixed.
                weights = {
                    key.decode("utf-8"): value
                    for key, value in weights.items()
                }
                model_params.update({"weights": weights})
                model = KernelRidge(**model_params)
            else:
                # Instantiate the model class
                model_params = ml4chem_params["model"]
                del model_params["name"]  # delete unneeded key, value
                del model_params["type"]  # delete unneeded key, value
                from ml4chem.models.neuralnetwork import NeuralNetwork

                model = NeuralNetwork(**model_params)

        # Instantiation of fingerprint class
        fingerprint_params = ml4chem_params.get("fingerprints", None)

        if fingerprint_params is None:
            fingerprints = fingerprint_params
        else:
            name = fingerprint_params.get("name")
            del fingerprint_params["name"]

            fingerprints = dynamic_import(name, "ml4chem.fingerprints")
            fingerprints = fingerprints(**fingerprint_params)

        calc = Cls(fingerprints=fingerprints, model=model, **kwargs)

        return calc
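
A minimal usage sketch for the loader above. The import path, class name and file names are assumptions for illustration, not taken from the snippet:

    # Hypothetical usage; adjust the import path and file names to your setup.
    from ml4chem import Potentials

    calc = Potentials.load(
        model="mymodel.ml4c",            # serialized model weights
        params="mymodel.params",         # JSON file with the training inputs
        preprocessor="mymodel.scaler",   # optional sklearn preprocessor
    )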
Example #2
    def load_encoder(self, encoder, **kwargs):
        """Load an autoencoder in eval() mode

        Parameters
        ----------
        encoder : dict
            Dictionary with structure:

                >>> encoder = {'model': file.ml4c, 'params': file.params}

        **kwargs
            Extra keyword arguments (for example data or purpose) that are
            forwarded to prepare_model().

        Returns
        -------
        autoencoder.eval() : obj
            Autoencoder model object in eval mode to get the latent space.
        """

        params_path = encoder.get("params")
        model_path = encoder.get("model")

        with open(params_path, "r") as params_file:
            model_params = json.load(params_file)

        model_params = model_params.get("model")
        name = model_params.pop("name")
        del model_params["type"]  # delete unneeded key, value

        input_dimension = model_params.pop("input_dimension")
        output_dimension = model_params.pop("output_dimension")

        autoencoder = dynamic_import(
            name, "ml4chem.atomistic.models", alt_name="autoencoders"
        )
        autoencoder = autoencoder(**model_params)
        autoencoder.prepare_model(input_dimension, output_dimension, **kwargs)
        autoencoder.load_state_dict(torch.load(model_path), strict=True)

        return autoencoder.eval()
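
A hypothetical call to the method above, following the dictionary structure shown in its docstring (file names and the surrounding objects are placeholders):

    # `features` is assumed to be the instance that owns load_encoder(), and
    # `data_handler` an already-built data object.
    encoder = {"model": "autoencoder.ml4c", "params": "autoencoder.params"}
    autoencoder = features.load_encoder(encoder, data=data_handler,
                                        purpose="training")
    # The returned model is in eval() mode and can be used to compute the
    # latent space.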
Example #3
    def calculate(self, images, purpose="training", data=None, svm=False):
        """Return features per atom in an atoms object

        Parameters
        ----------
        images : dict
            Hashed images using the Data class.
        purpose : str
            The supported purposes are: 'training', 'inference'.
        data : obj
            Data object created from the handler.
        svm : bool
            Whether or not these features are going to be used for kernel
            methods.

        Returns
        -------
        feature_space : dict
            A dictionary keyed by hash whose values are lists of tuples with
            the structure: {'hash': [('H', [vector])]}
        """
        # Now, we need to take the inputs and convert them to the right feature
        # space
        name, kwargs = self.features
        features = dynamic_import(name, "ml4chem.atomistic.features")
        features = features(**kwargs)

        feature_space = features.calculate(
            images, data=data, purpose=purpose, svm=False
        )

        preprocessor = Preprocessing(self.preprocessor, purpose=purpose)
        preprocessor.set(purpose=purpose)

        encoder = self.load_encoder(self.encoder, data=data, purpose=purpose)

        if self.preprocessor is not None and purpose == "training":
            hashes, symbols, _latent_space = encoder.get_latent_space(
                feature_space, svm=True, purpose="preprocessing"
            )
            _latent_space = preprocessor.fit(_latent_space, scheduler=self.scheduler)

            latent_space = OrderedDict()

            # TODO parallelize this.
            index = 0
            for i, hash in enumerate(hashes):
                pairs = []

                for symbol in symbols[i]:
                    feature_vector = _latent_space[index]

                    if svm is False:
                        feature_vector = torch.tensor(
                            feature_vector, requires_grad=False, dtype=torch.float
                        )

                    pairs.append((symbol, feature_vector))
                    index += 1

                latent_space[hash] = pairs

            del _latent_space

            # Save preprocessor.
            preprocessor.save_to_file(preprocessor, self.save_preprocessor)

        elif self.preprocessor is not None and purpose == "inference":
            hashes, symbols, _latent_space = encoder.get_latent_space(
                feature_space, svm=True, purpose="preprocessing"
            )
            scaled_latent_space = preprocessor.transform(_latent_space)

            latent_space = OrderedDict()
            # TODO parallelize this.
            index = 0
            for i, hash in enumerate(hashes):
                pairs = []

                for symbol in symbols[i]:
                    feature_vector = scaled_latent_space[index]

                    if svm is False:
                        feature_vector = torch.tensor(
                            feature_vector, requires_grad=False, dtype=torch.float
                        )

                    pairs.append((symbol, feature_vector))
                    index += 1

                latent_space[hash] = pairs

            del _latent_space

        else:
            if encoder.name() == "VAE":
                purpose = "inference"
            latent_space = encoder.get_latent_space(
                feature_space, svm=svm, purpose=purpose
            )

        self.feature_space = latent_space
        return latent_space
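
A sketch of how this method might be invoked; the `images` dictionary and the `data_handler` object are assumed to come from the library's Data handler:

    # Hypothetical call on the feature extractor instance shown above.
    latent_space = features.calculate(images, purpose="training",
                                      data=data_handler, svm=False)
    # latent_space maps each image hash to a list of (symbol, feature_vector)
    # pairs, as described in the docstring.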
Example #4
    def train(
        self,
        inputs,
        targets,
        data=None,
        optimizer=(None, None),
        epochs=100,
        regularization=None,
        convergence=None,
        lossfxn=None,
        device="cpu",
        batch_size=None,
        lr_scheduler=None,
        independent_loss=True,
        loss_weights=None,
    ):
        """Train the models

        Parameters
        ----------
        inputs : dict
            Dictionary with hashed feature space.
        targets : list
            The expected values that the model has to learn aka y.
        data : object
            Data object created from the handler.
        optimizer : tuple
            The optimizer is a tuple with the structure:
                >>> ('adam', {'lr': float, 'weight_decay': float})

        epochs : int
            Number of full training cycles.
        regularization : float
            This is the L2 regularization. It is not the same as weight decay.
        convergence : dict
            Instead of using epochs, users can set a convergence criterion.
                >>> convergence = {"rmse": [0.04, 0.02]}
        lossfxn : obj
            A loss function object.
        device : str
            Calculations can be run on the cpu or cuda (gpu).
        batch_size : int
            Number of data points per batch to use for training. Default is None.
        lr_scheduler : tuple
            Tuple with structure: scheduler's name and a dictionary with keyword
            arguments.

            >>> lr_scheduler = ('ReduceLROnPlateau',
                                {'mode': 'min', 'patience': 10})
        independent_loss : bool
            Whether or not the models' weights are optimized independently.
        loss_weights : list
            How much the loss of model(i) contributes to the total loss.
        """

        self.epochs = epochs

        # Convergence criterion
        if convergence is not None:
            if isinstance(convergence["rmse"], (float, int)):
                convergence["rmse"] = np.array(
                    [convergence["rmse"] for _ in range(len(self.models))]
                )
            elif isinstance(convergence["rmse"], list):
                if len(convergence["rmse"]) != len(self.models):
                    raise ValueError(
                        "The convergence list must have the same length as "
                        "the number of models."
                    )
                convergence["rmse"] = np.array(convergence["rmse"])

        logger.info(" ")
        logging.info("Model Merger")
        logging.info("============")
        now = datetime.datetime.now()
        logger.info("Module accessed on {}.".format(
            now.strftime("%Y-%m-%d %H:%M:%S")))
        logging.info("Merging the following models:")

        for model in self.models:
            logging.info("    - {}.".format(model.name()))

        logging.info("Loss functions:")

        if loss_weights is None:
            self.loss_weights = [1.0 / len(lossfxn) for l in lossfxn]
        else:
            self.loss_weights = loss_weights

        for index, l in enumerate(lossfxn):
            logging.info("    - Name: {}; Weight: {}.".format(
                l.__name__, self.loss_weights[index]))
        logging.info("Convergence criterion: {}.".format(convergence))

        # If no batch_size provided then the whole training set length is the batch.
        if batch_size is None:
            batch_size = len(inputs.values())

        if isinstance(batch_size, int):
            chunks = []
            for inputs_ in inputs:

                if inspect.ismethod(inputs_):
                    chunks.append(inputs_)
                else:
                    chunks.append(
                        list(get_chunks(inputs_, batch_size, svm=False)))

            targets = [
                list(get_chunks(target, batch_size, svm=False))
                for target in targets
            ]
            atoms_per_image = list(
                get_chunks(data.atoms_per_image, batch_size, svm=False))

        if lossfxn is None:
            self.lossfxn = [None for model in self.models]
        else:
            self.lossfxn = lossfxn

        self.device = device

        # Population of extra Attributes needed by the models, and further data
        # preprocessing

        for index, loss in enumerate(lossfxn):
            # inspect.getargspec() was removed in Python 3.11;
            # getfullargspec() is used here instead.
            _args = inspect.getfullargspec(loss).args
            if "latent" in _args:
                train = dynamic_import("train",
                                       "ml4chem.atomistic.models",
                                       alt_name="autoencoders")
                self.inputs_chunk_vals = train.get_inputs_chunks(chunks[index])
            else:
                self.inputs_chunk_vals = None

        parameters = []
        for index, model in enumerate(self.models):
            parameters += model.parameters()
            if model.name() == "PytorchPotentials":
                # These models require targets as tensors
                self.atoms_per_image = torch.tensor(atoms_per_image,
                                                    requires_grad=False,
                                                    dtype=torch.float)
                _targets = [
                    torch.tensor(batch, requires_grad=False)
                    for batch in targets[index]
                ]
                targets[index] = _targets
                del _targets
            elif model.name() in ModelMerger.autoencoders:
                targets[index] = lod_to_list(targets[index])

        # Data scattering
        client = dask.distributed.get_client()
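        # get_client() assumes a dask.distributed Client (and scheduler) has
        # already been started elsewhere; it raises a ValueError otherwise.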

        # self.targets = [client.scatter(target) for target in targets]
        self.targets = [target for target in targets]

        self.chunks = []

        for i, chunk in enumerate(chunks):
            if inspect.ismethod(chunk) is False:
                self.chunks.append(client.scatter(chunk))
            else:
                # This list comprehension is useful to have the same number of
                # functions as the same number of chunks without users' input.
                chunk = [chunk for _ in range(len(self.targets[i]))]
                self.chunks.append(chunk)

        del chunks

        logger.info(" ")
        logging.info("Batch Information")
        logging.info("-----------------")
        logging.info("Number of batches:")
        for index, c in enumerate(self.chunks):
            logging.info("    - Model {}, {}.".format(index, len(c)))
        logging.info("Batch size: {} elements per batch.\n".format(batch_size))

        # Define optimizer

        self.optimizer_name, self.optimizer = get_optimizer(
            optimizer, parameters)

        if lr_scheduler is not None:
            self.scheduler = get_lr_scheduler(self.optimizer, lr_scheduler)

        logger.info(" ")
        logger.info("Starting training...")
        logger.info(" ")

        logger.info("{:6s} {:19s} {:12s} {:8s}".format("Epoch", "Time Stamp",
                                                       "Loss", "RMSE (ave)"))
        logger.info("{:6s} {:19s} {:12s} {:8s}".format("------",
                                                       "-------------------",
                                                       "------------",
                                                       "--------------"))

        converged = False
        epoch = 0

        if independent_loss is False:
            # Convert list of chunks from [[a, c], [b, d]] to [[a, b], [c, d]]
            self.chunks = list(map(list, zip(*self.chunks)))

        old_state_dict = {}

        for key in self.models[1].state_dict():
            old_state_dict[key] = self.models[1].state_dict()[key].clone()

        from ml4chem.atomistic.models.autoencoders import Annealer

        annealer = Annealer()

        while not converged:
            epoch += 1
            self.annealing = annealer.update(epoch)

            self.optimizer.zero_grad()  # clear previous gradients

            if independent_loss:
                losses = []
                outputs = []
                for model_index, model in enumerate(self.models):
                    loss, output = self.closure(model_index,
                                                model,
                                                independent_loss,
                                                name=model.name())
                    losses.append(loss)
                    outputs.append(output)

            else:
                loss, outputs = self.closure(index, self.models,
                                             independent_loss)

            rmse = []
            for i, model in enumerate(self.models):
                outputs_ = outputs[i]
                targets_ = self.targets[i]

                if model.name() == "VAE":
                    # VAE usually returns a complex output with mus and sigmas
                    # but we only need mus at this stage.
                    outputs_ = [sublist[0] for sublist in outputs_]
                rmse.append(compute_rmse(outputs_, targets_))
            rmse = np.array(rmse)

            _rmse = np.average(rmse)

            if self.optimizer_name != "LBFGS":
                self.optimizer.step()
            else:
                options = {
                    "closure": self.closure,
                    "current_loss": loss,
                    "max_ls": 10
                }
                self.optimizer.step(options)

            ts = time.time()
            ts = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d "
                                                              "%H:%M:%S")
            logger.info("{:6d} {} {:8e} {:8f}".format(epoch, ts, loss, _rmse))

            if convergence is None and epoch == self.epochs:
                converged = True
            elif convergence is not None and (rmse <=
                                              convergence["rmse"]).all():
                converged = True
                new_state_dict = {}

                for key in self.models[1].state_dict():
                    new_state_dict[key] = self.models[1].state_dict(
                    )[key].clone()

                for key in old_state_dict:
                    if not (old_state_dict[key] == new_state_dict[key]).all():
                        print("Diff in {}".format(key))
                    else:
                        print("No diff in {}".format(key))
            print(convergence)
            print(rmse)

        print("Final")
        print(convergence)
        print(rmse)
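
A hypothetical call to this merged training routine; the ModelMerger constructor, the model and loss objects, and the feature/target lists are assumptions used only for illustration:

    merger = ModelMerger([autoencoder, potentials_model])
    merger.train(
        inputs=[ae_inputs, nn_inputs],
        targets=[ae_targets, energies],
        data=data_handler,
        optimizer=("adam", {"lr": 1e-3, "weight_decay": 0.0}),
        convergence={"rmse": [0.04, 0.02]},   # one criterion per model
        lossfxn=[autoencoder_loss, energy_loss],
        loss_weights=[1.0, 1.0],
        independent_loss=True,
    )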
Example #5
    def closure(self, index, model, independent_loss, name=None):
        """Closure

        This method clears previous gradients, iterates over batches,
        accumulates the gradients, reduces them, updates the model
        parameters, and finally returns the loss and outputs_.

        Parameters
        ----------
        index : int
            Index of model.
        model : obj
            Model object.
        independent_loss : bool
            Whether or not the models' weights are optimized independently.
        name : str, optional
            Model class's name, by default None.

        Returns
        -------
        loss, outputs
            A tuple with loss function magnitudes and tensor with outputs.
        """

        client = dask.distributed.get_client()

        if name == "PytorchPotentials" and independent_loss:
            train = dynamic_import("train",
                                   "ml4chem.atomistic.models",
                                   alt_name="neuralnetwork")

            inputs = []
            # FIXME this does not yet scale to n models.
            for chunk_index, chunk in enumerate(self.chunks[index - 1]):

                inputs_ = self.chunks[index][chunk_index](OrderedDict(
                    chunk.result()))
                inputs.append(client.scatter(inputs_))

            loss, outputs_ = train.closure(
                inputs,
                self.targets[index],
                model,
                self.lossfxn[index],
                self.atoms_per_image,
                self.device,
            )
            return loss, outputs_

        elif name in ModelMerger.autoencoders and independent_loss:
            train = dynamic_import("train",
                                   "ml4chem.atomistic.models",
                                   alt_name="autoencoders")
            targets = self.targets[index]

            loss, outputs_ = train.closure(
                self.chunks[index],
                targets,
                model,
                self.lossfxn[index],
                self.device,
                self.inputs_chunk_vals,
            )
            return loss, outputs_

        else:  # Models are dependent on each other

            running_loss = torch.tensor(0, dtype=torch.float)
            accumulation = []

            for index, chunk in enumerate(self.chunks):
                accumulation.append(
                    client.submit(
                        self.train_batches,
                        *(
                            index,
                            chunk,
                            self.targets,
                            self.models,
                            self.lossfxn,
                            self.atoms_per_image,
                            self.device,
                        )))

            dask.distributed.wait(accumulation)
            accumulation = client.gather(accumulation)

            grads = {}
            outputs_ = {}
            losses = {}
            # Each entry of accumulation holds (outputs, loss, grad) for one
            # batch, each of them indexed by model.
            for outputs, loss, grad in accumulation:
                for model_index in range(len(self.models)):
                    if model_index not in grads.keys():
                        grads[model_index] = []
                        outputs_[model_index] = []
                        losses[model_index] = []
                    running_loss += loss[model_index]
                    losses[model_index].append(loss[model_index])
                    grads[model_index].append(np.array(grad[model_index]))
                    outputs_[model_index].append(outputs[model_index])

            # Sum gradients per model
            for key, grad in grads.items():
                grads[key] = sum(grad)

            # Update the gradients of the model
            for model_index, model in enumerate(self.models):
                for index, param in enumerate(model.parameters()):
                    param.grad = torch.tensor(grads[model_index][index])

            return running_loss, outputs_
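
closure() is normally driven by train() above; a hypothetical direct call, matching the signature shown, would look like this:

    # Assumes `merger` is a ModelMerger instance that has already set up its
    # chunks, targets and loss functions inside train().
    loss, outputs = merger.closure(0, merger.models[0], independent_loss=True,
                                   name=merger.models[0].name())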
Example #6
    def load(Cls, model=None, params=None, preprocessor=None, **kwargs):
        """Load ML4Chem models

        Parameters
        ----------
        model : str
            Path to the .ml4c file from which the model is loaded for
            inference.
        params : str
            Path to the .params file with the user's inputs.
        preprocessor : str
            Path to the file with the sklearn preprocessor object.
        """
        kwargs["ml4chem_path"] = model
        kwargs["preprocessor"] = preprocessor

        with open(params, "rb") as ml4chem_params:
            ml4chem_params = json.load(ml4chem_params)
            model_type = ml4chem_params["model"].get("type")

            model_params = ml4chem_params["model"]
            class_name = model_params["class_name"]
            module_name = Potentials.module_names[model_params["name"]]

            model_class = dynamic_import(class_name,
                                         "ml4chem.atomistic.models",
                                         alt_name=module_name)

            delete = ["name", "type", "class_name"]
            for param in delete:
                # delete unneeded (key, value) pairs.
                del model_params[param]

            if model_type == "svm":

                weights = load(model)
                # TODO remove after de/serialization is fixed.
                try:
                    weights = {
                        key.decode("utf-8"): value
                        for key, value in weights.items()
                    }
                except AttributeError:
                    weights = {key: value for key, value in weights.items()}

                model_params.update({"weights": weights})
                model = model_class(**model_params)
            else:
                # Instantiate the model class
                model = model_class(**model_params)

        # Instantiation of fingerprint class
        fingerprint_params = ml4chem_params.get("features", None)

        if fingerprint_params is None:
            features = None
        else:
            if "kwargs" in fingerprint_params.keys():
                update_dict_with = fingerprint_params.pop("kwargs")
                fingerprint_params.update(update_dict_with)

            name = fingerprint_params.get("name")
            del fingerprint_params["name"]

            features = dynamic_import(name, "ml4chem.atomistic.features")
            features = features(**fingerprint_params)

        calc = Cls(features=features, model=model, **kwargs)

        return calc
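
Usage of this revised loader mirrors Example #1; the import path and file names below are assumptions:

    # Hypothetical usage of the revised load() classmethod.
    from ml4chem.atomistic import Potentials

    calc = Potentials.load(model="mymodel.ml4c",
                           params="mymodel.params",
                           preprocessor="mymodel.scaler")
    # `calc` now wraps the restored features and model objects.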
Example #7
    def train(self,
              training_set,
              epochs=100,
              lr=0.001,
              convergence=None,
              device="cpu",
              optimizer=(None, None),
              lossfxn=None,
              regularization=0.0,
              batch_size=None,
              **kwargs):
        """Method to train models

        Parameters
        ----------
        training_set : object, list
            List containing the training set.
        epochs : int
            Number of full training cycles.
        lr : float
            Learning rate.
        convergence : dict
            Instead of using epochs, users can set a convergence criterion.
        device : str
            Calculations can be run on the cpu or cuda (gpu).
        optimizer : tuple
            The optimizer is a tuple with the structure:

                >>> ('adam', {'lr': float, 'weight_decay': float})

        lossfxn : object
            A loss function object.
        regularization : float
            This is the L2 regularization. It is not the same as weight decay.
        batch_size : int
            Number of data points per batch to use for training. Default is
            None.
        """

        purpose = "training"
        # Raw input and targets aka X, y
        data_handler = Data(training_set, purpose=purpose)
        training_set, targets = data_handler.get_data(purpose=purpose)

        # Now let's featurize
        # SVM models
        if self.model.name() in Potentials.svm_models:
            # Mapping raw positions into a feature space aka X
            feature_space, reference_features = self.features.calculate(
                training_set, data=data_handler, purpose=purpose, svm=True)

            self.model.prepare_model(feature_space,
                                     reference_features,
                                     data=data_handler)

            self.model.train(feature_space, targets)
        else:
            # Mapping raw positions into a feature space aka X

            feature_space = self.features.calculate(training_set,
                                                    data=data_handler,
                                                    purpose=purpose,
                                                    svm=False)

            # Fixed fingerprint dimension
            input_dimension = len(list(feature_space.values())[0][0][-1])
            self.model.prepare_model(input_dimension, data=data_handler)

            # CUDA stuff
            if device == "cuda":
                logger.info("Checking if CUDA is available...")
                use_cuda = torch.cuda.is_available()
                if use_cuda:
                    count = torch.cuda.device_count()
                    logger.info(
                        "ML4Chem found {} CUDA devices available.".format(
                            count))

                    for index in range(count):
                        device_name = torch.cuda.get_device_name(index)

                        if index == 0:
                            device_name += " (Default)"

                        logger.info("    - {}.".format(device_name))

                else:
                    logger.warning("No CUDA available. We will use CPU.")
                    device = "cpu"

            device_ = torch.device(device)

            self.model.to(device_)

            # This is something specific of pytorch.
            module = Potentials.module_names[self.model.name()]
            train = dynamic_import("train",
                                   "ml4chem.atomistic.models",
                                   alt_name=module)

            # Let's train
            train(feature_space,
                  targets,
                  model=self.model,
                  data=data_handler,
                  optimizer=optimizer,
                  regularization=regularization,
                  epochs=epochs,
                  convergence=convergence,
                  lossfxn=lossfxn,
                  device=device,
                  batch_size=batch_size,
                  **kwargs)

        self.save(self.model,
                  features=self.features,
                  path=self.path,
                  label=self.label)
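
A minimal, hypothetical training run built around this method; the calculator construction, the feature and model objects, and the list of training images are assumptions:

    calc = Potentials(features=features, model=model, label="training")
    calc.train(training_set=images,   # e.g. a list of ASE Atoms objects
               epochs=100,
               batch_size=32,
               optimizer=("adam", {"lr": 1e-3, "weight_decay": 0.0}),
               device="cpu")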
Example #8
    def train(self,
              inputs,
              targets,
              data=None,
              optimizer=(None, None),
              regularization=None,
              epochs=100,
              convergence=None,
              lossfxn=None,
              device="cpu",
              batch_size=None,
              lr_scheduler=None,
              independent_loss=True,
              loss_weights=None):

        logger.info(" ")
        logging.info("Model Merger")
        logging.info("============")
        logging.info("Merging the following models:")

        for model in self.models:
            logging.info("    - {}.".format(model.name()))

        logging.info("Loss functions:")

        if loss_weights is None:
            self.loss_weights = [1. / len(lossfxn) for l in lossfxn]
        else:
            self.loss_weights = loss_weights

        for l in lossfxn:
            logging.info("    - {}.".format(l.__name__))

        # If no batch_size provided then the whole training set length is the batch.
        if batch_size is None:
            batch_size = len(inputs.values())

        if isinstance(batch_size, int):
            chunks = []
            for inputs_ in inputs:

                if inspect.ismethod(inputs_):
                    chunks.append(inputs_)
                else:
                    chunks.append(
                        list(get_chunks(inputs_, batch_size, svm=False)))

            targets = [
                list(get_chunks(target, batch_size, svm=False))
                for target in targets
            ]
            atoms_per_image = list(
                get_chunks(data.atoms_per_image, batch_size, svm=False))

        if lossfxn is None:
            self.lossfxn = [None for model in self.models]
        else:
            self.lossfxn = lossfxn

        self.device = device

        # Population of extra Attributes needed by the models, and further data
        # preprocessing

        for index, loss in enumerate(lossfxn):
            # inspect.getargspec() was removed in Python 3.11;
            # getfullargspec() is used here instead.
            _args = inspect.getfullargspec(loss).args
            if "latent" in _args:
                train = dynamic_import("train",
                                       "ml4chem.models",
                                       alt_name="autoencoders")
                self.inputs_chunk_vals = train.get_inputs_chunks(chunks[index])

        parameters = []
        for index, model in enumerate(self.models):
            parameters += model.parameters()
            if model.name() == "PytorchPotentials":
                # These models require targets as tensors
                self.atoms_per_image = torch.tensor(atoms_per_image,
                                                    requires_grad=False,
                                                    dtype=torch.float)
                _targets = [
                    torch.tensor(batch, requires_grad=False)
                    for batch in targets[index]
                ]
                targets[index] = _targets
                del _targets
            elif model.name() == "AutoEncoder":
                targets[index] = lod_to_list(targets[index])

        # Data scattering
        client = dask.distributed.get_client()

        # self.targets = [client.scatter(target) for target in targets]
        self.targets = [target for target in targets]

        self.chunks = []

        for i, chunk in enumerate(chunks):
            if inspect.ismethod(chunk) is False:
                self.chunks.append(client.scatter(chunk))
            else:
                # This list comprehension is useful to have the same number of
                # functions as the same number of chunks without users' input.
                chunk = [chunk for _ in range(len(self.targets[i]))]
                self.chunks.append(chunk)

        del chunks

        logger.info(" ")
        logging.info("Batch Information")
        logging.info("-----------------")
        logging.info("Number of batches:")
        for index, c in enumerate(self.chunks):
            logging.info('    - Model {}, {}.'.format(index, len(c)))
        logging.info("Batch size: {} elements per batch.\n".format(batch_size))

        # Define optimizer

        self.optimizer_name, self.optimizer = get_optimizer(
            optimizer, parameters)

        if lr_scheduler is not None:
            self.scheduler = get_lr_scheduler(self.optimizer, lr_scheduler)

        logger.info(" ")
        logger.info("Starting training...")
        logger.info(" ")

        logger.info("{:6s} {:19s} {:12s} {:8s}".format("Epoch", "Time Stamp",
                                                       "Loss", "RMSE (ave)"))
        logger.info("{:6s} {:19s} {:12s} {:8s}".format("------",
                                                       "-------------------",
                                                       "------------",
                                                       "--------------"))

        converged = False
        epoch = 0

        if independent_loss is False:
            # Convert list of chunks from [[a, c], [b, d]] to [[a, b], [c, d]]
            self.chunks = list(map(list, zip(*self.chunks)))

        old_state_dict = {}

        for key in self.models[1].state_dict():
            old_state_dict[key] = self.models[1].state_dict()[key].clone()

        while not converged:
            epoch += 1

            self.optimizer.zero_grad()  # clear previous gradients

            if independent_loss:
                losses = []
                for model_index, model in enumerate(self.models):
                    name = model.name()
                    loss, outputs = self.closure(model_index,
                                                 model,
                                                 independent_loss,
                                                 name=name)
                    losses.append(loss)

            else:
                loss, outputs = self.closure(index, self.models,
                                             independent_loss)

            rmse = []
            for i, model in enumerate(self.models):
                rmse.append(compute_rmse(outputs[i], self.targets[i]))
            # print(outputs[1])
            # print(targets[1])

            # print(rmse)
            _rmse = np.average(rmse)

            if self.optimizer_name != "LBFGS":
                self.optimizer.step()
            else:
                options = {
                    "closure": self.closure,
                    "current_loss": loss,
                    "max_ls": 10
                }
                self.optimizer.step(options)

            ts = time.time()
            ts = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d "
                                                              "%H:%M:%S")
            logger.info("{:6d} {} {:8e} {:8f}".format(epoch, ts, loss, _rmse))

            if convergence is None and epoch == self.epochs:
                converged = True
            elif convergence is not None and all(i <= convergence["rmse"]
                                                 for i in rmse):
                converged = True
                new_state_dict = {}

                for key in self.models[1].state_dict():
                    new_state_dict[key] = self.models[1].state_dict(
                    )[key].clone()

                for key in old_state_dict:
                    if not (old_state_dict[key] == new_state_dict[key]).all():
                        print('Diff in {}'.format(key))
                    else:
                        print('No diff in {}'.format(key))
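
A hypothetical call mirroring Example #4; note that this older variant compares every per-model RMSE against a single scalar criterion:

    merger.train(inputs=[ae_inputs, nn_inputs],
                 targets=[ae_targets, energies],
                 data=data_handler,
                 lossfxn=[autoencoder_loss, energy_loss],
                 convergence={"rmse": 0.02},
                 independent_loss=True)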