Ejemplo n.º 1
0
    def __init__(
        self,
        inputs,
        targets,
        model=None,
        data=None,
        optimizer=(None, None),
        regularization=None,
        epochs=100,
        convergence=None,
        lossfxn=None,
        device="cpu",
        batch_size=None,
        lr_scheduler=None,
        **kwargs
    ):
        """Batch the data, build the optimizer and launch the training loop.

        Parameters
        ----------
        inputs : dict
            Mapping whose values are iterables of ``(symbol, vector)`` pairs
            (this is how the CUDA branch below unpacks them).
        targets : object
            Expected outputs. They are split with ``get_chunks`` and then
            converted with ``lod_to_list``.
        model : object
            Object exposing ``parameters()``. It defaults to None in the
            signature, but its parameters are handed to the optimizer, so a
            model has to be supplied.
        data : object
            Not used by this method.
        optimizer : tuple
            Optimizer specification forwarded to ``get_optimizer``.
        regularization : object
            Not used by this method.
        epochs : int
            Stored as ``self.epochs``.
        convergence : object
            Stored as ``self.convergence``.
        lossfxn : object
            Custom loss function. When None, ``MSELoss`` is used.
        device : str
            When equal to "cuda" the targets and input vectors are moved
            with ``.cuda()``. Stored as ``self.device``.
        batch_size : int
            Number of elements per batch. Default is None, which means the
            number of entries in ``inputs``.
        lr_scheduler : object
            When not None it is forwarded to ``get_lr_scheduler``.
        **kwargs
            Only "anneal" and "penalize_latent" are recognized; each one is
            stored as an attribute (None when not supplied). Any other
            keyword is ignored.

        Raises
        ------
        TypeError
            If ``batch_size`` is neither None nor an int.
        """

        # Every supported option is always defined, so that reading one of
        # them later never fails when only a subset was passed in.
        supported_keys = ["anneal", "penalize_latent"]

        for key in supported_keys:
            setattr(self, key, kwargs.get(key))

        self.initial_time = time.time()

        if batch_size is None:
            batch_size = len(inputs.values())

        if not isinstance(batch_size, int):
            raise TypeError(
                "batch_size must be an int or None, got {}.".format(
                    type(batch_size).__name__
                )
            )

        chunks = list(get_chunks(inputs, batch_size, svm=False))
        targets_ = list(get_chunks(targets, batch_size, svm=False))

        # This change is needed because the targets are features or
        # positions and they are built as a dictionary.
        targets = lod_to_list(targets_)

        logger.info("Batch size: {} elements per batch.".format(batch_size))

        if device == "cuda":
            logger.info("Moving data to CUDA...")

            targets = targets.cuda()
            _inputs = OrderedDict()

            for hash_, f in inputs.items():
                _inputs[hash_] = [(symbol, vector.cuda()) for symbol, vector in f]

            # NOTE(review): ``chunks`` was already built from the original
            # ``inputs`` above, so the tensors moved here do not seem to
            # reach the scattered batches -- confirm whether the chunking
            # should happen after this transfer.
            inputs = _inputs

            move_time = time.time() - self.initial_time
            h, m, s = convert_elapsed_time(move_time)
            logger.info(
                "Data moved to GPU in {} hours {} minutes {:.2f} seconds.".format(
                    h, m, s
                )
            )
            logger.info(" ")

        # Define optimizer
        self.optimizer_name, self.optimizer = get_optimizer(
            optimizer, model.parameters()
        )
        if lr_scheduler is not None:
            self.scheduler = get_lr_scheduler(self.optimizer, lr_scheduler)

        if lossfxn is None:
            self.lossfxn = MSELoss
            self.inputs_chunk_vals = None

        else:
            logger.info("Using custom loss function...")
            logger.info("")

            self.lossfxn = lossfxn
            self.inputs_chunk_vals = self.get_inputs_chunks(chunks)

        logger.info(" ")
        logger.info("Starting training...")
        logger.info(" ")

        logger.info(
            "{:6s} {:19s} {:12s} {:9s}".format("Epoch", "Time Stamp", "Loss", "Rec Err")
        )
        logger.info(
            "{:6s} {:19s} {:12s} {:9s}".format(
                "------", "-------------------", "------------", "--------"
            )
        )

        # Data scattering
        client = dask.distributed.get_client()
        self.chunks = [client.scatter(chunk) for chunk in chunks]
        self.targets = [client.scatter(target) for target in targets]

        self.device = device
        self.epochs = epochs
        self.model = model
        self.lr_scheduler = lr_scheduler
        self.convergence = convergence

        # Let the hunger game begin...
        self.trainer()
Ejemplo n.º 2
0
    def train(
        self,
        inputs,
        targets,
        data=None,
        optimizer=(None, None),
        epochs=100,
        regularization=None,
        convergence=None,
        lossfxn=None,
        device="cpu",
        batch_size=None,
        lr_scheduler=None,
        independent_loss=True,
        loss_weights=None,
    ):
        """Train the models

        Parameters
        ----------
        inputs : list
            One entry per model. Each entry is either a bound method, which
            is kept as is, or a feature container that is split into
            batches with ``get_chunks``.
        targets : list
            The expected values that the models have to learn aka y, one
            entry per model.
        data : object
            Data object created from the handler. Its ``atoms_per_image``
            attribute is batched here; it is required when one of the models
            is named "PytorchPotentials".
        optimizer : tuple
            The optimizer is a tuple with the structure:
                >>> ('adam', {'lr': float, 'weight_decay': float})

        epochs : int
            Number of full training cycles. Only used as stopping criterion
            when ``convergence`` is None.
        regularization : float
            This is the L2 regularization. It is not the same as weight decay.
            Not used by this method.
        convergence : dict
            Instead of using epochs, users can set a convergence criterion.
                >>> convergence = {"rmse": [0.04, 0.02]}
            A scalar is repeated for every model. The "rmse" entry of the
            passed dictionary is replaced in place by a numpy array.
        lossfxn : list
            Loss function objects, one per model. Default is None, in which
            case a list of None with one entry per model is stored.
        device : str
            Calculation can be run in the cpu or cuda (gpu).
        batch_size : int
            Number of data points per batch to use for training. Default is None.
        lr_scheduler : tuple
            Tuple with structure: scheduler's name and a dictionary with keyword
            arguments.

            >>> lr_scheduler = ('ReduceLROnPlateau',
                                {'mode': 'min', 'patience': 10})
        independent_loss : bool
            Whether or not models' weight are optimized independently.
        loss_weights : list
            How much the loss of model(i) contributes to the total loss.
            Default is None, which gives every loss the same weight.

        Raises
        ------
        ValueError
            If the convergence list does not have one entry per model, or if
            a "PytorchPotentials" model is trained without ``data``.
        TypeError
            If ``batch_size`` is neither None nor an int.
        """

        self.epochs = epochs

        # Convergence criterion. It is optional: without it the loop below
        # stops after ``epochs`` iterations.
        if convergence is not None:
            threshold = convergence["rmse"]
            if isinstance(threshold, (float, int)):
                convergence["rmse"] = np.array(
                    [threshold] * len(self.models))
            elif isinstance(threshold, list):
                if len(threshold) != len(self.models):
                    raise ValueError(
                        "Your convergence list is not the same length of the number of models"
                    )
                convergence["rmse"] = np.array(threshold)

        logger.info(" ")
        logging.info("Model Merger")
        logging.info("============")
        now = datetime.datetime.now()
        logger.info("Module accessed on {}.".format(
            now.strftime("%Y-%m-%d %H:%M:%S")))
        logging.info("Merging the following models:")

        for model in self.models:
            logging.info("    - {}.".format(model.name()))

        logging.info("Loss functions:")

        # Resolve the default first so that everything below can rely on a
        # list, even when no loss function was passed.
        if lossfxn is None:
            self.lossfxn = [None for model in self.models]
        else:
            self.lossfxn = lossfxn

        if loss_weights is None:
            self.loss_weights = [
                1.0 / len(self.lossfxn) for _ in self.lossfxn
            ]
        else:
            self.loss_weights = loss_weights

        for index, loss in enumerate(self.lossfxn):
            logging.info("    - Name: {}; Weight: {}.".format(
                getattr(loss, "__name__", loss), self.loss_weights[index]))
        logging.info("Convergence criterion: {}.".format(convergence))

        # If no batch_size provided then the whole training set length is the batch.
        # NOTE(review): ``inputs`` is iterated as a sequence below, so
        # ``inputs.values()`` only works for mapping-like inputs -- confirm
        # what the intended default is.
        if batch_size is None:
            batch_size = len(inputs.values())

        if not isinstance(batch_size, int):
            raise TypeError(
                "batch_size must be an int or None, got {}.".format(
                    type(batch_size).__name__))

        chunks = []
        for inputs_ in inputs:

            if inspect.ismethod(inputs_):
                chunks.append(inputs_)
            else:
                chunks.append(
                    list(get_chunks(inputs_, batch_size, svm=False)))

        targets = [
            list(get_chunks(target, batch_size, svm=False))
            for target in targets
        ]

        # ``data`` is optional; the per-image atom counts are only consumed
        # by the "PytorchPotentials" branch further down.
        atoms_per_image = None
        if data is not None:
            atoms_per_image = list(
                get_chunks(data.atoms_per_image, batch_size, svm=False))

        self.device = device

        # Population of extra Attributes needed by the models, and further data
        # preprocessing

        # A loss without a ``latent`` argument must not wipe the values
        # gathered for a previous loss that does take one.
        self.inputs_chunk_vals = None
        for index, loss in enumerate(self.lossfxn):
            if loss is None:
                continue
            # getfullargspec replaces getargspec, removed in Python 3.11.
            if "latent" in inspect.getfullargspec(loss).args:
                autoencoder_train = dynamic_import("train",
                                                   "ml4chem.atomistic.models",
                                                   alt_name="autoencoders")
                self.inputs_chunk_vals = autoencoder_train.get_inputs_chunks(
                    chunks[index])

        parameters = []
        for index, model in enumerate(self.models):
            parameters += model.parameters()
            if model.name() == "PytorchPotentials":
                if atoms_per_image is None:
                    raise ValueError(
                        "PytorchPotentials models require the data argument.")
                # These models require targets as tensors
                self.atoms_per_image = torch.tensor(atoms_per_image,
                                                    requires_grad=False,
                                                    dtype=torch.float)
                targets[index] = [
                    torch.tensor(batch, requires_grad=False)
                    for batch in targets[index]
                ]
            elif model.name() in ModelMerger.autoencoders:
                targets[index] = lod_to_list(targets[index])

        # Data scattering
        client = dask.distributed.get_client()

        # self.targets = [client.scatter(target) for target in targets]
        self.targets = list(targets)

        self.chunks = []

        for i, chunk in enumerate(chunks):
            if inspect.ismethod(chunk) is False:
                self.chunks.append(client.scatter(chunk))
            else:
                # This list comprehension is useful to have the same number of
                # functions as the same number of chunks without users' input.
                chunk = [chunk for _ in range(len(self.targets[i]))]
                self.chunks.append(chunk)

        del chunks

        logger.info(" ")
        logging.info("Batch Information")
        logging.info("-----------------")
        logging.info("Number of batches:")
        for index, c in enumerate(self.chunks):
            logging.info("    - Model {}, {}.".format(index, len(c)))
        logging.info("Batch size: {} elements per batch.\n".format(batch_size))

        # Define optimizer

        self.optimizer_name, self.optimizer = get_optimizer(
            optimizer, parameters)

        if lr_scheduler is not None:
            self.scheduler = get_lr_scheduler(self.optimizer, lr_scheduler)

        logger.info(" ")
        logger.info("Starting training...")
        logger.info(" ")

        logger.info("{:6s} {:19s} {:12s} {:8s}".format("Epoch", "Time Stamp",
                                                       "Loss", "RMSE (ave)"))
        logger.info("{:6s} {:19s} {:12s} {:8s}".format("------",
                                                       "-------------------",
                                                       "------------",
                                                       "--------------"))

        converged = False
        epoch = 0

        if independent_loss is False:
            # Convert list of chunks from [[a, c], [b, d]] to [[a, b], [c, d]]
            self.chunks = list(map(list, zip(*self.chunks)))

        # Debugging aid: the weights of the second model are copied before
        # training and compared with the trained ones once converged. With
        # fewer than two models there is nothing to track.
        tracked_model = self.models[1] if len(self.models) > 1 else None

        def snapshot():
            """Return cloned copies of the tracked model's state tensors."""
            if tracked_model is None:
                return {}
            return {
                key: value.clone()
                for key, value in tracked_model.state_dict().items()
            }

        old_state_dict = snapshot()

        from ml4chem.atomistic.models.autoencoders import Annealer

        annealer = Annealer()

        while not converged:
            epoch += 1
            self.annealing = annealer.update(epoch)

            self.optimizer.zero_grad()  # clear previous gradients

            if independent_loss:
                losses = []
                outputs = []
                for model_index, model in enumerate(self.models):
                    loss, output = self.closure(model_index,
                                                model,
                                                independent_loss,
                                                name=model.name())
                    losses.append(loss)
                    outputs.append(output)

            else:
                # NOTE(review): ``index`` is whatever the batch-information
                # loop above left behind (the last chunk index) -- confirm
                # that ``closure`` does not depend on it in this mode.
                loss, outputs = self.closure(index, self.models,
                                             independent_loss)

            rmse = []
            for i, model in enumerate(self.models):
                outputs_ = outputs[i]
                targets_ = self.targets[i]

                if model.name() == "VAE":
                    # VAE usually returns a complex output with mus and sigmas
                    # but we only need mus at this stage.
                    outputs_ = [sublist[0] for sublist in outputs_]
                rmse.append(compute_rmse(outputs_, targets_))
            rmse = np.array(rmse)

            _rmse = np.average(rmse)

            if self.optimizer_name != "LBFGS":
                self.optimizer.step()
            else:
                options = {
                    "closure": self.closure,
                    "current_loss": loss,
                    "max_ls": 10
                }
                self.optimizer.step(options)

            ts = time.time()
            ts = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d "
                                                              "%H:%M:%S")
            logger.info("{:6d} {} {:8e} {:8f}".format(epoch, ts, loss, _rmse))

            if convergence is None and epoch == self.epochs:
                converged = True
            elif convergence is not None and (rmse <=
                                              convergence["rmse"]).all():
                converged = True
                new_state_dict = snapshot()

                for key in old_state_dict:
                    if not (old_state_dict[key] == new_state_dict[key]).all():
                        print("Diff in {}".format(key))
                    else:
                        print("No diff in {}".format(key))
            # Per-epoch debugging output.
            print(convergence)
            print(rmse)

        print("Final")
        print(convergence)
        print(rmse)
Ejemplo n.º 3
0
    def train(self,
              inputs,
              targets,
              data=None,
              optimizer=(None, None),
              regularization=None,
              epochs=100,
              convergence=None,
              lossfxn=None,
              device="cpu",
              batch_size=None,
              lr_scheduler=None,
              independent_loss=True,
              loss_weights=None):
        """Train the merged models

        Parameters
        ----------
        inputs : list
            One entry per model. Each entry is either a bound method, which
            is kept as is, or a feature container that is split into
            batches with ``get_chunks``.
        targets : list
            Expected values, one entry per model.
        data : object
            Object whose ``atoms_per_image`` attribute is batched here; it
            is required when one of the models is named "PytorchPotentials".
        optimizer : tuple
            Optimizer specification forwarded to ``get_optimizer``.
        regularization : object
            Not used by this method.
        epochs : int
            Number of training cycles. Only used as stopping criterion when
            ``convergence`` is None.
        convergence : dict
            When given, training stops once every model's RMSE is below or
            equal to ``convergence["rmse"]``.
        lossfxn : list
            Loss function objects, one per model. Default is None, in which
            case a list of None with one entry per model is stored.
        device : str
            Stored as ``self.device``.
        batch_size : int
            Number of data points per batch. Default is None.
        lr_scheduler : object
            When not None it is forwarded to ``get_lr_scheduler``.
        independent_loss : bool
            Whether ``closure`` is called once per model (True) or once for
            all the models (False).
        loss_weights : list
            Weight of each loss. Default is None, which gives every loss the
            same weight.

        Raises
        ------
        ValueError
            If a "PytorchPotentials" model is trained without ``data``.
        TypeError
            If ``batch_size`` is neither None nor an int.
        """

        # The stopping criterion below reads ``self.epochs``; store the
        # argument so that it is actually honored.
        self.epochs = epochs

        logger.info(" ")
        logging.info("Model Merger")
        logging.info("============")
        logging.info("Merging the following models:")

        for model in self.models:
            logging.info("    - {}.".format(model.name()))

        logging.info("Loss functions:")

        # Resolve the default first so that everything below can rely on a
        # list, even when no loss function was passed.
        if lossfxn is None:
            self.lossfxn = [None for model in self.models]
        else:
            self.lossfxn = lossfxn

        if loss_weights is None:
            self.loss_weights = [
                1. / len(self.lossfxn) for _ in self.lossfxn
            ]
        else:
            self.loss_weights = loss_weights

        for loss in self.lossfxn:
            logging.info("    - {}.".format(getattr(loss, "__name__", loss)))

        # If no batch_size provided then the whole training set length is the batch.
        # NOTE(review): ``inputs`` is iterated as a sequence below, so
        # ``inputs.values()`` only works for mapping-like inputs -- confirm
        # what the intended default is.
        if batch_size is None:
            batch_size = len(inputs.values())

        if not isinstance(batch_size, int):
            raise TypeError(
                "batch_size must be an int or None, got {}.".format(
                    type(batch_size).__name__))

        chunks = []
        for inputs_ in inputs:

            if inspect.ismethod(inputs_):
                chunks.append(inputs_)
            else:
                chunks.append(
                    list(get_chunks(inputs_, batch_size, svm=False)))

        targets = [
            list(get_chunks(target, batch_size, svm=False))
            for target in targets
        ]

        # ``data`` is optional; the per-image atom counts are only consumed
        # by the "PytorchPotentials" branch further down.
        atoms_per_image = None
        if data is not None:
            atoms_per_image = list(
                get_chunks(data.atoms_per_image, batch_size, svm=False))

        self.device = device

        # Population of extra Attributes needed by the models, and further data
        # preprocessing

        for index, loss in enumerate(self.lossfxn):
            if loss is None:
                continue
            # getfullargspec replaces getargspec, removed in Python 3.11.
            if "latent" in inspect.getfullargspec(loss).args:
                autoencoder_train = dynamic_import("train",
                                                   "ml4chem.models",
                                                   alt_name="autoencoders")
                self.inputs_chunk_vals = autoencoder_train.get_inputs_chunks(
                    chunks[index])

        parameters = []
        for index, model in enumerate(self.models):
            parameters += model.parameters()
            if model.name() == "PytorchPotentials":
                if atoms_per_image is None:
                    raise ValueError(
                        "PytorchPotentials models require the data argument.")
                # These models require targets as tensors
                self.atoms_per_image = torch.tensor(atoms_per_image,
                                                    requires_grad=False,
                                                    dtype=torch.float)
                targets[index] = [
                    torch.tensor(batch, requires_grad=False)
                    for batch in targets[index]
                ]
            elif model.name() == "AutoEncoder":
                targets[index] = lod_to_list(targets[index])

        # Data scattering
        client = dask.distributed.get_client()

        # self.targets = [client.scatter(target) for target in targets]
        self.targets = list(targets)

        self.chunks = []

        for i, chunk in enumerate(chunks):
            if inspect.ismethod(chunk) is False:
                self.chunks.append(client.scatter(chunk))
            else:
                # This list comprehension is useful to have the same number of
                # functions as the same number of chunks without users' input.
                chunk = [chunk for _ in range(len(self.targets[i]))]
                self.chunks.append(chunk)

        del chunks

        logger.info(" ")
        logging.info("Batch Information")
        logging.info("-----------------")
        logging.info("Number of batches:")
        for index, c in enumerate(self.chunks):
            logging.info('    - Model {}, {}.'.format(index, len(c)))
        logging.info("Batch size: {} elements per batch.\n".format(batch_size))

        # Define optimizer

        self.optimizer_name, self.optimizer = get_optimizer(
            optimizer, parameters)

        if lr_scheduler is not None:
            self.scheduler = get_lr_scheduler(self.optimizer, lr_scheduler)

        logger.info(" ")
        logger.info("Starting training...")
        logger.info(" ")

        logger.info("{:6s} {:19s} {:12s} {:8s}".format("Epoch", "Time Stamp",
                                                       "Loss", "RMSE (ave)"))
        logger.info("{:6s} {:19s} {:12s} {:8s}".format("------",
                                                       "-------------------",
                                                       "------------",
                                                       "--------------"))

        converged = False
        epoch = 0

        if independent_loss is False:
            # Convert list of chunks from [[a, c], [b, d]] to [[a, b], [c, d]]
            self.chunks = list(map(list, zip(*self.chunks)))

        # Debugging aid: the weights of the second model are copied before
        # training and compared with the trained ones once converged. With
        # fewer than two models there is nothing to track.
        tracked_model = self.models[1] if len(self.models) > 1 else None

        def snapshot():
            """Return cloned copies of the tracked model's state tensors."""
            if tracked_model is None:
                return {}
            return {
                key: value.clone()
                for key, value in tracked_model.state_dict().items()
            }

        old_state_dict = snapshot()

        while not converged:
            epoch += 1

            self.optimizer.zero_grad()  # clear previous gradients

            if independent_loss:
                losses = []
                for model_index, model in enumerate(self.models):
                    name = model.name()
                    # NOTE(review): ``outputs`` is overwritten on every
                    # iteration, yet it is indexed per model when the RMSE
                    # is computed below -- confirm what ``closure`` returns
                    # in this mode.
                    loss, outputs = self.closure(model_index,
                                                 model,
                                                 independent_loss,
                                                 name=name)
                    losses.append(loss)

            else:
                # NOTE(review): ``index`` is whatever the batch-information
                # loop above left behind (the last chunk index) -- confirm
                # that ``closure`` does not depend on it in this mode.
                loss, outputs = self.closure(index, self.models,
                                             independent_loss)

            rmse = [
                compute_rmse(outputs[i], self.targets[i])
                for i, model in enumerate(self.models)
            ]
            _rmse = np.average(rmse)

            if self.optimizer_name != "LBFGS":
                self.optimizer.step()
            else:
                options = {
                    "closure": self.closure,
                    "current_loss": loss,
                    "max_ls": 10
                }
                self.optimizer.step(options)

            ts = time.time()
            ts = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d "
                                                              "%H:%M:%S")
            logger.info("{:6d} {} {:8e} {:8f}".format(epoch, ts, loss, _rmse))

            if convergence is None and epoch == self.epochs:
                converged = True
            elif convergence is not None and all(i <= convergence["rmse"]
                                                 for i in rmse):
                converged = True
                new_state_dict = snapshot()

                for key in old_state_dict:
                    if not (old_state_dict[key] == new_state_dict[key]).all():
                        print('Diff in {}'.format(key))
                    else:
                        print('No diff in {}'.format(key))