Beispiel #1
0
    def __init__(self, tf_model, optim_method,
                 sess=None, dataset=None,
                 val_split=0.0,
                 clip_norm=None, clip_value=None,
                 model_dir=None):
        """
        Prepare distributed training of an already constructed TensorFlow model wrapper
        on Spark/BigDL.

        :param tf_model: model wrapper; its ``training_helper_layer`` is handed to the Estimator
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current TensorFlow Session; it is only stored on the instance here
        :param dataset: object exposing ``batch_size``, ``get_training_data()`` and
        ``get_validation_data()``. It is dereferenced unconditionally, so despite the ``None``
        default it has to be supplied.
        :param val_split: fraction of the training data split off (via ``randomSplit``) as
        validation data; when it equals 0.0 the dataset's own validation data is used instead
        :param clip_norm: when truthy, forwarded to the estimator's L2-norm gradient clipping
        :param clip_value: when truthy, a ``(min_value, max_value)`` tuple forwarded to the
        estimator's constant gradient clipping
        :param model_dir: directory passed through to the Estimator
        :raises ValueError: if ``clip_value`` is neither None nor a tuple, or if the dataset's
        ``batch_size`` is not positive
        """

        self.optim_method = optim_method
        self.sess = sess
        self.dataset = dataset
        self.clip_norm = clip_norm

        clip_value_ok = clip_value is None or isinstance(clip_value, tuple)
        if not clip_value_ok:
            raise ValueError("The clip_value argument should be a tuple (min_value, max_value)")
        self.clip_constant = clip_value

        batch_size = dataset.batch_size
        if batch_size <= 0:
            raise ValueError("You should set batch_size instead of batch_per_thread for training")

        self.model_dir = model_dir
        self.tf_model = tf_model

        samples = dataset.get_training_data()
        if val_split == 0.0:
            # No split requested: train on everything, validate on the dataset's own data.
            train_part = samples
            val_part = dataset.get_validation_data()
        else:
            train_part, val_part = samples.randomSplit([1 - val_split, val_split])

        self.training_rdd = train_part
        self.val_rdd = val_part
        self.batch_size = batch_size

        self.estimator = Estimator(tf_model.training_helper_layer, optim_method, model_dir)

        # Clipping is configured only for truthy settings (None, 0 and () are skipped).
        if clip_norm:
            self.estimator.set_l2_norm_gradient_clipping(clip_norm)
        if clip_value:
            lower, upper = clip_value
            self.estimator.set_constant_gradient_clipping(lower, upper)
 def load_checkpoint(self, path, version):
     """
     Restore model weights and optimizer state from a checkpoint and rebuild the estimator.

     :param path: directory containing the checkpoint files
     :param version: suffix of the ``model.*`` and ``optimMethod-TFParkTraining.*`` files
     """
     # todo make version optional
     weights_file = os.path.join(path, "model.{}".format(version))
     optim_file = os.path.join(path, "optimMethod-TFParkTraining.{}".format(version))

     self.tf_model.training_helper_layer.load_checkpoint(weights_file)
     self.optim_method = OptimMethod.load(optim_file)

     # A new Estimator is built around the reloaded optimizer, so the clipping
     # settings are applied again on it.
     self.estimator = Estimator(self.tf_model.training_helper_layer,
                                self.optim_method,
                                self.model_dir)
     if self.clip_norm:
         self.estimator.set_l2_norm_gradient_clipping(self.clip_norm)
     if self.clip_constant:
         lower, upper = self.clip_constant
         self.estimator.set_constant_gradient_clipping(lower, upper)
Beispiel #3
0
 def __init__(self,
              *,
              model,
              loss,
              optimizer=None,
              metrics=None,
              feature_preprocessing=None,
              label_preprocessing=None,
              model_dir=None):
     """
     Wire a model into the NNFrames and Spark estimators (keyword-only arguments).

     :param model: the model shared by NNModel, NNEstimator and SparkEstimator
     :param loss: loss passed to NNEstimator
     :param optimizer: optimization method; a default-constructed BigDL ``SGD`` is used
            when it is None
     :param metrics: metrics, normalized through ``Metrics.convert_metrics_list``
     :param feature_preprocessing: feature preprocessing passed to NNModel and NNEstimator
     :param label_preprocessing: label preprocessing passed to NNEstimator
     :param model_dir: directory passed to SparkEstimator
     """
     self.model = model
     self.loss = loss
     self.optimizer = optimizer
     self.metrics = Metrics.convert_metrics_list(metrics)
     self.feature_preprocessing = feature_preprocessing
     self.label_preprocessing = label_preprocessing
     self.model_dir = model_dir

     self.nn_model = NNModel(model, feature_preprocessing=feature_preprocessing)
     self.nn_estimator = NNEstimator(model, loss, feature_preprocessing, label_preprocessing)

     if optimizer is None:
         # Imported lazily: only needed when the caller did not choose an optimizer.
         from bigdl.optim.optimizer import SGD
         optimizer = SGD()
         self.optimizer = optimizer
     self.nn_estimator.setOptimMethod(optimizer)
     self.estimator = SparkEstimator(model, optimizer, model_dir)

     self.log_dir = None
     self.app_name = None
     self.is_nnframe_fit = False
Beispiel #4
0
    def load_orca_checkpoint(self, path, version, prefix=None):
        """
        Load an existing BigDL checkpoint and rebuild the estimators around it.

        The model and optimizer are loaded into local variables first and only
        stored on the instance once both loads have succeeded, so a failed load
        leaves the previous model/optimizer/estimators untouched.

        :param path: Path to the existing checkpoint.
        :param version: checkpoint version, which is the suffix of model.* file,
        i.e., for model.4 file, the version is 4.
        :param prefix: optimMethod prefix, for example 'optimMethod-Sequentialf53bddcc'.
        It is interpolated into the optimizer file name as-is, so the ``None`` default
        makes the lookup target ``None.<version>``.
        :raises ValueError: if the model or optimizer cannot be loaded, or the loaded
        model is not a Container. The underlying error is attached as ``__cause__``.
        :return:
        """
        from bigdl.nn.layer import Model, Container
        from bigdl.optim.optimizer import OptimMethod
        import os
        try:
            model = Model.load(
                os.path.join(path, "model.{}".format(version)))
            # Explicit check instead of ``assert`` so it still runs under ``python -O``.
            # The TypeError is caught below and surfaces as the usual ValueError.
            if not isinstance(model, Container):
                raise TypeError(
                    "The loaded model should be a Container, please check your checkpoint type.")
            optimizer = OptimMethod.load(
                os.path.join(path, "{}.{}".format(prefix, version)))
        except Exception as err:
            raise ValueError(
                "Cannot load BigDL checkpoint, please check your checkpoint path "
                "and checkpoint type.") from err

        # Commit the new state only after both loads succeeded.
        self.model = model
        self.optimizer = optimizer
        self.estimator = SparkEstimator(self.model, self.optimizer,
                                        self.model_dir)
        self.nn_estimator = NNEstimator(self.model, self.loss,
                                        self.feature_preprocessing,
                                        self.label_preprocessing)
        if self.optimizer is not None:
            self.nn_estimator.setOptimMethod(self.optimizer)
        self.nn_model = NNModel(
            self.model, feature_preprocessing=self.feature_preprocessing)
Beispiel #5
0
    def load(self, checkpoint, optimizer=None, loss=None, feature_preprocessing=None,
             label_preprocessing=None, model_dir=None, is_checkpoint=False):
        """
        Load a model into this estimator, optionally overriding its configuration.

        :param checkpoint: with ``is_checkpoint=True``, passed to
               ``load_latest_orca_checkpoint``; otherwise the common path prefix of the
               ``.bigdl`` and ``.bin`` files read through ``Net.load_bigdl``
        :param optimizer: replaces ``self.optimizer`` when not None
        :param loss: replaces ``self.loss`` when not None
        :param feature_preprocessing: replaces ``self.feature_preprocessing`` when not None
        :param label_preprocessing: replaces ``self.label_preprocessing`` when not None
        :param model_dir: replaces ``self.model_dir`` when not None
        :param is_checkpoint: selects between the two loading paths described above
        :return: self
        """
        # Only explicitly supplied (non-None) settings override the current ones.
        overrides = (
            ("loss", loss),
            ("optimizer", optimizer),
            ("feature_preprocessing", feature_preprocessing),
            ("label_preprocessing", label_preprocessing),
            ("model_dir", model_dir),
        )
        for attr_name, new_value in overrides:
            if new_value is not None:
                setattr(self, attr_name, new_value)

        if is_checkpoint:
            self.load_latest_orca_checkpoint(checkpoint)
            return self

        from zoo.pipeline.api.net import Net
        self.model = Net.load_bigdl(checkpoint + ".bigdl", checkpoint + ".bin")

        self.nn_estimator = NNEstimator(self.model,
                                        self.loss,
                                        self.feature_preprocessing,
                                        self.label_preprocessing)
        if self.optimizer is None:
            # Fall back to a default-constructed SGD when no optimizer is configured.
            from bigdl.optim.optimizer import SGD
            self.optimizer = SGD()
        self.nn_estimator.setOptimMethod(self.optimizer)
        self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir)
        self.nn_model = NNModel(self.model, feature_preprocessing=self.feature_preprocessing)
        return self
Beispiel #6
0
    def load_orca_checkpoint(self, path, version=None, prefix=None):
        """
        Load existing checkpoint. To load a specific checkpoint, please provide both `version`
        and `prefix`. If `version` is None, then the latest checkpoint under the specified
        directory will be loaded.

        The model and optimizer are loaded into local variables first and only stored on
        the instance once both loads have succeeded, so a failed load leaves the previous
        model/optimizer/estimators untouched.

        :param path: Path to the existing checkpoint (or directory containing Orca checkpoint
               files).
        :param version: checkpoint version, which is the suffix of model.* file, i.e., for
               model.4 file, the version is 4. If it is None, then load the latest checkpoint.
        :param prefix: optimMethod prefix, for example 'optimMethod-Sequentialf53bddcc'.
               Required when `version` is given; ignored (replaced by the discovered one)
               when `version` is None.
        :raises ValueError: if no checkpoint is found, if the model or optimizer cannot be
               loaded, or if the loaded model is not a Container. For load failures the
               underlying error is attached as ``__cause__``.
        :raises AssertionError: if `version` is given without `prefix`.
        :return:
        """
        from bigdl.nn.layer import Model, Container
        from bigdl.optim.optimizer import OptimMethod
        from zoo.orca.learn.utils import find_latest_checkpoint
        import os

        if version is None:
            path, prefix, version = find_latest_checkpoint(path,
                                                           model_type="bigdl")
            if path is None:
                raise ValueError(
                    "Cannot find BigDL checkpoint, please check your checkpoint"
                    " path.")
        else:
            # Kept as an assert so callers continue to see AssertionError here.
            assert prefix is not None, "You should provide optimMethod prefix, " \
                                       "for example 'optimMethod-TorchModelf53bddcc'"

        try:
            model = Model.load(
                os.path.join(path, "model.{}".format(version)))
            # Explicit check instead of ``assert`` so it still runs under ``python -O``.
            # The TypeError is caught below and surfaces as the usual ValueError.
            if not isinstance(model, Container):
                raise TypeError(
                    "The loaded model should be a Container, please check your checkpoint type.")
            optimizer = OptimMethod.load(
                os.path.join(path, "{}.{}".format(prefix, version)))
        except Exception as err:
            raise ValueError(
                "Cannot load BigDL checkpoint, please check your checkpoint path "
                "and checkpoint type.") from err

        # Commit the new state only after both loads succeeded.
        self.model = model
        self.optimizer = optimizer
        self.estimator = SparkEstimator(self.model, self.optimizer,
                                        self.model_dir)
        self.nn_estimator = NNEstimator(self.model, self.loss,
                                        self.feature_preprocessing,
                                        self.label_preprocessing)
        if self.optimizer is not None:
            self.nn_estimator.setOptimMethod(self.optimizer)
        self.nn_model = NNModel(
            self.model, feature_preprocessing=self.feature_preprocessing)
Beispiel #7
0
 def load_orca_checkpoint(self, path, version, prefix=None):
     """
     Load an existing BigDL checkpoint and rebuild the estimators around it.

     The model and optimizer are loaded into local variables first and only stored on
     the instance once both loads have succeeded, so a failed load leaves the previous
     model/optimizer/estimators untouched.

     :param path: directory containing the checkpoint files
     :param version: suffix of the ``model.*`` file to load
     :param prefix: optimMethod file prefix; it is interpolated into the file name as-is,
            so the ``None`` default makes the lookup target ``None.<version>``
     :raises ValueError: if the model or optimizer cannot be loaded, or the loaded model
            is not a Container. The underlying error is attached as ``__cause__``.
     """
     from bigdl.nn.layer import Model, Container
     from bigdl.optim.optimizer import OptimMethod
     import os
     try:
         model = Model.load(os.path.join(path, "model.{}".format(version)))
         # Explicit check instead of ``assert`` so it still runs under ``python -O``.
         # The TypeError is caught below and surfaces as the usual ValueError.
         if not isinstance(model, Container):
             raise TypeError(
                 "The loaded model should be a Container, please check your checkpoint type.")
         optimizer = OptimMethod.load(os.path.join(path,
                                                   "{}.{}".format(prefix, version)))
     except Exception as err:
         raise ValueError("Cannot load BigDL checkpoint, please check your checkpoint path "
                          "and checkpoint type.") from err

     # Commit the new state only after both loads succeeded.
     self.model = model
     self.optimizer = optimizer
     self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir)
     self.nn_estimator = NNEstimator(self.model, self.loss, self.feature_preprocessing,
                                     self.label_preprocessing)
     if self.optimizer is not None:
         self.nn_estimator.setOptimMethod(self.optimizer)
     self.nn_model = NNModel(self.model, feature_preprocessing=self.feature_preprocessing)
Beispiel #8
0
    def __init__(self,
                 loss,
                 optim_method,
                 sess=None,
                 dataset=None,
                 inputs=None,
                 grads=None,
                 variables=None,
                 graph=None,
                 val_outputs=None,
                 val_labels=None,
                 val_method=None,
                 val_split=0.0,
                 tensors_with_value=None,
                 session_config=None,
                 clip_norm=None,
                 clip_value=None,
                 metrics=None,
                 updates=None,
                 freeze=False,
                 model_dir=None):
        """
        TFOptimizer is used for distributed training of TensorFlow
        on Spark/BigDL.

        Note that if grads and variables are not None, then they need to be sorted by name
        if you want to use multiple optimization methods for a TensorFlow model according to
        variable names.

        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current TensorFlow Session, if you want to used a pre-trained model, you
        should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        :param dataset: the training dataset. When it is None, ``sess``, ``dataset``, ``inputs``,
        ``grads``, ``variables`` and ``graph`` are all taken from
        ``TFOptimizer._get_arguments_from_loss`` and any values passed for them are replaced.
        :param inputs: input tensors passed to TFModel
        :param grads: gradient tensors passed to TFModel
        :param variables: variables passed to TFModel
        :param graph: the TensorFlow graph; stored on the instance and passed to TFModel
        :param val_outputs: validation outputs wrapped, together with ``val_labels``, into a
        BigDLMetric per validation method
        :param val_labels: validation labels, see ``val_outputs``
        :param val_method: a validation method or a list of them; each one is added to the
        metrics under the key ``'bigdl_metirc_<index>'``
        :param val_split: fraction of the training data split off as validation data; when it
        is 0.0 the dataset's own validation data is used instead
        :param tensors_with_value: passed through to TFModel
        :param session_config: passed through to TFModel
        :param clip_norm: when truthy, forwarded to the estimator's L2-norm gradient clipping
        :param clip_value: when truthy, a ``(min_value, max_value)`` tuple forwarded to the
        estimator's constant gradient clipping
        :param metrics: dictionary of metrics passed to TFModel. The caller's dictionary is not
        modified; validation-method metrics are added to a copy.
        :param updates: passed through to TFModel
        :param freeze: when truthy the model is built with ``TFModel.create``, otherwise with
        ``TFModel.create_for_unfreeze``
        :param model_dir: working directory; created if it does not exist, and a fresh
        temporary directory is used when it is None
        :raises ValueError: if ``clip_value`` is neither None nor a tuple, or if the dataset's
        ``batch_size`` is not positive
        """

        if dataset is None:
            args = TFOptimizer._get_arguments_from_loss(
                loss, optim_method, sess, val_outputs, val_labels, val_method)
            loss, optim_method, sess, dataset, inputs = args[:5]
            grads, variables, graph, val_outputs, val_labels, val_method = args[
                5:]

        self.optim_method = optim_method
        self.sess = sess
        self.dataset = dataset
        self.graph = graph

        self.clip_norm = clip_norm
        if clip_value is not None and not isinstance(clip_value, tuple):
            raise ValueError(
                "The clip_value argument should be a tuple (min_value, max_value)"
            )
        self.clip_constant = clip_value

        if self.dataset.batch_size <= 0:
            raise ValueError(
                "You should set batch_size instead of batch_per_thread for training"
            )

        if val_method is not None:
            val_methods = to_list(val_method)
            # Work on a copy so the dictionary owned by the caller is never mutated.
            metrics = dict(metrics) if metrics is not None else {}

            for i, method in enumerate(val_methods):
                metrics['bigdl_metirc_' + str(i)] = BigDLMetric(
                    method, val_outputs, val_labels)

        if model_dir is None:
            model_dir = tempfile.mkdtemp()
        else:
            # exist_ok avoids the race between checking for the directory and creating it.
            os.makedirs(model_dir, exist_ok=True)

        self.model_dir = model_dir

        if freeze:
            self.tf_model = TFModel.create(loss, sess, inputs, grads,
                                           variables, graph,
                                           tensors_with_value, session_config,
                                           metrics, updates, model_dir)
        else:
            self.tf_model = TFModel.create_for_unfreeze(
                loss, sess, inputs, grads, variables, graph,
                tensors_with_value, session_config, metrics, updates,
                model_dir)

        batch_size = self.dataset.batch_size

        sample_rdd = self.dataset.get_training_data()

        if val_split != 0.0:
            training_rdd, val_rdd = sample_rdd.randomSplit(
                [1 - val_split, val_split])
        else:
            training_rdd = sample_rdd
            val_rdd = self.dataset.get_validation_data()

        self.training_rdd = training_rdd
        self.val_rdd = val_rdd
        self.batch_size = batch_size

        self.estimator = Estimator(self.tf_model.training_helper_layer,
                                   self.optim_method, model_dir)

        # Clipping is configured only for truthy settings (None, 0 and () are skipped).
        if self.clip_norm:
            self.estimator.set_l2_norm_gradient_clipping(self.clip_norm)
        if self.clip_constant:
            min_value, max_value = self.clip_constant
            self.estimator.set_constant_gradient_clipping(min_value, max_value)
Beispiel #9
0
class TFOptimizer:
    """
    Distributed trainer for TensorFlow models on Spark/BigDL.

    Instances can be built directly, from a loss tensor (`from_loss`) or from a compiled
    tensorflow.keras model (`from_keras`); `optimize` runs the training loop.
    """

    def __init__(self,
                 loss,
                 optim_method,
                 sess=None,
                 dataset=None,
                 inputs=None,
                 grads=None,
                 variables=None,
                 graph=None,
                 val_outputs=None,
                 val_labels=None,
                 val_method=None,
                 val_split=0.0,
                 tensors_with_value=None,
                 session_config=None,
                 clip_norm=None,
                 clip_value=None,
                 metrics=None,
                 updates=None,
                 freeze=False,
                 model_dir=None):
        """
        TFOptimizer is used for distributed training of TensorFlow
        on Spark/BigDL.

        Note that if grads and variables are not None, then they need to be sorted by name
        if you want to use multiple optimization methods for a TensorFlow model according to
        variable names.

        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current TensorFlow Session, if you want to used a pre-trained model, you
        should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        :param dataset: the training dataset. When it is None, ``sess``, ``dataset``, ``inputs``,
        ``grads``, ``variables`` and ``graph`` are all taken from `_get_arguments_from_loss`
        and any values passed for them are replaced.
        :param inputs: input tensors passed to TFModel
        :param grads: gradient tensors passed to TFModel
        :param variables: variables passed to TFModel
        :param graph: the TensorFlow graph; stored on the instance and passed to TFModel
        :param val_outputs: validation outputs wrapped, together with ``val_labels``, into a
        BigDLMetric per validation method
        :param val_labels: validation labels, see ``val_outputs``
        :param val_method: a validation method or a list of them; each one is added to the
        metrics under the key ``'bigdl_metirc_<index>'``
        :param val_split: fraction of the training data split off as validation data; when it
        is 0.0 the dataset's own validation data is used instead
        :param tensors_with_value: passed through to TFModel
        :param session_config: passed through to TFModel
        :param clip_norm: when truthy, forwarded to the estimator's L2-norm gradient clipping
        :param clip_value: when truthy, a ``(min_value, max_value)`` tuple forwarded to the
        estimator's constant gradient clipping
        :param metrics: dictionary of metrics passed to TFModel. The caller's dictionary is not
        modified; validation-method metrics are added to a copy.
        :param updates: passed through to TFModel
        :param freeze: when truthy the model is built with ``TFModel.create``, otherwise with
        ``TFModel.create_for_unfreeze``
        :param model_dir: working directory; created if it does not exist, and a fresh
        temporary directory is used when it is None
        :raises ValueError: if ``clip_value`` is neither None nor a tuple, or if the dataset's
        ``batch_size`` is not positive
        """

        if dataset is None:
            args = TFOptimizer._get_arguments_from_loss(
                loss, optim_method, sess, val_outputs, val_labels, val_method)
            loss, optim_method, sess, dataset, inputs = args[:5]
            grads, variables, graph, val_outputs, val_labels, val_method = args[
                5:]

        self.optim_method = optim_method
        self.sess = sess
        self.dataset = dataset
        self.graph = graph

        self.clip_norm = clip_norm
        if clip_value is not None and not isinstance(clip_value, tuple):
            raise ValueError(
                "The clip_value argument should be a tuple (min_value, max_value)"
            )
        self.clip_constant = clip_value

        if self.dataset.batch_size <= 0:
            raise ValueError(
                "You should set batch_size instead of batch_per_thread for training"
            )

        if val_method is not None:
            val_methods = to_list(val_method)
            # Work on a copy so the dictionary owned by the caller is never mutated.
            metrics = dict(metrics) if metrics is not None else {}

            for i, method in enumerate(val_methods):
                metrics['bigdl_metirc_' + str(i)] = BigDLMetric(
                    method, val_outputs, val_labels)

        if model_dir is None:
            model_dir = tempfile.mkdtemp()
        else:
            # exist_ok avoids the race between checking for the directory and creating it.
            os.makedirs(model_dir, exist_ok=True)

        self.model_dir = model_dir

        if freeze:
            self.tf_model = TFModel.create(loss, sess, inputs, grads,
                                           variables, graph,
                                           tensors_with_value, session_config,
                                           metrics, updates, model_dir)
        else:
            self.tf_model = TFModel.create_for_unfreeze(
                loss, sess, inputs, grads, variables, graph,
                tensors_with_value, session_config, metrics, updates,
                model_dir)

        batch_size = self.dataset.batch_size

        sample_rdd = self.dataset.get_training_data()

        if val_split != 0.0:
            training_rdd, val_rdd = sample_rdd.randomSplit(
                [1 - val_split, val_split])
        else:
            training_rdd = sample_rdd
            val_rdd = self.dataset.get_validation_data()

        self.training_rdd = training_rdd
        self.val_rdd = val_rdd
        self.batch_size = batch_size

        self.estimator = Estimator(self.tf_model.training_helper_layer,
                                   self.optim_method, model_dir)

        # Clipping is configured only for truthy settings (None, 0 and () are skipped).
        if self.clip_norm:
            self.estimator.set_l2_norm_gradient_clipping(self.clip_norm)
        if self.clip_constant:
            min_value, max_value = self.clip_constant
            self.estimator.set_constant_gradient_clipping(min_value, max_value)

    @staticmethod
    def _get_arguments_from_loss(loss, optim_method, session, val_outputs,
                                 val_labels, val_method):
        """
        Derive the positional constructor arguments from a loss tensor.

        When ``session`` is None a new ``tf.Session`` is created and the global variables
        initializer is run in it. Gradients are computed for ``loss``, sorted by variable
        name, and pairs whose gradient is None are dropped. The dataset is looked up in the
        graph collection named after the first placeholder found for ``loss``.

        :return: a list ``[loss, optim_method, sess, dataset, inputs, grads, variables,
        graph, val_outputs, val_labels, val_method]`` in the order expected by ``__init__``
        """
        import tensorflow as tf
        if session is None:
            sess = tf.Session()
            sess.run(tf.global_variables_initializer())
        else:
            sess = session
        grads_vars = tf.train.GradientDescentOptimizer(0).compute_gradients(
            loss)
        grads_vars.sort(key=lambda grad_var: grad_var[1].name)
        variables = []
        grads = []
        for (grad, var) in grads_vars:
            if grad is not None:
                variables.append(var)
                grads.append(grad)

        all_required_inputs = _find_placeholders([loss])
        dataset = tf.get_collection(all_required_inputs[0].name)[0]

        inputs = nest.flatten(dataset._original_tensors)

        return [
            loss, optim_method, sess, dataset, inputs, grads, variables,
            loss.graph, val_outputs, val_labels, val_method
        ]

    @staticmethod
    def _normalize_clip_value(clip_value):
        """
        Validate ``clip_value`` and turn it into the tuple form expected by ``__init__``.

        :param clip_value: None, a positive int/float, or a ``(min_value, max_value)`` tuple
        :return: None for None, ``(-v, v)`` as floats for a positive number ``v``, and the
        tuple itself for a tuple
        :raises ValueError: for a non-positive number or any other type
        """
        if clip_value is None:
            return None
        if isinstance(clip_value, (float, int)):
            if clip_value <= 0:
                raise ValueError(
                    "The clip_value argument should be positive number")
            return (-float(clip_value), float(clip_value))
        if not isinstance(clip_value, tuple):
            raise ValueError(
                "The clip_value argument should be" +
                " a positive float/int which clips to" +
                " (-clip_value, clip_value); " +
                "or a tuple which clips to (min_value, max_value)")
        return clip_value

    @classmethod
    def from_loss(cls,
                  loss,
                  optim_method,
                  session=None,
                  val_outputs=None,
                  val_labels=None,
                  val_method=None,
                  val_split=0.0,
                  clip_norm=None,
                  clip_value=None,
                  metrics=None,
                  tensor_with_value=None,
                  **kwargs):
        """
        Create a TFOptimizer from a TensorFlow loss tensor.
        The loss tensor must come from a TensorFlow graph that only takes TFDataset.tensors and
        the tensors in `tensor_with_value` as inputs.
        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param session: the current TensorFlow Session, if you want to used a pre-trained model,
        you should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        :param val_outputs: the validation output TensorFlow tensor to be used by val_methods
        :param val_labels: the validation label TensorFlow tensor to be used by val_methods
        :param val_method: the BigDL val_method(s) to be used.
        :param val_split: Float between 0 and 1. Fraction of the training data to be used as
        validation data.
        :param clip_norm: float >= 0. Gradients will be clipped when their L2 norm exceeds
        this value.
        :param clip_value: a positive float/int, which clips gradients to
        (-clip_value, clip_value), or a tuple, which clips to (min_value, max_value).
        :param metrics: a dictionary. The key should be a string representing the metric's name
        and the value should be the corresponding TensorFlow tensor, which should be a scalar.
        :param tensor_with_value: a dictionary. The key is TensorFlow tensor, usually a
        placeholder, the value of the dictionary is a tuple of two elements. The first one of
        the tuple is the value to feed to the tensor in training phase and the second one
        is the value to feed to the tensor in validation phase.
        :param kwargs: further keyword arguments passed to the constructor
        :raises ValueError: if ``clip_value`` is a non-positive number or is neither a number
        nor a tuple
        :return: a TFOptimizer
        """
        # Validate before touching TensorFlow so bad arguments fail without
        # creating a session first.
        clip_value = cls._normalize_clip_value(clip_value)

        args = TFOptimizer._get_arguments_from_loss(loss, optim_method,
                                                    session, val_outputs,
                                                    val_labels, val_method)

        # ``args`` lines up with the first eleven positional parameters of __init__;
        # val_split is the twelfth.
        return cls(*(args + [val_split]),
                   tensors_with_value=tensor_with_value,
                   clip_norm=clip_norm,
                   clip_value=clip_value,
                   metrics=metrics,
                   **kwargs)

    @classmethod
    def from_keras(cls,
                   keras_model,
                   dataset,
                   optim_method=None,
                   val_spilt=0.0,
                   **kwargs):
        """
        Create a TFOptimizer from a tensorflow.keras model. The model must be compiled.
        :param keras_model: the tensorflow.keras model, which must be compiled.
        :param dataset: a TFDataset
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam.
        When it is None the keras model's own optimizer is converted and used.
        :param val_spilt: Float between 0 and 1. Fraction of the training data to be used as
        validation data. (The parameter name is misspelled; it is kept for compatibility.)
        :param kwargs: further keyword arguments passed to the constructor
        :raises ValueError: if a gradient is None, if the model's metrics are a dict, or if a
        multi-output model has metrics other than losses
        :return: a TFOptimizer
        """
        import tensorflow.keras.backend as K
        loss = keras_model.total_loss

        model_inputs = keras_model.inputs
        if hasattr(keras_model, "targets"):
            model_targets = keras_model.targets
        else:
            model_targets = keras_model._targets

        inputs = model_inputs + model_targets

        # NOTE(review): this sorts the keras model's own weight list in place.
        variables = keras_model._collected_trainable_weights
        variables.sort(key=lambda variable: variable.name)
        keras_optimizer = keras_model.optimizer

        grads = K.gradients(loss, variables)
        if None in grads:
            raise ValueError('An operation has `None` for gradient. '
                             'Please make sure that all of your ops have a '
                             'gradient defined (i.e. are differentiable). '
                             'Common ops without gradient: '
                             'K.argmax, K.round, K.eval.')
        # Clipping settings are taken from the keras optimizer when it defines them.
        clip_norm = None
        clip_value = None
        if hasattr(keras_optimizer, 'clipnorm'):
            clip_norm = keras_optimizer.clipnorm
        if hasattr(keras_optimizer, 'clipvalue'):
            clip_value = (-keras_optimizer.clipvalue,
                          keras_optimizer.clipvalue)

        sess = K.get_session()
        if optim_method is None:
            optim_method = keras_optimizer
        optim_method = to_bigdl_optim_method(optim_method)

        if keras_model.metrics and (dataset.get_validation_data() is not None
                                    or val_spilt != 0.0):
            if isinstance(keras_model.metrics, dict):
                raise ValueError(
                    "different metrics for different outputs are not supported right now"
                )

            # NOTE(review): this guard cannot fire, because the enclosing condition already
            # requires validation data or a non-zero split. Left as-is; confirm the intent.
            if dataset.get_validation_data() is None and val_spilt == 0.0:
                raise ValueError(
                    "Validation data is not specified. Please set " +
                    "val_rdd in TFDataset, or set val_split larger than zero")

            if len(keras_model.outputs) > 1:
                if not all([
                        name.endswith("loss")
                        for name in keras_model.metrics_names
                ]):
                    raise ValueError(
                        "metrics (except loss) for multi-head model is not supported"
                    )
                else:
                    bigdl_val_methods = [Loss()]
                    val_outputs = keras_model.outputs
                    val_labels = model_targets
            else:
                bigdl_val_methods = \
                    [to_bigdl_metric(m, keras_model.loss) for m in keras_model.metrics_names]
                val_outputs = keras_model.outputs
                val_labels = model_targets
        else:
            val_outputs = None
            val_labels = None
            bigdl_val_methods = None

        # Learning phase is fed True while training and False while validating.
        tensor_with_value = {K.learning_phase(): [True, False]}

        updates = keras_model.updates

        return cls(loss,
                   optim_method,
                   sess,
                   dataset,
                   inputs,
                   grads,
                   variables,
                   loss.graph,
                   val_outputs,
                   val_labels,
                   bigdl_val_methods,
                   val_spilt,
                   tensors_with_value=tensor_with_value,
                   clip_norm=clip_norm,
                   clip_value=clip_value,
                   updates=updates,
                   **kwargs)

    def set_constant_gradient_clipping(self, min_value, max_value):
        """
        Configure constant clipping settings.

        :param min_value: the minimum value to clip by
        :param max_value: the maximum value to clip by
        """
        self.estimator.set_constant_gradient_clipping(min_value, max_value)

    def set_gradient_clipping_by_l2_norm(self, clip_norm):
        """
        Configure L2 norm clipping settings.
        :param clip_norm: gradient L2-Norm threshold
        """
        self.estimator.set_l2_norm_gradient_clipping(clip_norm)

    def optimize(self, end_trigger=None, checkpoint_trigger=None):
        """
        Run the training loop of this optimizer.
        :param end_trigger: BigDL's Trigger to indicate when to stop the training.
        Defaults to ``MaxEpoch(1)``.
        :param checkpoint_trigger: When to save a checkpoint and evaluate model.
        Defaults to ``EveryEpoch()``. It is only passed to the estimator when both
        validation methods and validation data are available.
        """
        if end_trigger is None:
            end_trigger = MaxEpoch(1)

        if checkpoint_trigger is None:
            checkpoint_trigger = EveryEpoch()

        if self.tf_model.val_methods is not None and self.val_rdd is not None:
            self.estimator.train(train_set=self.training_rdd,
                                 criterion=IdentityCriterion(),
                                 end_trigger=end_trigger,
                                 checkpoint_trigger=checkpoint_trigger,
                                 validation_set=self.val_rdd,
                                 validation_method=self.tf_model.val_methods,
                                 batch_size=self.batch_size)
        else:
            self.estimator.train(train_set=self.training_rdd,
                                 criterion=IdentityCriterion(),
                                 end_trigger=end_trigger,
                                 batch_size=self.batch_size)

        self.tf_model.training_helper_layer.get_weights_to_python()
Beispiel #10
0
class TFOptimizer:
    def __init__(self,
                 tf_model,
                 optim_method,
                 sess=None,
                 dataset=None,
                 clip_norm=None,
                 clip_value=None,
                 model_dir=None):
        """
        TFOptimizer is used for distributed training of TensorFlow
        on Spark/BigDL.

        Note that if grads and variables are not None, then they need to be sorted by name
        if you want to use multiple optimization methods for a TensorFlow model according to
        variable names.

        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current TensorFlow Session, if you want to used a pre-trained model, you
        should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        """

        self.optim_method = optim_method
        self.sess = sess
        self.dataset = dataset

        self.clip_norm = clip_norm
        if clip_value is not None and not isinstance(clip_value, tuple):
            raise ValueError(
                "The clip_value argument should be a tuple (min_value, max_value)"
            )
        self.clip_constant = clip_value

        if self.dataset.batch_size <= 0:
            raise ValueError(
                "You should set batch_size instead of batch_per_thread for training"
            )

        self.model_dir = model_dir

        self.tf_model = tf_model

        batch_size = self.dataset.batch_size

        self.train_data = self.dataset.get_training_data()
        self.val_data = self.dataset.get_validation_data()

        self.batch_size = batch_size

        self.estimator = Estimator(self.tf_model.training_helper_layer,
                                   self.optim_method, self.model_dir)

        if self.clip_norm:
            self.estimator.set_l2_norm_gradient_clipping(self.clip_norm)
        if self.clip_constant:
            min_value, max_value = self.clip_constant
            self.estimator.set_constant_gradient_clipping(min_value, max_value)

    def load_checkpoint(self, path, version):
        """
        Restore the model and the optimization method from a checkpoint and
        rebuild the estimator around them.

        Gradient clipping settings stored on this optimizer are re-applied to
        the new estimator.

        :param path: directory holding the checkpoint files
        :param version: suffix of the "model.<version>" and
               "optimMethod-TFParkTraining.<version>" files to load
        """
        # todo make version optional
        model_file = os.path.join(path, "model.{}".format(version))
        optim_file = os.path.join(
            path, "optimMethod-TFParkTraining.{}".format(version))

        helper_layer = self.tf_model.training_helper_layer
        helper_layer.load_checkpoint(model_file)
        self.optim_method = OptimMethod.load(optim_file)

        estimator = Estimator(helper_layer, self.optim_method, self.model_dir)
        self.estimator = estimator

        if self.clip_norm:
            estimator.set_l2_norm_gradient_clipping(self.clip_norm)
        if self.clip_constant:
            lower, upper = self.clip_constant
            estimator.set_constant_gradient_clipping(lower, upper)

    @staticmethod
    def _get_or_create_session(session):
        """
        Return `session` when one is given; otherwise create a new
        tf.Session and run the global variables initializer in it.
        """
        import tensorflow as tf
        if session is not None:
            return session

        created = tf.Session()
        created.run(tf.global_variables_initializer())
        return created

    @staticmethod
    def _get_dataset_from_loss(loss):
        """
        Find the dataset associated with `loss`.

        The first tensor returned by find_placeholders([loss]) names a
        TensorFlow collection; the first entry of that collection is returned.
        """
        import tensorflow as tf
        placeholders = find_placeholders([loss])
        collection_key = placeholders[0].name
        return tf.get_collection(collection_key)[0]

    @staticmethod
    def _get_vars_grads(loss):
        """
        Compute the gradients of `loss` and return them with their variables.

        Pairs are ordered by variable name and pairs whose gradient is None
        are dropped.

        :return: a tuple (grads, variables) of two parallel lists
        """
        import tensorflow as tf
        # The optimizer (learning rate 0) is only used for compute_gradients.
        pairs = tf.train.GradientDescentOptimizer(0).compute_gradients(loss)
        pairs.sort(key=lambda pair: pair[1].name)

        kept = [(grad, var) for grad, var in pairs if grad is not None]
        grads = [grad for grad, _ in kept]
        variables = [var for _, var in kept]
        return grads, variables

    @staticmethod
    def _get_vars_grads_from_train_op(train_op):
        """
        Recover the gradient tensors and their variables from a train_op.

        Gradient tensors are the ones whose last name component starts with
        "zoo_identity_op_for_grad". For each of them the variable is taken
        from the first control input of the gradient's op.

        :return: a tuple (grads, variables) of two parallel lists
        """
        def is_grad_marker(tensor):
            last_component = tensor.name.split("/")[-1]
            return last_component.startswith("zoo_identity_op_for_grad")

        grads = find_tensors([train_op], is_grad_marker)
        grad_ops = [grad.op for grad in grads]

        variables = []
        for grad_op in grad_ops:
            source = list(grad_op.control_inputs)[0]
            # Use the control input itself when it is named "VarHandleOp",
            # otherwise use its first output.
            variable = source if source.name == "VarHandleOp" \
                else list(source.outputs)[0]
            variables.append(variable)
        return grads, variables

    @classmethod
    def from_train_op(cls,
                      train_op,
                      loss,
                      *,
                      inputs=None,
                      labels=None,
                      metrics=None,
                      updates=None,
                      sess=None,
                      dataset=None,
                      tensor_with_value=None,
                      session_config=None,
                      model_dir=None):
        """
        Create a TFOptimizer from an existing TensorFlow train_op.

        Gradients and variables are recovered from the train_op and a
        FakeOptimMethod is passed on as the optimization method.

        :param train_op: the train op to recover gradients and variables from
        :param loss: the loss tensor; also used to find the dataset when
               `dataset` is None
        :param inputs: input tensors. Defaults to the dataset's original
               tensors, or to their first element when they are a 2-tuple.
        :param labels: label tensors. Defaults to the second element when the
               dataset's original tensors are a 2-tuple, otherwise to [].
        :param metrics: forwarded to _from_grads
        :param updates: forwarded to _from_grads
        :param sess: the TensorFlow Session to use; one is created when None
        :param dataset: the dataset to train on
        :param tensor_with_value: forwarded to _from_grads
        :param session_config: forwarded to _from_grads
        :param model_dir: forwarded to _from_grads
        :return: a TFOptimizer
        """
        sess = TFOptimizer._get_or_create_session(sess)
        grads, variables = TFOptimizer._get_vars_grads_from_train_op(train_op)

        if dataset is None:
            dataset = TFOptimizer._get_dataset_from_loss(loss)
        _ = dataset.tensors  # trigger create tensors if not available

        original = dataset._original_tensors
        is_pair = isinstance(original, tuple) and len(original) == 2
        if inputs is None:
            inputs = original[0] if is_pair else original
        if labels is None:
            labels = original[1] if is_pair else []

        inputs = nest.flatten(inputs)
        labels = nest.flatten(labels)

        from zoo.tfpark.zoo_optimizer import FakeOptimMethod
        forwarded = dict(loss=loss,
                         sess=sess,
                         inputs=inputs,
                         labels=labels,
                         grads=grads,
                         variables=variables,
                         dataset=dataset,
                         metrics=metrics,
                         tensor_with_value=tensor_with_value,
                         optim_method=FakeOptimMethod(),
                         session_config=session_config,
                         updates=updates,
                         model_dir=model_dir,
                         train_op=train_op)
        return TFOptimizer._from_grads(**forwarded)

    @classmethod
    def _from_grads(cls,
                    loss,
                    sess,
                    inputs,
                    labels,
                    grads,
                    variables,
                    dataset,
                    optim_method=None,
                    clip_norm=None,
                    clip_value=None,
                    metrics=None,
                    tensor_with_value=None,
                    session_config=None,
                    model_dir=None,
                    updates=None,
                    train_op=None):
        """
        Build the TFModel from the loss, gradients and variables, then wrap
        it in an instance of this class.

        :param metrics: dict of metrics; an empty dict is used when None
        :param model_dir: passed to the constructor of this class only
        :return: an instance of `cls`
        """
        graph = loss.graph
        metrics = {} if metrics is None else metrics

        # NOTE(review): TFModel.create is always given model_dir=None here;
        # the model_dir argument only reaches the constructor below.
        # Looks deliberate -- confirm against TFModel.create.
        model = TFModel.create(loss, sess, inputs, labels, [], grads,
                               variables, graph, tensor_with_value,
                               session_config, metrics, updates,
                               model_dir=None,
                               train_op=train_op)

        ctor_kwargs = dict(sess=sess,
                           dataset=dataset,
                           clip_norm=clip_norm,
                           clip_value=clip_value,
                           model_dir=model_dir)
        return cls(model, optim_method, **ctor_kwargs)

    @classmethod
    def from_loss(cls,
                  loss,
                  optim_method,
                  session=None,
                  inputs=None,
                  dataset=None,
                  val_outputs=None,
                  val_labels=None,
                  val_method=None,
                  clip_norm=None,
                  clip_value=None,
                  metrics=None,
                  tensor_with_value=None,
                  session_config=None,
                  model_dir=None,
                  updates=None):
        """
        Create a TFOptimizer from a TensorFlow loss tensor.
        The loss tensor must come from a TensorFlow graph that only takes TFDataset.tensors and
        the tensors in `tensor_with_value` as inputs.
        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param session: the current TensorFlow Session, if you want to used a pre-trained model,
        you should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        :param val_outputs: the validation output TensorFlow tensor to be used by val_methods
        :param val_labels: the validation label TensorFlow tensor to be used by val_methods
        :param val_method: the BigDL val_method(s) to be used.
        :param clip_norm: float >= 0. Gradients will be clipped when their L2 norm exceeds
        this value.
        :param clip_value: float >= 0. Gradients will be clipped when their absolute value
        exceeds this value.
        :param metrics: a dictionary. The key should be a string representing the metric's name
        and the value should be the corresponding TensorFlow tensor, which should be a scalar.
        :param tensor_with_value: a dictionary. The key is TensorFlow tensor, usually a
        placeholder, the value of the dictionary is a tuple of two elements. The first one of
        the tuple is the value to feed to the tensor in training phase and the second one
        is the value to feed to the tensor in validation phase.
        :return: a TFOptimizer
        """
        sess = TFOptimizer._get_or_create_session(session)
        grads, variables = TFOptimizer._get_vars_grads(loss)

        if dataset is None and inputs is None:
            dataset = TFOptimizer._get_dataset_from_loss(loss)
            inputs = dataset._original_tensors
        else:
            if inputs is None:
                raise ValueError("please specify inputs")
            _ = dataset.tensors  # trigger creating placeholders

        if isinstance(inputs, tuple) and len(inputs) == 2:
            inputs, labels = inputs
        else:
            labels = []

        inputs = nest.flatten(inputs)
        labels = nest.flatten(labels)

        if clip_value is not None:
            if isinstance(clip_value, float) or isinstance(clip_value, int):
                if clip_value <= 0:
                    ValueError(
                        "The clip_value argument should be positive number")
                clip_value = (-float(clip_value), float(clip_value))

            if not isinstance(clip_value, tuple):
                raise ValueError(
                    "The clip_value argument should be" +
                    " a positive float/int which clips to" +
                    " (-clip_value, clip_value); " +
                    "or a tuple which clips to (min_value, max_value)")

        if val_method is not None:
            val_methods = to_list(val_method)
            if metrics is None:
                metrics = {}

            for i, method in enumerate(val_methods):
                metrics['bigdl_metric_' + str(i)] = BigDLMetric(
                    method, val_outputs, val_labels)

        return TFOptimizer._from_grads(loss, sess, inputs, labels, grads,
                                       variables, dataset, optim_method,
                                       clip_norm, clip_value, metrics,
                                       tensor_with_value, session_config,
                                       model_dir, updates)

    @staticmethod
    def export_training_model(export_dir,
                              loss,
                              sess,
                              inputs,
                              labels=None,
                              predictions=None,
                              metrics=None,
                              tensor_with_value=None,
                              updates=None):
        """
        Export the TensorFlow model for training into `export_dir`.

        The gradients and variables of `loss` are computed here and handed to
        TFModel.export together with the remaining arguments; an info message
        is logged afterwards.
        """
        grads, variables = TFOptimizer._get_vars_grads(loss)
        graph = loss.graph

        TFModel.export(export_dir, loss, sess, inputs, labels, predictions,
                       grads, variables, graph, tensor_with_value,
                       metrics, updates)

        message = "Exported TensorFlow model in {} for training".format(export_dir)
        logging.info(message)

    @staticmethod
    def _shape_match(model_shape, dataset_shape):

        for i in range(len(dataset_shape)):
            if dataset_shape[i].value is None:
                return model_shape[i].value is None
            else:
                return dataset_shape[i].value == model_shape[i].value or \
                    model_shape[i].value is None

    @classmethod
    def from_keras(cls,
                   keras_model,
                   dataset,
                   session_config=None,
                   model_dir=None,
                   metrics=None,
                   optimizer=None):
        """
        Create a TFOptimizer from a tensorflow.keras model. The model must be compiled.
        :param keras_model: the tensorflow.keras model, which must be compiled.
        :param dataset: a TFDataset
        :param session_config: forwarded to TFModel.create or from_train_op
        :param model_dir: forwarded to the created TFOptimizer
        :param metrics: an optional dictionary of metrics. When validation metrics are derived
        from the keras model, 'bigdl_metric_<i>' entries are added to this dictionary.
        :param optimizer: an optional optimization method. When given, it is used to train the
        model and the `clipnorm` / `clipvalue` attributes found on the keras optimizer are
        carried over; otherwise the keras optimizer's own train op is used.
        :return: a TFOptimizer
        :raises ValueError: if an input shape of the keras model does not match the dataset, or
        if the keras model uses a metrics configuration that is not supported.
        """
        import tensorflow.keras.backend as K

        model_inputs = keras_model.inputs

        if hasattr(keras_model, "targets"):
            model_targets = keras_model.targets
        else:
            model_targets = keras_model._targets

        # target can be None if loss is None
        model_targets = [target for target in model_targets if target is not None]

        # standardize feature, labels to support keras model
        if isinstance(dataset, TFNdarrayDataset):
            dataset = _standarize_feature_label_dataset(dataset, keras_model)

        flatten_inputs = nest.flatten(dataset.feature_tensors)
        assert len(model_inputs) == len(flatten_inputs), \
            ("the keras model and TFDataset should have the same number of tensors" +
             " keras model has {} inputs " +
             "while TFDataset has {} inputs").format(len(model_inputs),
                                                     len(flatten_inputs))
        for i, (model_input, dataset_input) in enumerate(
                zip(model_inputs, flatten_inputs)):
            if not TFOptimizer._shape_match(model_input.shape,
                                            dataset_input.shape):
                raise ValueError(("The {}th input in keras model {}"
                                  " does not match the TFDataset"
                                  "input {}").format(i, model_input,
                                                     dataset_input))

        flatten_targets = nest.flatten(dataset.label_tensors)
        # Report the number of labels (not inputs) on the dataset side.
        assert len(model_targets) == len(flatten_targets), \
            ("the keras model and TFDataset should have the same number of tensors" +
             " keras model has {} targets " +
             "while TFDataset has {} labels").format(len(model_targets),
                                                     len(flatten_targets))
        # todo check targets shape, currently checking target shape will
        # cause too much false alarm.

        loss = keras_model.total_loss
        variables = keras_model._collected_trainable_weights
        variables.sort(key=lambda variable: variable.name)
        keras_optimizer = keras_model.optimizer

        from zoo.tfpark.zoo_optimizer import get_gradients_for_keras
        grads = get_gradients_for_keras(keras_optimizer, loss, variables)
        grads_and_vars = list(zip(grads, variables))
        import tensorflow.python.keras.optimizers as koptimizers
        if isinstance(keras_optimizer, koptimizers.TFOptimizer):
            # work around keras TFOptimizer bug
            train_op = keras_optimizer.optimizer.apply_gradients(
                grads_and_vars)
        else:
            train_op = keras_optimizer.apply_gradients(grads_and_vars)

        sess = K.get_session()

        if keras_model.metrics and (dataset.get_validation_data() is not None):
            if isinstance(keras_model.metrics, dict):
                raise ValueError(
                    "different metrics for different outputs are not supported right now"
                )

            if len(keras_model.outputs) > 1:
                if not all([
                        name.endswith("loss")
                        for name in keras_model.metrics_names
                ]):
                    raise ValueError(
                        "metrics (except loss) for multi-head model is not supported"
                    )
                else:
                    bigdl_val_methods = [Loss()]
                    val_outputs = keras_model.outputs
                    val_labels = model_targets
            else:
                bigdl_val_methods = \
                    [to_bigdl_metric(m, keras_model.loss) for m in keras_model.metrics_names]
                val_outputs = keras_model.outputs
                val_labels = model_targets
        else:
            val_outputs = None
            val_labels = None
            bigdl_val_methods = None

        # Feed learning_phase=True while training and False while validating.
        tensor_with_value = {K.learning_phase(): [True, False]}

        updates = []

        updates += keras_model.get_updates_for(None)
        # Conditional updates relevant to this model
        updates += keras_model.get_updates_for(keras_model.inputs)

        if bigdl_val_methods is not None:
            val_methods = to_list(bigdl_val_methods)
            bigdl_metrics = {}
            for i, method in enumerate(val_methods):
                bigdl_metrics['bigdl_metric_' + str(i)] = BigDLMetric(
                    method, val_outputs, val_labels)
            if metrics is None:
                metrics = bigdl_metrics
            else:
                metrics.update(bigdl_metrics)

        if optimizer is not None:
            # An explicit optimization method was supplied: build the model
            # from the gradients and carry over the keras clipping settings.
            clip_norm = None
            clip_value = None
            if hasattr(keras_optimizer, 'clipnorm'):
                clip_norm = keras_optimizer.clipnorm
            if hasattr(keras_optimizer, 'clipvalue'):
                clip_value = (-keras_optimizer.clipvalue,
                              keras_optimizer.clipvalue)
            tf_model = TFModel.create(loss,
                                      sess,
                                      model_inputs,
                                      model_targets,
                                      keras_model.outputs,
                                      grads,
                                      variables,
                                      loss.graph,
                                      tensor_with_value,
                                      session_config,
                                      metrics,
                                      updates,
                                      model_dir=None)

            return cls(tf_model,
                       optimizer,
                       sess=sess,
                       dataset=dataset,
                       clip_norm=clip_norm,
                       clip_value=clip_value,
                       model_dir=model_dir)

        return cls.from_train_op(train_op,
                                 loss,
                                 inputs=model_inputs,
                                 labels=model_targets,
                                 metrics=metrics,
                                 updates=updates,
                                 sess=sess,
                                 dataset=dataset,
                                 tensor_with_value=tensor_with_value,
                                 session_config=session_config,
                                 model_dir=model_dir)

    def set_constant_gradient_clipping(self, min_value, max_value):
        """
        Configure constant clipping settings.

        :param min_value: the minimum value to clip by
        :param max_value: the maxmimum value to clip by
        """
        self.estimator.set_constant_gradient_clipping(min_value, max_value)

    def set_gradient_clipping_by_l2_norm(self, clip_norm):
        """
        Configure L2 norm clipping settings.
        :param clip_norm: gradient L2-Norm threshold
        """
        self.estimator.set_l2_norm_gradient_clipping(clip_norm)

    def optimize(self, end_trigger=None, checkpoint_trigger=None):
        """
        Run the training loop of the this optimizer
        :param end_trigger: BigDL's Trigger to indicate when to stop the training.
        :param checkpoint_trigger: When to save a checkpoint and evaluate model.
        """
        if end_trigger is None:
            end_trigger = MaxEpoch(1)

        if checkpoint_trigger is None:
            checkpoint_trigger = EveryEpoch()

        if self.tf_model.val_methods and self.val_data is not None:
            self.estimator.train_minibatch(
                train_set=self.train_data,
                criterion=self.tf_model.criterion,
                end_trigger=end_trigger,
                checkpoint_trigger=checkpoint_trigger,
                validation_set=self.val_data,
                validation_method=self.tf_model.val_methods)
        else:
            self.estimator.train_minibatch(
                train_set=self.train_data,
                criterion=self.tf_model.criterion,
                end_trigger=end_trigger,
                checkpoint_trigger=checkpoint_trigger)

        self.tf_model.training_helper_layer.get_weights_to_python()
Beispiel #11
0
class BigDLEstimator(OrcaSparkEstimator):
    def __init__(self,
                 *,
                 model,
                 loss,
                 optimizer=None,
                 metrics=None,
                 feature_preprocessing=None,
                 label_preprocessing=None,
                 model_dir=None):
        """
        Set up both training back ends for a BigDL model: an NNEstimator /
        NNModel pair and a SparkEstimator.

        :param model: the BigDL model
        :param loss: the loss passed to NNEstimator (and later to training)
        :param optimizer: the optimization method; bigdl's SGD() is used when None
        :param metrics: metrics, converted with Metrics.convert_metrics_list
        :param feature_preprocessing: passed to NNModel and NNEstimator
        :param label_preprocessing: passed to NNEstimator
        :param model_dir: passed to SparkEstimator
        """
        # Plain configuration.
        self.loss = loss
        self.optimizer = optimizer
        self.metrics = Metrics.convert_metrics_list(metrics)
        self.feature_preprocessing = feature_preprocessing
        self.label_preprocessing = label_preprocessing
        self.model_dir = model_dir
        self.model = model

        # NNFrames objects.
        self.nn_model = NNModel(
            model, feature_preprocessing=feature_preprocessing)
        nn_estimator = NNEstimator(model, loss,
                                   feature_preprocessing,
                                   label_preprocessing)
        self.nn_estimator = nn_estimator

        if self.optimizer is None:
            from bigdl.optim.optimizer import SGD
            self.optimizer = SGD()
        nn_estimator.setOptimMethod(self.optimizer)

        self.estimator = SparkEstimator(model, self.optimizer, model_dir)

        # Set elsewhere; fit() only wires up summaries when both are non-None.
        self.log_dir = None
        self.app_name = None
        # Records which back end ran the most recent fit().
        self.is_nnframe_fit = False

    def fit(self,
            data,
            epochs,
            batch_size=32,
            feature_cols="features",
            label_cols="label",
            caching_sample=True,
            validation_data=None,
            validation_trigger=None,
            checkpoint_trigger=None):
        """
        Train this BigDL model with train data.

        :param data: train data. It can be XShards or Spark DataFrame.
        If data is XShards, each partition is a dictionary of  {'x': feature,
        'y': label}, where feature(label) is a numpy array or a list of numpy arrays.
        :param epochs: Number of epochs to train the model.
        :param batch_size: Batch size used for training. Default: 32.
        :param feature_cols: Feature column name(s) of data. Only used when data is a Spark
        DataFrame. Default: "features".
        :param label_cols: Label column name(s) of data. Only used when data is a Spark DataFrame.
        Default: "label".
        :param caching_sample: whether to cache the Samples after preprocessing. Only used when
        data is a Spark DataFrame. Default: True
        :param validation_data: Validation data. XShards and Spark DataFrame are supported.
        If data is XShards, each partition is a dictionary of  {'x': feature,
        'y': label}, where feature(label) is a numpy array or a list of numpy arrays.
        It must be of the same kind as `data`.
        :param validation_trigger: Orca Trigger to trigger validation computation. Required (and
        only used) when data is a Spark DataFrame and validation_data is given.
        :param checkpoint_trigger: Orca Trigger to set a checkpoint.
        :return: this estimator
        :raises ValueError: if data is neither XShards nor a Spark DataFrame.
        """
        from zoo.orca.learn.trigger import Trigger

        assert batch_size > 0, "batch_size should be greater than 0"

        if validation_data is not None:
            assert self.metrics is not None, \
                "You should provide metrics when creating this estimator if you provide " \
                "validation_data."

        if isinstance(data, DataFrame):
            # Several columns are first combined into a single column.
            if isinstance(feature_cols, list):
                data, validation_data, feature_cols = \
                    BigDLEstimator._combine_cols(data, feature_cols, col_name="features",
                                                 val_data=validation_data)

            if isinstance(label_cols, list):
                data, validation_data, label_cols = \
                    BigDLEstimator._combine_cols(data, label_cols, col_name="label",
                                                 val_data=validation_data)

            self.nn_estimator.setBatchSize(batch_size).setMaxEpoch(epochs)\
                .setCachingSample(caching_sample).setFeaturesCol(feature_cols)\
                .setLabelCol(label_cols)

            if validation_data is not None:
                assert isinstance(validation_data, DataFrame), \
                    "validation_data should be a spark DataFrame."
                assert validation_trigger is not None, \
                    "You should provide validation_trigger if you provide validation_data."
                validation_trigger = Trigger.convert_trigger(
                    validation_trigger)
                self.nn_estimator.setValidation(validation_trigger,
                                                validation_data, self.metrics,
                                                batch_size)
            if self.log_dir is not None and self.app_name is not None:
                from bigdl.optim.optimizer import TrainSummary
                from bigdl.optim.optimizer import ValidationSummary
                train_summary = TrainSummary(log_dir=self.log_dir,
                                             app_name=self.app_name)
                self.nn_estimator.setTrainSummary(train_summary)
                val_summary = ValidationSummary(log_dir=self.log_dir,
                                                app_name=self.app_name)
                self.nn_estimator.setValidationSummary(val_summary)
            if self.model_dir is not None and checkpoint_trigger is not None:
                checkpoint_trigger = Trigger.convert_trigger(
                    checkpoint_trigger)
                self.nn_estimator.setCheckpoint(self.model_dir,
                                                checkpoint_trigger)

            self.nn_model = self.nn_estimator.fit(data)
            self.is_nnframe_fit = True
        elif isinstance(data, SparkXShards):
            from zoo.orca.data.utils import xshard_to_sample

            end_trigger = MaxEpoch(epochs)
            checkpoint_trigger = Trigger.convert_trigger(checkpoint_trigger)

            train_rdd = data.rdd.flatMap(xshard_to_sample)
            train_feature_set = FeatureSet.sample_rdd(train_rdd)
            if validation_data is None:
                val_feature_set = None
            else:
                assert isinstance(validation_data, SparkXShards), \
                    "validation_data should be a XShards"
                val_feature_set = FeatureSet.sample_rdd(
                    validation_data.rdd.flatMap(xshard_to_sample))
            if self.log_dir is not None and self.app_name is not None:
                self.estimator.set_tensorboard(self.log_dir, self.app_name)
            self.estimator.train(train_feature_set, self.loss, end_trigger,
                                 checkpoint_trigger, val_feature_set,
                                 self.metrics, batch_size)
            self.is_nnframe_fit = False
        else:
            raise ValueError(
                "Data should be XShards or Spark DataFrame, but get " +
                data.__class__.__name__)
        return self

    def predict(self,
                data,
                batch_size=4,
                feature_cols="features",
                sample_preprocessing=None):
        """
        Run inference on the input data.

        :param data: predict input data. It can be XShards or Spark DataFrame.
        If data is XShards, each partition is a dictionary of  {'x': feature}, where feature
        is a numpy array or a list of numpy arrays.
        :param batch_size: Batch size used for inference. Default: 4. In this method it is
        only applied when data is a Spark DataFrame.
        :param feature_cols: Feature column name(s) of data. Only used when data is a Spark
        DataFrame. Default: "features".
        :param sample_preprocessing: Used when data is a Spark DataFrame. If the user want change
        the default feature_preprocessing specified in Estimator.from_bigdl, the user can pass the
        new sample_preprocessing methods.
        :return: predicted result.
        If input data is Spark DataFrame, the predict result is a DataFrame which includes original
         columns plus 'prediction' column. The 'prediction' column can be FloatType, VectorUDT
         or Array of VectorUDT depending on model outputs shape.
        If input data is an XShards, the predict result is a XShards, each partition of the XShards
        is a dictionary of {'prediction': result}, where result is a numpy array or a list of numpy
        arrays.
        """
        if isinstance(data, DataFrame):
            if isinstance(feature_cols, list):
                # Several feature columns are combined into one first.
                data, _, feature_cols = \
                    BigDLEstimator._combine_cols(data, feature_cols, col_name="features")
            nn_model = self.nn_model
            nn_model.setBatchSize(batch_size).setFeaturesCol(feature_cols)
            if sample_preprocessing is not None:
                nn_model.setSamplePreprocessing(sample_preprocessing)
            return nn_model.transform(data)

        if isinstance(data, SparkXShards):
            from zoo.orca.data.utils import xshard_to_sample
            from zoo.orca.learn.utils import convert_predict_rdd_to_xshard
            samples = data.rdd.flatMap(xshard_to_sample)
            predictions = self.model.predict(samples)
            return convert_predict_rdd_to_xshard(data, predictions)

        raise ValueError(
            "Data should be XShards or Spark DataFrame, but get " +
            data.__class__.__name__)

    def evaluate(self,
                 data,
                 batch_size=32,
                 feature_cols=None,
                 label_cols=None):
        """
        Evaluate the model with the metrics given when this estimator was created.

        :param data: validation data. It can be XShards, each partition is a dictionary of
        {'x': feature, 'y': label}, where feature(label) is a numpy array or a list of numpy arrays.
        :param batch_size: Batch size used for validation. Default: 32.
        :param feature_cols: (Not supported yet) Feature column name(s) of data. Only used when
        data is a Spark  DataFrame. Default: None.
        :param label_cols: (Not supported yet) Label column name(s) of data. Only used when data
        is a Spark DataFrame. Default: None.
        :return: the evaluation results converted by bigdl_metric_results_to_dict
        """
        assert data is not None, "validation data shouldn't be None"
        assert self.metrics is not None, "metrics shouldn't be None, please specify the metrics" \
                                         " argument when creating this estimator."

        # Spark DataFrame input is not supported yet.
        if isinstance(data, DataFrame):
            raise NotImplementedError

        if not isinstance(data, SparkXShards):
            raise ValueError(
                "Data should be XShards or Spark DataFrame, but get " +
                data.__class__.__name__)

        from zoo.orca.data.utils import xshard_to_sample
        # NOTE(review): Metrics is not referenced below; the import is kept as-is.
        from zoo.orca.learn.metrics import Metrics

        samples = data.rdd.flatMap(xshard_to_sample)
        feature_set = FeatureSet.sample_rdd(samples)
        raw_results = self.estimator.evaluate(feature_set, self.metrics,
                                              batch_size)
        return bigdl_metric_results_to_dict(raw_results)

    def get_model(self):
        """
        Return the BigDL model currently held by this estimator.

        :return: the object stored in self.model.
        """
        current_model = self.model
        return current_model

    def save(self, model_path):
        """
        Write the BigDL model to disk as a pair of files.

        The model definition goes to model_path + ".bigdl" and the weights to
        model_path + ".bin"; the third saveModel argument is passed as True.

        :param model_path: path prefix to save the trained model.
        :return:
        """
        try:
            trained = self.get_model()
            definition_file = model_path + ".bigdl"
            weights_file = model_path + ".bin"
            trained.saveModel(definition_file, weights_file, True)
        except ValueError:
            # Any ValueError from the steps above is reported as "not fitted yet".
            raise ValueError("You should fit before calling save")

    def load(self,
             checkpoint,
             optimizer=None,
             loss=None,
             feature_preprocessing=None,
             label_preprocessing=None,
             model_dir=None,
             is_checkpoint=False):
        """
        Restore this estimator from a saved BigDL model or from a training checkpoint.

        Every keyword that is not None replaces the attribute of the same name before
        loading; keywords left as None keep the estimator's current setting.

        :param checkpoint: Path to the existing model or checkpoint. For a saved model the
               files checkpoint + ".bigdl" and checkpoint + ".bin" are read.
        :param optimizer: BigDL optimizer. When loading a saved model and no optimizer is
               set on the estimator, SGD() is used.
        :param loss: BigDL criterion.
        :param feature_preprocessing: Used when data in `fit` and `predict` is a Spark
               DataFrame. Converts the data in the feature column to a Tensor or to a Sample
               directly. Either a List of Int giving the size of the converted Tensor, or a
               Preprocessing[F, Tensor[T]].

               With a List of Int, the feature column may only contain Float, Double, Int,
               Array[Float], Array[Double], Array[Int] or MLlib Vector; the data are
               converted to Tensors of the given sizes before being sent to the model
               (internally a SeqToTensor is generated from the size).

               With a Preprocessing[F, Tensor[T]], the user supplies the transformation.
               Pre-defined Preprocessing are provided in package zoo.feature and can be
               combined as a ChainedPreprocessing.

               The feature_preprocessing is also handed to the generated NNModel and applied
               to the feature column during transform.
        :param label_preprocessing: Used when data in `fit` and `predict` is a Spark
               DataFrame. Similar to feature_preprocessing, but applies to Label data.
        :param model_dir: The path to save model. During the training, if checkpoint_trigger
               is defined and triggered, the model will be saved to model_dir.
        :param is_checkpoint: Whether the path is a checkpoint or a saved BigDL model.
               Default: False.
        :return: The loaded estimator object.
        """
        overrides = {
            "loss": loss,
            "optimizer": optimizer,
            "feature_preprocessing": feature_preprocessing,
            "label_preprocessing": label_preprocessing,
            "model_dir": model_dir,
        }
        for attr_name, new_value in overrides.items():
            if new_value is not None:
                setattr(self, attr_name, new_value)

        if is_checkpoint:
            # Checkpoint directories are resolved to their latest version.
            self.load_latest_orca_checkpoint(checkpoint)
            return self

        from zoo.pipeline.api.net import Net
        definition_path = checkpoint + ".bigdl"
        weights_path = checkpoint + ".bin"
        self.model = Net.load_bigdl(definition_path, weights_path)

        self.nn_estimator = NNEstimator(self.model, self.loss,
                                        self.feature_preprocessing,
                                        self.label_preprocessing)
        if self.optimizer is None:
            from bigdl.optim.optimizer import SGD
            self.optimizer = SGD()
        self.nn_estimator.setOptimMethod(self.optimizer)
        self.estimator = SparkEstimator(self.model, self.optimizer,
                                        self.model_dir)
        self.nn_model = NNModel(
            self.model, feature_preprocessing=self.feature_preprocessing)
        return self

    def load_orca_checkpoint(self, path, version, prefix=None):
        """
        Load an existing checkpoint and rebuild the estimators around it.

        The model is read from "<path>/model.<version>" and the optimization method from
        "<path>/<prefix>.<version>". self.model and self.optimizer are only replaced once
        both have been loaded and the model type has been verified.

        :param path: Path to the existing checkpoint.
        :param version: checkpoint version, which is the suffix of model.* file,
        i.e., for model.4 file, the version is 4.
        :param prefix: optimMethod prefix, for example 'optimMethod-Sequentialf53bddcc'
        :return:
        :raises ValueError: if either file cannot be loaded or the loaded model is not a
        Container. The underlying error is attached as the cause.
        """
        from bigdl.nn.layer import Model, Container
        from bigdl.optim.optimizer import OptimMethod
        import os
        try:
            loaded_model = Model.load(
                os.path.join(path, "model.{}".format(version)))
            # Explicit check instead of `assert` so it still runs under `python -O`.
            if not isinstance(loaded_model, Container):
                raise TypeError(
                    "The loaded model should be a Container, please check your "
                    "checkpoint type.")
            loaded_optimizer = OptimMethod.load(
                os.path.join(path, "{}.{}".format(prefix, version)))
        except Exception as err:
            raise ValueError(
                "Cannot load BigDL checkpoint, please check your checkpoint path "
                "and checkpoint type.") from err

        # Commit only after both loads succeeded, so a failure leaves the
        # estimator's previous model/optimizer untouched.
        self.model = loaded_model
        self.optimizer = loaded_optimizer
        self.estimator = SparkEstimator(self.model, self.optimizer,
                                        self.model_dir)
        self.nn_estimator = NNEstimator(self.model, self.loss,
                                        self.feature_preprocessing,
                                        self.label_preprocessing)
        if self.optimizer is not None:
            self.nn_estimator.setOptimMethod(self.optimizer)
        self.nn_model = NNModel(
            self.model, feature_preprocessing=self.feature_preprocessing)
    def load_latest_orca_checkpoint(self, path):
        """
        Locate the newest Orca checkpoint in a directory and load it.

        :param path: directory containing Orca checkpoint files.
        """
        from zoo.orca.learn.utils import find_latest_checkpoint
        located = find_latest_checkpoint(path, model_type="bigdl")
        ckpt_dir, optim_prefix, ckpt_version = located
        # A None directory is treated as "nothing found".
        if ckpt_dir is None:
            raise ValueError(
                "Cannot find BigDL checkpoint, please check your checkpoint path."
            )
        self.load_orca_checkpoint(path=ckpt_dir,
                                  version=ckpt_version,
                                  prefix=optim_prefix)

    def clear_gradient_clipping(self):
        """
        Remove any gradient clipping settings from both underlying estimators, so that no
        gradient clipping is applied. It needs to be called before fit to take effect.

        :return:
        """
        nn_est = self.nn_estimator
        nn_est.clearGradientClipping()
        spark_est = self.estimator
        spark_est.clear_gradient_clipping()

    def set_constant_gradient_clipping(self, min, max):
        """
        Configure constant gradient clipping on both underlying estimators.
        It needs to be called before fit to take effect.

        :param min: The minimum value to clip by.
        :param max: The maximum value to clip by.
        :return:
        """
        clip_range = (min, max)
        self.nn_estimator.setConstantGradientClipping(*clip_range)
        self.estimator.set_constant_gradient_clipping(*clip_range)

    def set_l2_norm_gradient_clipping(self, clip_norm):
        """
        Configure L2-norm gradient clipping on both underlying estimators.
        It needs to be called before fit to take effect.

        :param clip_norm: Gradient L2-Norm threshold.
        :return:
        """
        nn_est = self.nn_estimator
        nn_est.setGradientClippingByL2Norm(clip_norm)
        spark_est = self.estimator
        spark_est.set_l2_norm_gradient_clipping(clip_norm)

    def get_train_summary(self, tag=None):
        """
        Get the scalar from model train summary
        Return list of summary data of [iteration_number, scalar_value, timestamp]

        :param tag: The string variable represents the scalar wanted. Must be one of
        "Loss", "LearningRate" or "Throughput"; the default None is rejected.
        :return: the scalar data read for `tag`, taken from the NNEstimator train summary
        when the last fit used a DataFrame (self.is_nnframe_fit), otherwise from
        self.estimator.
        :raises TypeError: if tag is not one of the supported names.
        """
        supported_tags = ("Loss", "LearningRate", "Throughput")
        if tag not in supported_tags:
            # The exception type is kept as TypeError for existing callers; only the
            # message is corrected (the two literals used to be joined without a space).
            raise TypeError('Only "Loss", "LearningRate", "Throughput" '
                            'are supported in train summary')
        if self.is_nnframe_fit:
            train_summary = self.nn_estimator.getTrainSummary()
            return train_summary.read_scalar(tag=tag)
        else:
            return self.estimator.get_train_summary(tag=tag)

    def get_validation_summary(self, tag=None):
        """
        Read one scalar series from the model validation summary.
        The result is a list of summary data of [iteration_number, scalar_value, timestamp].

        Note: the metric name used at compile time and the tag to pass here may differ.
        Use the table below (metric on the left, tag on the right):
        'Accuracy'                  |   'Top1Accuracy'
        'BinaryAccuracy'            |   'Top1Accuracy'
        'CategoricalAccuracy'       |   'Top1Accuracy'
        'SparseCategoricalAccuracy' |   'Top1Accuracy'
        'AUC'                       |   'AucScore'
        'HitRatio'                  |   'HitRate@k' (k is Top-k)
        'Loss'                      |   'Loss'
        'MAE'                       |   'MAE'
        'NDCG'                      |   'NDCG'
        'TFValidationMethod'        |   '${name + " " + valMethod.toString()}'
        'Top5Accuracy'              |   'Top5Accuracy'
        'TreeNNAccuracy'            |   'TreeNNAccuracy()'
        'MeanAveragePrecision'      |   'MAP@k' (k is Top-k) (BigDL)
        'MeanAveragePrecision'      |   'PascalMeanAveragePrecision' (Zoo)
        'StatelessMetric'           |   '${name}'

        tag: The string variable represents the scalar wanted. It is required (asserted)
        when the last fit used a DataFrame.
        """
        if not self.is_nnframe_fit:
            return self.estimator.get_validation_summary(tag=tag)

        assert tag is not None, "You should provide tag which should match the name of " \
                                "the ValidationMethod set into the optimizer. " \
                                "e.g.'MAE', 'Top1AccuracyLoss', 'Top1Accuracy' or " \
                                "'Top5Accuracy'."
        return self.nn_estimator.getValidationSummary().read_scalar(tag=tag)

    @staticmethod
    def _combine_cols(data, cols, col_name="features", val_data=None):
        if isinstance(cols, list):
            if len(cols) == 1:
                col_name = cols[0]
            else:
                from pyspark.ml.feature import VectorAssembler
                assembler = VectorAssembler(inputCols=cols, outputCol=col_name)
                data = assembler.transform(data)
                if val_data is not None:
                    val_data = assembler.transform(val_data)
        return data, val_data, col_name
Beispiel #12
0
    def load(self,
             checkpoint,
             optimizer=None,
             loss=None,
             feature_preprocessing=None,
             label_preprocessing=None,
             model_dir=None,
             is_checkpoint=False):
        """
        Load an existing BigDL model, or the latest checkpoint, into this estimator.

        Arguments that are not None overwrite the matching attribute first; arguments left
        as None keep whatever the estimator already holds.

        :param checkpoint: Path to the existing model or checkpoint. A saved model is read
               from checkpoint + ".bigdl" and checkpoint + ".bin".
        :param optimizer: BigDL optimizer. If the estimator has none when a saved model is
               loaded, SGD() is created.
        :param loss: BigDL criterion.
        :param feature_preprocessing: Used when data in `fit` and `predict` is a Spark
               DataFrame. The param converts the data in feature column to a Tensor or to a
               Sample directly. It expects a List of Int as the size of the converted
               Tensor, or a Preprocessing[F, Tensor[T]].

               A List of Int only supports feature columns holding Float, Double, Int,
               Array[Float], Array[Double], Array[Int] or MLlib Vector; a SeqToTensor is
               generated from the size and the data are converted to Tensors of that size
               before being sent to the model.

               A Preprocessing[F, Tensor[T]] transforms the feature data to a Tensor[T].
               Some pre-defined Preprocessing are provided in package zoo.feature, and
               several can be combined as a ChainedPreprocessing.

               The feature_preprocessing is also passed to the generated NNModel and
               applied to the feature column during transform.
        :param label_preprocessing: Used when data in `fit` and `predict` is a Spark
               DataFrame. Similar to feature_preprocessing, but applies to Label data.
        :param model_dir: The path to save model. During the training, if checkpoint_trigger
               is defined and triggered, the model will be saved to model_dir.
        :param is_checkpoint: Whether the path is a checkpoint or a saved BigDL model.
               Default: False.
        :return: The loaded estimator object.
        """
        requested = (
            ("loss", loss),
            ("optimizer", optimizer),
            ("feature_preprocessing", feature_preprocessing),
            ("label_preprocessing", label_preprocessing),
            ("model_dir", model_dir),
        )
        for field, value in requested:
            if value is None:
                continue
            setattr(self, field, value)

        if not is_checkpoint:
            from zoo.pipeline.api.net import Net
            self.model = Net.load_bigdl(checkpoint + ".bigdl",
                                        checkpoint + ".bin")

            nn_estimator = NNEstimator(self.model, self.loss,
                                       self.feature_preprocessing,
                                       self.label_preprocessing)
            self.nn_estimator = nn_estimator
            if self.optimizer is None:
                from bigdl.optim.optimizer import SGD
                self.optimizer = SGD()
            nn_estimator.setOptimMethod(self.optimizer)
            self.estimator = SparkEstimator(self.model, self.optimizer,
                                            self.model_dir)
            self.nn_model = NNModel(
                self.model, feature_preprocessing=self.feature_preprocessing)
        else:
            self.load_latest_orca_checkpoint(checkpoint)
        return self
Beispiel #13
0
class BigDLEstimatorWrapper(OrcaSparkEstimator):
    """
    Orca estimator built around a BigDL model.

    Spark DataFrame input is routed through an NNEstimator / NNModel pair, while XShards
    input is routed through a SparkEstimator. All three are created from the same model in
    the constructor and rebuilt whenever a model or checkpoint is loaded.
    """

    def __init__(self,
                 *,
                 model,
                 loss,
                 optimizer=None,
                 feature_preprocessing=None,
                 label_preprocessing=None,
                 model_dir=None):
        """
        Create the wrapper and its underlying estimators.

        :param model: BigDL model to train / predict with.
        :param loss: BigDL criterion.
        :param optimizer: BigDL optimizer. SGD() is used when None.
        :param feature_preprocessing: passed to NNEstimator and NNModel (DataFrame path).
        :param label_preprocessing: passed to NNEstimator (DataFrame path).
        :param model_dir: passed to SparkEstimator; also used as the checkpoint directory
               in fit when a checkpoint_trigger is given.
        """
        self.loss = loss
        self.optimizer = optimizer
        self.feature_preprocessing = feature_preprocessing
        self.label_preprocessing = label_preprocessing
        self.model_dir = model_dir
        self.model = model
        self.nn_model = NNModel(
            self.model, feature_preprocessing=self.feature_preprocessing)
        self.nn_estimator = NNEstimator(self.model, self.loss,
                                        self.feature_preprocessing,
                                        self.label_preprocessing)
        if self.optimizer is None:
            from bigdl.optim.optimizer import SGD
            self.optimizer = SGD()
        self.nn_estimator.setOptimMethod(self.optimizer)
        self.estimator = SparkEstimator(self.model, self.optimizer,
                                        self.model_dir)
        # Summary logging is off until both are set; nothing in this class assigns them,
        # presumably a base-class setter does -- TODO confirm.
        self.log_dir = None
        self.app_name = None
        # Records which backend ran the last fit; read by the summary getters.
        self.is_nnframe_fit = False

    def fit(self,
            data,
            epochs,
            feature_cols="features",
            labels_cols="label",
            batch_size=32,
            caching_sample=True,
            val_data=None,
            val_trigger=None,
            val_methods=None,
            checkpoint_trigger=None):
        """
        Train the model on a Spark DataFrame or on XShards.

        :param data: training data, a Spark DataFrame or a SparkXShards.
        :param epochs: number of epochs to train.
        :param feature_cols: feature column name, or list of names (DataFrame only). A list
               with more than one name is assembled into a "features" column.
        :param labels_cols: label column name, or list of names (DataFrame only). A list
               with more than one name is assembled into a "label" column.
        :param batch_size: batch size, must be greater than 0.
        :param caching_sample: forwarded to NNEstimator.setCachingSample (DataFrame only).
        :param val_data: optional validation data of the same kind as `data`.
        :param val_trigger: when to validate; required with val_data on the DataFrame path.
        :param val_methods: validation metrics; required with val_data on the DataFrame path.
        :param checkpoint_trigger: when to checkpoint. On the DataFrame path it is only
               used if self.model_dir is set.
        :return: this estimator.
        :raises ValueError: if data is neither a Spark DataFrame nor a SparkXShards.
        """
        from zoo.orca.learn.metrics import Metrics
        from zoo.orca.learn.trigger import Trigger

        assert batch_size > 0, "batch_size should be greater than 0"

        if isinstance(data, DataFrame):
            if isinstance(feature_cols, list):
                data, val_data, feature_cols = \
                    BigDLEstimatorWrapper._combine_cols(data, feature_cols, col_name="features",
                                                        val_data=val_data)

            if isinstance(labels_cols, list):
                data, val_data, labels_cols = \
                    BigDLEstimatorWrapper._combine_cols(data, labels_cols, col_name="label",
                                                        val_data=val_data)

            self.nn_estimator.setBatchSize(batch_size).setMaxEpoch(epochs)\
                .setCachingSample(caching_sample).setFeaturesCol(feature_cols)\
                .setLabelCol(labels_cols)

            if val_data is not None:
                assert isinstance(
                    val_data,
                    DataFrame), "val_data should be a spark DataFrame."
                assert val_trigger is not None and val_methods is not None, \
                    "You should provide val_trigger and val_methods if you provide val_data."
                val_trigger = Trigger.convert_trigger(val_trigger)
                val_methods = Metrics.convert_metrics_list(val_methods)
                self.nn_estimator.setValidation(val_trigger, val_data,
                                                val_methods, batch_size)
            if self.log_dir is not None and self.app_name is not None:
                from bigdl.optim.optimizer import TrainSummary
                from bigdl.optim.optimizer import ValidationSummary
                train_summary = TrainSummary(log_dir=self.log_dir,
                                             app_name=self.app_name)
                self.nn_estimator.setTrainSummary(train_summary)
                # Fixed: app_name was previously given self.log_dir, which disagreed with
                # the train summary configured just above.
                val_summary = ValidationSummary(log_dir=self.log_dir,
                                                app_name=self.app_name)
                self.nn_estimator.setValidationSummary(val_summary)
            if self.model_dir is not None and checkpoint_trigger is not None:
                checkpoint_trigger = Trigger.convert_trigger(
                    checkpoint_trigger)
                self.nn_estimator.setCheckpoint(self.model_dir,
                                                checkpoint_trigger)

            self.nn_model = self.nn_estimator.fit(data)
            self.is_nnframe_fit = True
        elif isinstance(data, SparkXShards):
            from zoo.orca.data.utils import to_sample

            end_trigger = MaxEpoch(epochs)
            val_methods = Metrics.convert_metrics_list(val_methods)
            checkpoint_trigger = Trigger.convert_trigger(checkpoint_trigger)

            # The former nested isinstance(data, SparkXShards) check was always true in
            # this branch, so its else-clause was unreachable and has been removed.
            train_rdd = data.rdd.flatMap(to_sample)
            train_feature_set = FeatureSet.sample_rdd(train_rdd)
            if val_data is None:
                val_feature_set = None
            else:
                assert isinstance(
                    val_data, SparkXShards), "val_data should be a XShards"
                val_feature_set = FeatureSet.sample_rdd(
                    val_data.rdd.flatMap(to_sample))
            if self.log_dir is not None and self.app_name is not None:
                self.estimator.set_tensorboard(self.log_dir, self.app_name)
            self.estimator.train(train_feature_set, self.loss, end_trigger,
                                 checkpoint_trigger, val_feature_set,
                                 val_methods, batch_size)
            self.is_nnframe_fit = False
        else:
            raise ValueError(
                "Data should be XShards or Spark DataFrame, but get " +
                data.__class__.__name__)
        return self

    def predict(self,
                data,
                batch_size=8,
                feature_cols="features",
                sample_preprocessing=None):
        """
        Run inference on a Spark DataFrame or on XShards.

        :param data: a Spark DataFrame or a SparkXShards.
        :param batch_size: batch size set on the NNModel (DataFrame only).
        :param feature_cols: feature column name, or list of names (DataFrame only).
        :param sample_preprocessing: when not None, set on the NNModel before transform
               (DataFrame only).
        :return: the result of NNModel.transform for a DataFrame; for XShards, the model's
                 predictions converted with convert_predict_to_xshard.
        :raises ValueError: if data is neither a Spark DataFrame nor a SparkXShards.
        """
        if isinstance(data, DataFrame):
            if isinstance(feature_cols, list):
                data, _, feature_cols = \
                    BigDLEstimatorWrapper._combine_cols(data, feature_cols, col_name="features")
            self.nn_model.setBatchSize(batch_size).setFeaturesCol(feature_cols)
            if sample_preprocessing is not None:
                self.nn_model.setSamplePreprocessing(sample_preprocessing)
            return self.nn_model.transform(data)
        elif isinstance(data, SparkXShards):
            from zoo.orca.data.utils import to_sample
            from zoo.orca.learn.utils import convert_predict_to_xshard
            sample_rdd = data.rdd.flatMap(to_sample)
            result_rdd = self.model.predict(sample_rdd)
            return convert_predict_to_xshard(result_rdd)
        else:
            raise ValueError(
                "Data should be XShards or Spark DataFrame, but get " +
                data.__class__.__name__)

    def evaluate(self, data, validation_methods=None, batch_size=32):
        """
        Evaluate the model on XShards.

        :param data: validation data; must not be None. Only SparkXShards is handled, a
               Spark DataFrame raises NotImplementedError.
        :param validation_methods: metrics, converted with Metrics.convert_metrics_list.
        :param batch_size: batch size used for evaluation.
        :return: whatever the underlying SparkEstimator.evaluate returns.
        :raises ValueError: if data is neither a Spark DataFrame nor a SparkXShards.
        """
        assert data is not None, "validation data shouldn't be None"

        if isinstance(data, DataFrame):
            raise NotImplementedError
        elif isinstance(data, SparkXShards):
            from zoo.orca.data.utils import to_sample
            from zoo.orca.learn.metrics import Metrics

            validation_methods = Metrics.convert_metrics_list(
                validation_methods)
            val_feature_set = FeatureSet.sample_rdd(
                data.rdd.flatMap(to_sample))
            return self.estimator.evaluate(val_feature_set, validation_methods,
                                           batch_size)
        else:
            raise ValueError(
                "Data should be XShards or Spark DataFrame, but get " +
                data.__class__.__name__)

    def get_model(self):
        """
        :return: the BigDL model held in self.model.
        """
        return self.model

    def save(self, model_path):
        """
        Save the model to model_path + ".bigdl" (definition) and model_path + ".bin"
        (weights).

        :param model_path: path prefix to save the model to.
        :raises ValueError: if getting or saving the model raises ValueError.
        """
        try:
            model = self.get_model()
            model.saveModel(model_path + ".bigdl", model_path + ".bin", True)
        except ValueError:
            raise ValueError("You should fit before calling save")

    def load(self,
             checkpoint,
             optimizer=None,
             loss=None,
             feature_preprocessing=None,
             label_preprocessing=None,
             model_dir=None,
             is_checkpoint=False):
        """
        Load an existing BigDL model or checkpoint.

        Arguments that are not None overwrite the matching attribute before loading.

        :param checkpoint: Path to the existing model or checkpoint. A saved model is read
               from checkpoint + ".bigdl" and checkpoint + ".bin".
        :param optimizer: BigDL optimizer. SGD() is created when none is set and a saved
               model is loaded.
        :param loss: BigDL criterion.
        :param feature_preprocessing: passed to the rebuilt NNEstimator and NNModel.
        :param label_preprocessing: passed to the rebuilt NNEstimator.
        :param model_dir: passed to the rebuilt SparkEstimator.
        :param is_checkpoint: Whether the path is a checkpoint or a saved BigDL model.
               Default: False.
        :return: this estimator.
        """
        if loss is not None:
            self.loss = loss
        if optimizer is not None:
            self.optimizer = optimizer
        if feature_preprocessing is not None:
            self.feature_preprocessing = feature_preprocessing
        if label_preprocessing is not None:
            self.label_preprocessing = label_preprocessing
        if model_dir is not None:
            self.model_dir = model_dir

        if is_checkpoint:
            self.load_latest_orca_checkpoint(checkpoint)
        else:
            from zoo.pipeline.api.net import Net
            self.model = Net.load_bigdl(checkpoint + ".bigdl",
                                        checkpoint + ".bin")

            self.nn_estimator = NNEstimator(self.model, self.loss,
                                            self.feature_preprocessing,
                                            self.label_preprocessing)
            if self.optimizer is None:
                from bigdl.optim.optimizer import SGD
                self.optimizer = SGD()
            self.nn_estimator.setOptimMethod(self.optimizer)
            self.estimator = SparkEstimator(self.model, self.optimizer,
                                            self.model_dir)
            self.nn_model = NNModel(
                self.model, feature_preprocessing=self.feature_preprocessing)
        return self

    def load_orca_checkpoint(self, path, version, prefix=None):
        """
        Load a specific checkpoint version and rebuild the estimators around it.

        :param path: directory holding the checkpoint files.
        :param version: checkpoint version; the model is read from "model.<version>".
        :param prefix: file-name prefix of the optimization method; it is read from
               "<prefix>.<version>".
        :raises ValueError: if loading fails or the loaded model is not a Container.
        """
        from bigdl.nn.layer import Model, Container
        from bigdl.optim.optimizer import OptimMethod
        import os
        try:
            self.model = Model.load(
                os.path.join(path, "model.{}".format(version)))
            assert isinstance(self.model, Container), \
                "The loaded model should be a Container, please check your checkpoint type."
            self.optimizer = OptimMethod.load(
                os.path.join(path, "{}.{}".format(prefix, version)))
        except Exception:
            raise ValueError(
                "Cannot load BigDL checkpoint, please check your checkpoint path "
                "and checkpoint type.")
        self.estimator = SparkEstimator(self.model, self.optimizer,
                                        self.model_dir)
        self.nn_estimator = NNEstimator(self.model, self.loss,
                                        self.feature_preprocessing,
                                        self.label_preprocessing)
        if self.optimizer is not None:
            self.nn_estimator.setOptimMethod(self.optimizer)
        self.nn_model = NNModel(
            self.model, feature_preprocessing=self.feature_preprocessing)

    def load_latest_orca_checkpoint(self, path):
        """
        Find the latest checkpoint under `path` and load it.

        :param path: directory containing Orca checkpoint files.
        :raises ValueError: if no checkpoint is found.
        """
        from zoo.orca.learn.utils import find_latest_checkpoint
        path, prefix, version = find_latest_checkpoint(path,
                                                       model_type="bigdl")
        if path is None:
            raise ValueError(
                "Cannot find BigDL checkpoint, please check your checkpoint path."
            )
        self.load_orca_checkpoint(path=path, version=version, prefix=prefix)

    def clear_gradient_clipping(self):
        """Clear gradient clipping settings on both underlying estimators."""
        self.nn_estimator.clearGradientClipping()
        self.estimator.clear_gradient_clipping()

    def set_constant_gradient_clipping(self, min, max):
        """
        Set constant gradient clipping on both underlying estimators.

        :param min: The minimum value to clip by.
        :param max: The maximum value to clip by.
        """
        self.nn_estimator.setConstantGradientClipping(min, max)
        self.estimator.set_constant_gradient_clipping(min, max)

    def set_l2_norm_gradient_clipping(self, clip_norm):
        """
        Set L2-norm gradient clipping on both underlying estimators.

        :param clip_norm: Gradient L2-Norm threshold.
        """
        self.nn_estimator.setGradientClippingByL2Norm(clip_norm)
        self.estimator.set_l2_norm_gradient_clipping(clip_norm)

    def get_train_summary(self, tag=None):
        """
        Get the train summary of the backend used by the last fit.

        :param tag: forwarded to SparkEstimator.get_train_summary after an XShards fit.
        :return: after a DataFrame fit, the object from NNEstimator.getTrainSummary();
                 otherwise the result of SparkEstimator.get_train_summary(tag=tag).
        """
        # NOTE(review): `tag` is ignored on the DataFrame path, so the two branches return
        # different kinds of values. Left unchanged to keep the return type callers see.
        if self.is_nnframe_fit:
            return self.nn_estimator.getTrainSummary()
        else:
            return self.estimator.get_train_summary(tag=tag)

    def get_validation_summary(self, tag=None):
        """
        Get the validation summary of the backend used by the last fit.

        :param tag: forwarded to SparkEstimator.get_validation_summary after an XShards fit.
        :return: after a DataFrame fit, the object from NNEstimator.getValidationSummary();
                 otherwise the result of SparkEstimator.get_validation_summary(tag=tag).
        """
        # NOTE(review): `tag` is ignored on the DataFrame path; see get_train_summary.
        if self.is_nnframe_fit:
            return self.nn_estimator.getValidationSummary()
        else:
            return self.estimator.get_validation_summary(tag=tag)

    @staticmethod
    def _combine_cols(data, cols, col_name="features", val_data=None):
        """
        Resolve a list of column names to a single column name.

        A one-element list yields that element. A longer list is assembled into a column
        named `col_name` with VectorAssembler, applied to `data` and, when given, to
        `val_data`. A non-list `cols` leaves everything untouched.

        :return: tuple of (data, val_data, column name to use).
        """
        if isinstance(cols, list):
            if len(cols) == 1:
                col_name = cols[0]
            else:
                from pyspark.ml.feature import VectorAssembler
                assembler = VectorAssembler(inputCols=cols, outputCol=col_name)
                data = assembler.transform(data)
                if val_data is not None:
                    val_data = assembler.transform(val_data)
        return data, val_data, col_name
Beispiel #14
0
    def load(self,
             checkpoint,
             optimizer=None,
             loss=None,
             feature_preprocessing=None,
             label_preprocessing=None,
             model_dir=None,
             is_checkpoint=False):
        """
        Load an existing BigDL model, or the latest checkpoint in a directory.

        Arguments that are not None overwrite the matching attribute before loading.

        :param checkpoint: Path to the existing model or checkpoint. A saved model is read
               from checkpoint + ".bigdl" and checkpoint + ".bin"; a checkpoint directory is
               resolved with find_latest_checkpoint.
        :param optimizer: BigDL optimizer. For a saved model, SGD() is created when none is
               set; for a checkpoint, the optimizer is replaced by the one stored in it.
        :param loss: BigDL criterion.
        :param feature_preprocessing: passed to the rebuilt NNEstimator and NNModel.
        :param label_preprocessing: passed to the rebuilt NNEstimator.
        :param model_dir: passed to the rebuilt SparkEstimator.
        :param is_checkpoint: Whether the path is a checkpoint or a saved BigDL model.
               Default: False.
        :return: The loaded estimator object.
        """
        supplied = (
            ("loss", loss),
            ("optimizer", optimizer),
            ("feature_preprocessing", feature_preprocessing),
            ("label_preprocessing", label_preprocessing),
            ("model_dir", model_dir),
        )
        for attr_name, attr_value in supplied:
            if attr_value is not None:
                setattr(self, attr_name, attr_value)

        if not is_checkpoint:
            from zoo.pipeline.api.net import Net
            self.model = Net.load_bigdl(checkpoint + ".bigdl",
                                        checkpoint + ".bin")

            self.nn_estimator = NNEstimator(self.model, self.loss,
                                            self.feature_preprocessing,
                                            self.label_preprocessing)
            if self.optimizer is None:
                from bigdl.optim.optimizer import SGD
                self.optimizer = SGD()
            self.nn_estimator.setOptimMethod(self.optimizer)
            self.estimator = SparkEstimator(self.model, self.optimizer,
                                            self.model_dir)
            self.nn_model = NNModel(
                self.model, feature_preprocessing=self.feature_preprocessing)
            return self

        from zoo.orca.learn.utils import find_latest_checkpoint
        from zoo.pipeline.api.net import Net
        from bigdl.nn.layer import Model, Container
        from bigdl.optim.optimizer import OptimMethod
        import os
        located = find_latest_checkpoint(checkpoint, model_type="bigdl")
        path, prefix, version = located
        if path is None:
            raise ValueError(
                "Cannot find BigDL checkpoint, please check your checkpoint path."
            )
        try:
            model_file = os.path.join(path, "model.{}".format(version))
            self.model = Model.load(model_file)
            assert isinstance(self.model, Container), \
                "The loaded model should be a Container, please check your checkpoint type."
            optim_file = os.path.join(path, "{}.{}".format(prefix, version))
            self.optimizer = OptimMethod.load(optim_file)
        except Exception:
            # Any failure above, including the Container assertion, is reported uniformly.
            raise ValueError(
                "Cannot load BigDL checkpoint, please check your checkpoint path "
                "and checkpoint type.")
        self.estimator = SparkEstimator(self.model, self.optimizer,
                                        self.model_dir)
        self.nn_estimator = NNEstimator(self.model, self.loss,
                                        self.feature_preprocessing,
                                        self.label_preprocessing)
        if self.optimizer is not None:
            self.nn_estimator.setOptimMethod(self.optimizer)
        self.nn_model = NNModel(
            self.model, feature_preprocessing=self.feature_preprocessing)
        return self