Example #1
0
    def load_orca_checkpoint(self, path, version, prefix=None):
        """
        Load existing checkpoint

        :param path: Path to the existing checkpoint.
        :param version: checkpoint version, which is the suffix of model.* file,
        i.e., for modle.4 file, the version is 4.
        :param prefix: optimMethod prefix, for example 'optimMethod-Sequentialf53bddcc'
        :return:
        """
        from bigdl.nn.layer import Model, Container
        from bigdl.optim.optimizer import OptimMethod
        import os
        try:
            self.model = Model.load(
                os.path.join(path, "model.{}".format(version)))
            assert isinstance(self.model, Container), \
                "The loaded model should be a Container, please check your checkpoint type."
            self.optimizer = OptimMethod.load(
                os.path.join(path, "{}.{}".format(prefix, version)))
        except Exception:
            raise ValueError(
                "Cannot load BigDL checkpoint, please check your checkpoint path "
                "and checkpoint type.")
        self.estimator = SparkEstimator(self.model, self.optimizer,
                                        self.model_dir)
        self.nn_estimator = NNEstimator(self.model, self.loss,
                                        self.feature_preprocessing,
                                        self.label_preprocessing)
        if self.optimizer is not None:
            self.nn_estimator.setOptimMethod(self.optimizer)
        self.nn_model = NNModel(
            self.model, feature_preprocessing=self.feature_preprocessing)
Example #2
0
 def __init__(self,
              *,
              model,
              loss,
              optimizer=None,
              metrics=None,
              feature_preprocessing=None,
              label_preprocessing=None,
              model_dir=None):
     self.loss = loss
     self.optimizer = optimizer
     self.metrics = Metrics.convert_metrics_list(metrics)
     self.feature_preprocessing = feature_preprocessing
     self.label_preprocessing = label_preprocessing
     self.model_dir = model_dir
     self.model = model
     self.nn_model = NNModel(
         self.model, feature_preprocessing=self.feature_preprocessing)
     self.nn_estimator = NNEstimator(self.model, self.loss,
                                     self.feature_preprocessing,
                                     self.label_preprocessing)
     if self.optimizer is None:
         from bigdl.optim.optimizer import SGD
         self.optimizer = SGD()
     self.nn_estimator.setOptimMethod(self.optimizer)
     self.estimator = SparkEstimator(self.model, self.optimizer,
                                     self.model_dir)
     self.log_dir = None
     self.app_name = None
     self.is_nnframe_fit = False
Example #3
0
    def load(self, checkpoint, optimizer=None, loss=None, feature_preprocessing=None,
             label_preprocessing=None, model_dir=None, is_checkpoint=False):
        if loss is not None:
            self.loss = loss
        if optimizer is not None:
            self.optimizer = optimizer
        if feature_preprocessing is not None:
            self.feature_preprocessing = feature_preprocessing
        if label_preprocessing is not None:
            self.label_preprocessing = label_preprocessing
        if model_dir is not None:
            self.model_dir = model_dir

        if is_checkpoint:
            self.load_latest_orca_checkpoint(checkpoint)
        else:
            from zoo.pipeline.api.net import Net
            self.model = Net.load_bigdl(checkpoint + ".bigdl", checkpoint + ".bin")

            self.nn_estimator = NNEstimator(self.model, self.loss, self.feature_preprocessing,
                                            self.label_preprocessing)
            if self.optimizer is None:
                from bigdl.optim.optimizer import SGD
                self.optimizer = SGD()
            self.nn_estimator.setOptimMethod(self.optimizer)
            self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir)
            self.nn_model = NNModel(self.model, feature_preprocessing=self.feature_preprocessing)
        return self
Example #4
0
    def load_orca_checkpoint(self, path, version=None, prefix=None):
        """
        Load existing checkpoint. To load a specific checkpoint, please provide both `version`
        and `perfix`. If `version` is None, then the latest checkpoint under the specified
        directory will be loaded.

        :param path: Path to the existing checkpoint (or directory containing Orca checkpoint
               files).
        :param version: checkpoint version, which is the suffix of model.* file, i.e., for
               modle.4 file, the version is 4. If it is None, then load the latest checkpoint.
        :param prefix: optimMethod prefix, for example 'optimMethod-Sequentialf53bddcc'
        :return:
        """
        from bigdl.nn.layer import Model, Container
        from bigdl.optim.optimizer import OptimMethod
        from zoo.orca.learn.utils import find_latest_checkpoint
        import os

        if version is None:
            path, prefix, version = find_latest_checkpoint(path,
                                                           model_type="bigdl")
            if path is None:
                raise ValueError(
                    "Cannot find BigDL checkpoint, please check your checkpoint"
                    " path.")
        else:
            assert prefix is not None, "You should provide optimMethod prefix, " \
                                       "for example 'optimMethod-TorchModelf53bddcc'"

        try:
            self.model = Model.load(
                os.path.join(path, "model.{}".format(version)))
            assert isinstance(self.model, Container), \
                "The loaded model should be a Container, please check your checkpoint type."
            self.optimizer = OptimMethod.load(
                os.path.join(path, "{}.{}".format(prefix, version)))
        except Exception:
            raise ValueError(
                "Cannot load BigDL checkpoint, please check your checkpoint path "
                "and checkpoint type.")
        self.estimator = SparkEstimator(self.model, self.optimizer,
                                        self.model_dir)
        self.nn_estimator = NNEstimator(self.model, self.loss,
                                        self.feature_preprocessing,
                                        self.label_preprocessing)
        if self.optimizer is not None:
            self.nn_estimator.setOptimMethod(self.optimizer)
        self.nn_model = NNModel(
            self.model, feature_preprocessing=self.feature_preprocessing)
Example #5
0
 def load_orca_checkpoint(self, path, version, prefix=None):
     from bigdl.nn.layer import Model, Container
     from bigdl.optim.optimizer import OptimMethod
     import os
     try:
         self.model = Model.load(os.path.join(path, "model.{}".format(version)))
         assert isinstance(self.model, Container), \
             "The loaded model should be a Container, please check your checkpoint type."
         self.optimizer = OptimMethod.load(os.path.join(path,
                                                        "{}.{}".format(prefix, version)))
     except Exception:
         raise ValueError("Cannot load BigDL checkpoint, please check your checkpoint path "
                          "and checkpoint type.")
     self.estimator = SparkEstimator(self.model, self.optimizer, self.model_dir)
     self.nn_estimator = NNEstimator(self.model, self.loss, self.feature_preprocessing,
                                     self.label_preprocessing)
     if self.optimizer is not None:
         self.nn_estimator.setOptimMethod(self.optimizer)
     self.nn_model = NNModel(self.model, feature_preprocessing=self.feature_preprocessing)
Example #6
0
    def load(self,
             checkpoint,
             optimizer=None,
             loss=None,
             feature_preprocessing=None,
             label_preprocessing=None,
             model_dir=None,
             is_checkpoint=False):
        """
        Load existing BigDL model or checkpoint

        :param checkpoint: Path to the existing model or checkpoint.
        :param optimizer: BigDL optimizer.
        :param loss: BigDL criterion.
        :param feature_preprocessing: Used when data in `fit` and `predict` is a Spark DataFrame.
               The param converts the data in feature column to a Tensor or to a Sample directly.
               It expects a List of Int as the size of the converted Tensor, or a Preprocessing[F,
               Tensor[T]]

               If a List of Int is set as feature_preprocessing, it can only handle the case that
               feature column contains the following data types:
               Float, Double, Int, Array[Float], Array[Double], Array[Int] and MLlib Vector. The
               feature data are converted to Tensors with the specified sizes before
               sending to the model. Internally, a SeqToTensor is generated according to the
               size, and used as the feature_preprocessing.

               Alternatively, user can set feature_preprocessing as Preprocessing[F, Tensor[T]]
               that transforms the feature data to a Tensor[T]. Some pre-defined Preprocessing are
               provided in package zoo.feature. Multiple Preprocessing can be combined as a
               ChainedPreprocessing.

               The feature_preprocessing will also be copied to the generated NNModel and applied
               to feature column during transform.
        :param label_preprocessing: Used when data in `fit` and `predict` is a Spark DataFrame.
            similar to feature_preprocessing, but applies to Label data.
        :param model_dir: The path to save model. During the training, if checkpoint_trigger is
            defined and triggered, the model will be saved to model_dir.
        :param is_checkpoint: Whether the path is a checkpoint or a saved BigDL model.
            Default: False.
        :return: The loaded estimator object.
        """
        if loss is not None:
            self.loss = loss
        if optimizer is not None:
            self.optimizer = optimizer
        if feature_preprocessing is not None:
            self.feature_preprocessing = feature_preprocessing
        if label_preprocessing is not None:
            self.label_preprocessing = label_preprocessing
        if model_dir is not None:
            self.model_dir = model_dir

        if is_checkpoint:
            self.load_latest_orca_checkpoint(checkpoint)
        else:
            from zoo.pipeline.api.net import Net
            self.model = Net.load_bigdl(checkpoint + ".bigdl",
                                        checkpoint + ".bin")

            self.nn_estimator = NNEstimator(self.model, self.loss,
                                            self.feature_preprocessing,
                                            self.label_preprocessing)
            if self.optimizer is None:
                from bigdl.optim.optimizer import SGD
                self.optimizer = SGD()
            self.nn_estimator.setOptimMethod(self.optimizer)
            self.estimator = SparkEstimator(self.model, self.optimizer,
                                            self.model_dir)
            self.nn_model = NNModel(
                self.model, feature_preprocessing=self.feature_preprocessing)
        return self
Example #7
0
    def load(self,
             checkpoint,
             optimizer=None,
             loss=None,
             feature_preprocessing=None,
             label_preprocessing=None,
             model_dir=None,
             is_checkpoint=False):
        if loss is not None:
            self.loss = loss
        if optimizer is not None:
            self.optimizer = optimizer
        if feature_preprocessing is not None:
            self.feature_preprocessing = feature_preprocessing
        if label_preprocessing is not None:
            self.label_preprocessing = label_preprocessing
        if model_dir is not None:
            self.model_dir = model_dir

        if is_checkpoint:
            from zoo.orca.learn.utils import find_latest_checkpoint
            from zoo.pipeline.api.net import Net
            from bigdl.nn.layer import Model, Container
            from bigdl.optim.optimizer import OptimMethod
            import os
            path, prefix, version = find_latest_checkpoint(checkpoint,
                                                           model_type="bigdl")
            if path is None:
                raise ValueError(
                    "Cannot find BigDL checkpoint, please check your checkpoint path."
                )
            try:
                self.model = Model.load(
                    os.path.join(path, "model.{}".format(version)))
                assert isinstance(self.model, Container), \
                    "The loaded model should be a Container, please check your checkpoint type."
                self.optimizer = OptimMethod.load(
                    os.path.join(path, "{}.{}".format(prefix, version)))
            except Exception:
                raise ValueError(
                    "Cannot load BigDL checkpoint, please check your checkpoint path "
                    "and checkpoint type.")
            self.estimator = SparkEstimator(self.model, self.optimizer,
                                            self.model_dir)
            self.nn_estimator = NNEstimator(self.model, self.loss,
                                            self.feature_preprocessing,
                                            self.label_preprocessing)
            if self.optimizer is not None:
                self.nn_estimator.setOptimMethod(self.optimizer)
            self.nn_model = NNModel(
                self.model, feature_preprocessing=self.feature_preprocessing)
        else:
            from zoo.pipeline.api.net import Net
            self.model = Net.load_bigdl(checkpoint + ".bigdl",
                                        checkpoint + ".bin")

            self.nn_estimator = NNEstimator(self.model, self.loss,
                                            self.feature_preprocessing,
                                            self.label_preprocessing)
            if self.optimizer is None:
                from bigdl.optim.optimizer import SGD
                self.optimizer = SGD()
            self.nn_estimator.setOptimMethod(self.optimizer)
            self.estimator = SparkEstimator(self.model, self.optimizer,
                                            self.model_dir)
            self.nn_model = NNModel(
                self.model, feature_preprocessing=self.feature_preprocessing)
        return self