Beispiel #1
0
    def loadMetadataAndInstance(pyspark_xgb_cls, path, sc, logger):
        """
        Restore a saved xgboost.spark._SparkXGBEstimator or
        xgboost.spark._SparkXGBModel together with its metadata.

        The params recorded in the metadata are applied to a freshly
        constructed ``pyspark_xgb_cls`` instance. Two optional metadata entries
        receive extra handling:

        * ``"serialized_callbacks"``: ASCII base64 text wrapping a cloudpickle
          payload; on success it is stored in the ``callbacks`` param.
        * ``"init_booster"``: a sub-path below ``path`` that is read as parquet;
          the ``init_booster`` field of its first row is deserialized and
          stored in the ``xgb_model`` param.

        :param pyspark_xgb_cls: class to instantiate (called without arguments)
        :param path: location the estimator/model was saved to
        :param sc: passed through to ``DefaultParamsReader.loadMetadata``
        :param logger: receives a warning when the callbacks cannot be restored
        :return: a tuple of (metadata, instance)
        """
        expected_name = get_class_name(pyspark_xgb_cls)
        metadata = DefaultParamsReader.loadMetadata(
            path, sc, expectedClassName=expected_name)

        instance = pyspark_xgb_cls()
        DefaultParamsReader.getAndSetParams(instance, metadata)

        if "serialized_callbacks" in metadata:
            encoded = metadata["serialized_callbacks"]
            # Best effort: any failure is reported through the logger and
            # loading carries on without the callbacks param.
            # NOTE(review): cloudpickle.loads runs whatever the payload
            # contains, so only load from locations you trust.
            try:
                pickled = base64.decodebytes(encoded.encode("ascii"))
                restored = cloudpickle.loads(pickled)
                instance.set(instance.callbacks, restored)
            except Exception as e:  # pylint: disable=W0703
                logger.warning(
                    f"Fails to load the callbacks param due to {e}. Please set the "
                    "callbacks param manually for the loaded estimator.")

        if "init_booster" in metadata:
            booster_path = os.path.join(path, metadata["init_booster"])
            rows = _get_spark_session().read.parquet(booster_path).collect()
            booster = deserialize_booster(rows[0].init_booster)
            instance.set(instance.xgb_model, booster)

        # The uid is restored last, after all params have been applied.
        instance._resetUid(metadata["uid"])  # pylint: disable=protected-access
        return metadata, instance
Beispiel #2
0
    def load(self, path):
        """
        Rebuild a params-only instance from the metadata saved at ``path``.

        The ``paramMap`` and ``defaultParamMap`` metadata entries are passed
        through ``self._deserialize_dict`` before the instance is created from
        the class named in ``metadata['class']`` (called without arguments).

        :param path: location of the saved instance
        :return: the restored instance, with uid and params applied
        """
        metadata = DefaultParamsReader.loadMetadata(path, self.sc)
        for key in ('paramMap', 'defaultParamMap'):
            metadata[key] = self._deserialize_dict(metadata[key])

        # Reaches the name-mangled private helper of DefaultParamsReader.
        resolve_class = DefaultParamsReader._DefaultParamsReader__get_class
        restored = resolve_class(metadata['class'])()
        restored._resetUid(metadata['uid'])
        DefaultParamsReader.getAndSetParams(restored, metadata)
        return restored
Beispiel #3
0
 def load(self, path: str) -> "PipelineModel":
     """
     Load a ``PipelineModel`` saved at ``path``.

     If the saved ``paramMap`` carries ``language == "Python"`` the stages are
     read through ``PipelineSharedReadWrite``; otherwise loading is delegated
     to a ``JavaMLReader`` built from ``self.cls``.
     """
     metadata = DefaultParamsReader.loadMetadata(path, self.sc)
     params = metadata["paramMap"]
     saved_from_python = "language" in params and params["language"] == "Python"

     if saved_from_python:
         uid, stages = PipelineSharedReadWrite.load(metadata, self.sc, path)
         model = PipelineModel(stages=cast(List[Transformer], stages))
         return model._resetUid(uid)

     java_reader = JavaMLReader(cast(Type["JavaMLReadable[PipelineModel]"], self.cls))
     return java_reader.load(path)
Beispiel #4
0
 def load(self, path):
     """
     Load a ``PipelineModel`` saved at ``path``.

     Delegates to ``JavaMLReader(self.cls)`` unless the saved ``paramMap`` has
     a ``language`` entry equal to ``'Python'``, in which case the stages are
     read with ``PipelineSharedReadWrite.load``.
     """
     metadata = DefaultParamsReader.loadMetadata(path, self.sc)
     param_map = metadata['paramMap']

     # Guard clause: only the Python-marked layout is handled directly.
     if not ('language' in param_map and param_map['language'] == 'Python'):
         return JavaMLReader(self.cls).load(path)

     uid, stages = PipelineSharedReadWrite.load(metadata, self.sc, path)
     pipeline_model = PipelineModel(stages=stages)
     return pipeline_model._resetUid(uid)
Beispiel #5
0
 def load(self, path):
     """
     Load a ``PipelineModel`` saved at ``path``.

     The saved ``paramMap`` decides which reader is used: a ``language`` entry
     equal to ``"Python"`` selects ``PipelineSharedReadWrite.load``; a missing
     or different entry hands the work to ``JavaMLReader(self.cls)``.
     """
     metadata = DefaultParamsReader.loadMetadata(path, self.sc)
     saved_params = metadata["paramMap"]
     is_python_layout = (
         "language" in saved_params and saved_params["language"] == "Python"
     )

     if is_python_layout:
         uid, stages = PipelineSharedReadWrite.load(metadata, self.sc, path)
         return PipelineModel(stages=stages)._resetUid(uid)
     return JavaMLReader(self.cls).load(path)
Beispiel #6
0
    def load(metadata, sc, path):
        """
        Load the stages of a saved :py:class:`Pipeline` or :py:class:`PipelineModel`.

        Stage uids are taken from ``metadata['paramMap']['stageUids']``; each
        stage is read from its own path below ``<path>/stages`` and the stages
        are returned in the same order as the uids.

        :param metadata: already-loaded metadata of the pipeline
        :param sc: passed through to ``DefaultParamsReader.loadParamsInstance``
        :param path: location the pipeline was saved to
        :return: (UID, list of stages)
        """
        stages_root = os.path.join(path, "stages")
        uids = metadata['paramMap']['stageUids']
        total = len(uids)
        loaded = [
            DefaultParamsReader.loadParamsInstance(
                PipelineSharedReadWrite.getStagePath(uid, position, total, stages_root),
                sc)
            for position, uid in enumerate(uids)
        ]
        return (metadata['uid'], loaded)
Beispiel #7
0
    def load(metadata: Dict[str, Any], sc: SparkContext,
             path: str) -> Tuple[str, List["PipelineStage"]]:
        """
        Load the stages of a saved :py:class:`Pipeline` or :py:class:`PipelineModel`.

        Stage uids come from ``metadata["paramMap"]["stageUids"]``; every stage
        is read from its own path below ``<path>/stages`` and the stages are
        returned in the same order as the uids.

        Returns
        -------
        tuple
            (UID, list of stages)
        """
        stages_root = os.path.join(path, "stages")
        uids = metadata["paramMap"]["stageUids"]
        total = len(uids)
        loaded: List["PipelineStage"] = [
            DefaultParamsReader.loadParamsInstance(
                PipelineSharedReadWrite.getStagePath(uid, position, total, stages_root),
                sc)
            for position, uid in enumerate(uids)
        ]
        return (metadata["uid"], loaded)
Beispiel #8
0
 def load(self, path):
     """
     Load an ``AddResidualConnection`` saved at ``path``.

     The metadata and stages are read with ``DefaultParamsReader`` and
     ``PipelineSharedReadWrite``; a new ``AddResidualConnection`` is built from
     the stages and the result of its ``_resetUid(uid)`` call is returned.
     """
     spark_context = self.sc
     metadata = DefaultParamsReader.loadMetadata(path, spark_context)
     uid, stages = PipelineSharedReadWrite.load(metadata, spark_context, path)
     restored = AddResidualConnection(stages=stages)
     return restored._resetUid(uid)