Ejemplo n.º 1
0
    def loadMetadataAndInstance(pyspark_xgb_cls, path, sc, logger):
        """
        Rebuild a saved ``pyspark_xgb_cls`` object (an
        xgboost.spark._SparkXGBEstimator or xgboost.spark._SparkXGBModel)
        together with the metadata it was stored with.

        Beyond the regular params restored by ``DefaultParamsReader``, two
        optional metadata entries are handled here:

        * ``serialized_callbacks`` -- base64 text wrapping a cloudpickle
          payload; restored into the ``callbacks`` param. Any failure is
          reported through ``logger.warning`` and loading continues.
        * ``init_booster`` -- a location relative to ``path`` of a parquet
          dataset whose first row carries the serialized booster; restored
          into the ``xgb_model`` param.

        :param pyspark_xgb_cls: class to instantiate (called with no arguments)
        :param path: location the object was saved to
        :param sc: passed through to ``DefaultParamsReader.loadMetadata``
        :param logger: receives the warning when callbacks cannot be restored
        :return: a tuple of (metadata, instance)
        """
        expected_name = get_class_name(pyspark_xgb_cls)
        metadata = DefaultParamsReader.loadMetadata(
            path, sc, expectedClassName=expected_name)

        instance = pyspark_xgb_cls()
        DefaultParamsReader.getAndSetParams(instance, metadata)

        if "serialized_callbacks" in metadata:
            encoded = metadata["serialized_callbacks"]
            try:
                # NOTE(review): unpickling runs code embedded in the payload,
                # so only load from locations that are trusted.
                raw = base64.decodebytes(encoded.encode("ascii"))
                callbacks = cloudpickle.loads(raw)
                instance.set(instance.callbacks, callbacks)
            except Exception as err:  # pylint: disable=W0703
                # Best effort: a broken callbacks payload must not prevent the
                # rest of the object from loading.
                logger.warning(
                    f"Fails to load the callbacks param due to {err}. Please set the "
                    "callbacks param manually for the loaded estimator.")

        if "init_booster" in metadata:
            booster_path = os.path.join(path, metadata["init_booster"])
            rows = _get_spark_session().read.parquet(booster_path).collect()
            # Only the first collected row is consulted.
            restored_booster = deserialize_booster(rows[0].init_booster)
            instance.set(instance.xgb_model, restored_booster)

        # Give the fresh instance the uid recorded in the metadata.
        instance._resetUid(metadata["uid"])  # pylint: disable=protected-access
        return metadata, instance
Ejemplo n.º 2
0
    def load(self, path):
        """
        Recreate the object saved at ``path`` and return it.

        The metadata is read with ``DefaultParamsReader.loadMetadata`` using
        ``self.sc``. Its ``paramMap`` and ``defaultParamMap`` entries are
        passed through ``self._deserialize_dict`` before being applied. The
        class named in the metadata is instantiated with no arguments, given
        the stored uid, and then populated with the params.

        :param path: location the object was saved to
        :return: the populated instance
        """
        metadata = DefaultParamsReader.loadMetadata(path, self.sc)

        # Both param maps get the same treatment, in this order.
        for key in ('paramMap', 'defaultParamMap'):
            metadata[key] = self._deserialize_dict(metadata[key])

        # Reaches DefaultParamsReader's name-mangled private class resolver.
        target_cls = DefaultParamsReader._DefaultParamsReader__get_class(metadata['class'])
        loaded = target_cls()
        loaded._resetUid(metadata['uid'])
        DefaultParamsReader.getAndSetParams(loaded, metadata)
        return loaded