Example #1
    def test_nested_pipeline_persistence(self):
        """
        Pipeline[HashingTF, Pipeline[PCA]]
        """
        temp_path = tempfile.mkdtemp()

        try:
            df = self.spark.createDataFrame([(["a", "b", "c"], ),
                                             (["c", "d", "e"], )], ["words"])
            tf = HashingTF(numFeatures=10,
                           inputCol="words",
                           outputCol="features")
            pca = PCA(k=2, inputCol="features", outputCol="pca_features")
            p0 = Pipeline(stages=[pca])
            pl = Pipeline(stages=[tf, p0])
            model = pl.fit(df)

            pipeline_path = temp_path + "/pipeline"
            pl.save(pipeline_path)
            loaded_pipeline = Pipeline.load(pipeline_path)
            self._compare_pipelines(pl, loaded_pipeline)

            model_path = temp_path + "/pipeline-model"
            model.save(model_path)
            loaded_model = PipelineModel.load(model_path)
            self._compare_pipelines(model, loaded_model)
        finally:
            try:
                rmtree(temp_path)
            except OSError:
                pass
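
Note: _compare_pipelines is a helper on the surrounding test class, not part of the pyspark API. A minimal sketch of what such a helper might check, assuming stages are compared recursively and leaf params by value:

    def _compare_pipelines(self, m1, m2):
        # A save/load round trip should preserve the concrete class and UID.
        self.assertEqual(type(m1), type(m2))
        self.assertEqual(m1.uid, m2.uid)
        if isinstance(m1, Pipeline):
            stages1, stages2 = m1.getStages(), m2.getStages()
        elif isinstance(m1, PipelineModel):
            stages1, stages2 = m1.stages, m2.stages
        else:
            # Leaf stage: every defined param should survive persistence.
            for p in m1.params:
                if m1.isDefined(p):
                    self.assertEqual(m1.getOrDefault(p), m2.getOrDefault(p))
            return
        # Recurse so nested cases like Pipeline[HashingTF, Pipeline[PCA]] are covered.
        self.assertEqual(len(stages1), len(stages2))
        for s1, s2 in zip(stages1, stages2):
            self._compare_pipelines(s1, s2)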
Example #2
    def test_python_transformer_pipeline_persistence(self):
        """
        Pipeline[MockUnaryTransformer, Binarizer]
        """
        temp_path = tempfile.mkdtemp()

        try:
            df = self.spark.range(0, 10).toDF("input")
            tf = MockUnaryTransformer(
                shiftVal=2).setInputCol("input").setOutputCol("shiftedInput")
            tf2 = Binarizer(threshold=6,
                            inputCol="shiftedInput",
                            outputCol="binarized")
            pl = Pipeline(stages=[tf, tf2])
            model = pl.fit(df)

            pipeline_path = temp_path + "/pipeline"
            pl.save(pipeline_path)
            loaded_pipeline = Pipeline.load(pipeline_path)
            self._compare_pipelines(pl, loaded_pipeline)

            model_path = temp_path + "/pipeline-model"
            model.save(model_path)
            loaded_model = PipelineModel.load(model_path)
            self._compare_pipelines(model, loaded_model)
        finally:
            try:
                rmtree(temp_path)
            except OSError:
                pass
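
Note: MockUnaryTransformer is a test helper defined alongside these tests, not a pyspark class. A minimal sketch under that assumption; mixing in DefaultParamsReadable/DefaultParamsWritable is what gives a Python-only stage the persistence that Pipeline.save needs:

from pyspark.ml import UnaryTransformer
from pyspark.ml.param import Param, Params, TypeConverters
from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable
from pyspark.sql.types import DoubleType


class MockUnaryTransformer(UnaryTransformer, DefaultParamsReadable, DefaultParamsWritable):
    shiftVal = Param(Params._dummy(), "shiftVal", "amount added to every input value",
                     typeConverter=TypeConverters.toFloat)

    def __init__(self, shiftVal=1):
        super(MockUnaryTransformer, self).__init__()
        self._setDefault(shiftVal=1)
        self._set(shiftVal=shiftVal)

    def createTransformFunc(self):
        # Shift each input value by the configured amount.
        shift = self.getOrDefault(self.shiftVal)
        return lambda v: v + shift

    def outputDataType(self):
        return DoubleType()

    def validateInputType(self, inputType):
        # The original helper likely validates numeric input; kept permissive here.
        pass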
Example #3

from pyspark.sql import SparkSession
from pyspark.sql.functions import rand
from pyspark.ml import Pipeline
from pyspark.ml.feature import VectorAssembler, OneHotEncoder
from sparkflow.graph_utils import build_graph
from sparkflow.tensorflow_async import SparkAsyncDL

if __name__ == '__main__':
    spark = SparkSession.builder \
        .appName("examples") \
        .master('local[8]').config('spark.driver.memory', '2g') \
        .getOrCreate()

    df = spark.read.option("inferSchema", "true") \
        .csv('examples/mnist_train.csv').orderBy(rand())
    mg = build_graph(cnn_model)
    va = VectorAssembler(inputCols=df.columns[1:785], outputCol='features')
    encoded = OneHotEncoder(inputCol='_c0', outputCol='labels', dropLast=False)

    spark_model = SparkAsyncDL(inputCol='features',
                               tensorflowGraph=mg,
                               tfInput='x:0',
                               tfLabel='y:0',
                               tfOutput='out:0',
                               tfOptimizer='adam',
                               miniBatchSize=300,
                               miniStochasticIters=-1,
                               shufflePerIter=True,
                               iters=20,
                               tfLearningRate=.0001,
                               predictionCol='predicted',
                               labelCol='labels',
                               verbose=1)

    p = Pipeline(stages=[va, encoded, spark_model]).fit(df)
    p.save("cnn")
Example #4
import torch
from pyspark.ml import Pipeline
from pyspark.ml.feature import VectorAssembler
from sparktorch import serialize_torch_obj, SparkTorch

network = UNet(1)  # UNet and soft_dice_loss are defined elsewhere in the original script

torch_obj = serialize_torch_obj(
    model=network,
    criterion=soft_dice_loss,
    optimizer=torch.optim.Adam,
    lr=0.0001
)

spark_model = SparkTorch(
    inputCol='features',
    labelCol='labels',
    predictionCol='predictions',
    torchObj=torch_obj,
    iters=10,
    verbose=1
)

print("Ran successfully")

data_train = spark.read.option("inferSchema","true").option("maxColumns",64*64*4).csv(data_train_path)

features_size = 64*64*3
va1 = VectorAssembler(inputCols=data_train.columns[:features_size],
                      outputCol='features')
va2 = VectorAssembler(inputCols=data_train.columns[features_size:],
                      outputCol='labels')

p = Pipeline(stages=[va1, va2, spark_model]).fit(data_train)
p.save('unet')
print("Ran successfully")
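
Note: UNet and soft_dice_loss come from elsewhere in the original script; SparkTorch simply serializes whatever model and criterion it is handed. A hedged sketch of a soft dice criterion (the original formulation may differ):

import torch

def soft_dice_loss(pred, target, eps=1e-6):
    # Soft dice: 1 - 2*intersection / (total mass), on sigmoid probabilities.
    pred = torch.sigmoid(pred)
    intersection = (pred * target).sum()
    union = pred.sum() + target.sum()
    return 1.0 - (2.0 * intersection + eps) / (union + eps)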
Example #5
    spark_model = SparkAsyncDL(inputCol='features',
                               tensorflowGraph=mg,
                               tfInput='x:0',
                               tfLabel='y:0',
                               tfOutput='out:0',
                               tfOptimizer='adam',
                               miniBatchSize=300,
                               miniStochasticIters=1,
                               shufflePerIter=True,
                               iters=50,
                               predictionCol='predicted',
                               labelCol='labels',
                               partitions=4,
                               verbose=1,
                               optimizerOptions=adam_config)

    # Create and save the Pipeline
    p = Pipeline(stages=[vector_assembler, encoder, spark_model]).fit(df)
    p.save('simple_dnn')

    # Example of loading the pipeline
    loaded_pipeline = PysparkPipelineWrapper.unwrap(
        PipelineModel.load('simple_dnn'))

    # Run predictions and evaluation
    predictions = loaded_pipeline.transform(df)
    evaluator = MulticlassClassificationEvaluator(labelCol="_c0",
                                                  predictionCol="predicted",
                                                  metricName="accuracy")
    accuracy = evaluator.evaluate(predictions)
    print("Test Error = %g" % (1.0 - accuracy))