Example #1
    def test_pipeline(self, bag):
        from pyspark.ml.pipeline import Pipeline
        # create the pipeline, save it, and load it back
        pth = "/tmp/spatial-join"
        new_p = Pipeline().setStages([bag["transformer"]])
        new_p.write().overwrite().save(pth)
        saved_p = Pipeline.load(pth)

        # check transformations
        inp = bag["input"]
        exp = bag["expected"]
        check(new_p.fit(inp), inp, exp)
        check(saved_p.fit(inp), inp, exp)
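The check helper and the bag fixture come from the surrounding test suite and are not shown above. A minimal sketch of what a check-style helper could look like, assuming it simply runs the fitted model on the input and compares the collected rows with the expected output:

# Hypothetical helper (not part of the original suite): run the fitted
# PipelineModel on the input DataFrame and compare rows with the expected output.
def check(model, input_df, expected_df):
    result = model.transform(input_df)
    assert result.collect() == expected_df.collect()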
Example #2
    def test_save_pipeline(self):
        processed = self.generate_random_data()
        mg = build_graph(SparkFlowTests.create_random_model)
        spark_model = SparkAsyncDL(inputCol='features',
                                   tensorflowGraph=mg,
                                   tfInput='x:0',
                                   tfLabel='y:0',
                                   tfOutput='outer/Sigmoid:0',
                                   tfOptimizer='adam',
                                   tfLearningRate=.1,
                                   iters=20,
                                   partitions=2,
                                   predictionCol='predicted',
                                   labelCol='label')
        p = Pipeline(stages=[spark_model]).fit(processed)
        p.write().overwrite().save('example_pipeline')
        p = PysparkPipelineWrapper.unwrap(
            PipelineModel.load('example_pipeline'))
        data = p.transform(processed).take(10)
        nb_errors = SparkFlowTests.calculate_errors(data)
        self.assertTrue(nb_errors < len(data))
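create_random_model is a graph-building function defined elsewhere in the test class and passed to build_graph. A minimal sketch of what it might look like, mirroring the small_model pattern shown in a later example; the 10-feature input and layer sizes are assumptions:

import tensorflow as tf

def create_random_model():
    # Placeholders matching tfInput='x:0' and tfLabel='y:0' above; the input width is assumed
    x = tf.placeholder(tf.float32, shape=[None, 10], name='x')
    y = tf.placeholder(tf.float32, shape=[None, 1], name='y')
    hidden = tf.layers.dense(x, 20, activation=tf.nn.relu)
    # name='outer' plus a sigmoid activation yields the 'outer/Sigmoid:0' tensor used as tfOutput
    out = tf.layers.dense(hidden, 1, activation=tf.nn.sigmoid, name='outer')
    loss = tf.losses.mean_squared_error(y, out)
    return loss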
Example #3
    mg = build_graph(small_model)
    # Assemble the features and one-hot encode the label
    va = VectorAssembler(inputCols=final_df.columns[1:151],
                         outputCol='features')
    encoded = OneHotEncoder(inputCol='result',
                            outputCol='labels',
                            dropLast=False)
    adam_config = build_adam_config(learning_rate=0.001,
                                    beta1=0.9,
                                    beta2=0.999)

    spark_model = SparkAsyncDL(inputCol='features',
                               tensorflowGraph=mg,
                               tfInput='x:0',
                               tfLabel='y:0',
                               tfOutput='out:0',
                               tfLearningRate=.001,
                               iters=20,
                               predictionCol='predicted',
                               labelCol='labels',
                               verbose=1,
                               optimizerOptions=adam_config)

    ckptpath = os.path.join(ckptdir, task)
    print('save model in %s' % ckptpath)
    p = Pipeline(stages=[va, encoded, spark_model]).fit(final_df)
    p.write().overwrite().save(ckptpath)

    print('===task all done===')
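To reuse the checkpointed pipeline later, it can be reloaded with the same unwrap pattern used in the other examples. A minimal sketch, assuming PysparkPipelineWrapper and PipelineModel are imported as in those examples:

    loaded = PysparkPipelineWrapper.unwrap(PipelineModel.load(ckptpath))
    predictions = loaded.transform(final_df)
    predictions.select('labels', 'predicted').show(10)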
Example #4
    vector_assembler = VectorAssembler(inputCols=df.columns[1:785], outputCol='features')  # inputCols assumed; the original line is truncated (MNIST-style features, as in a later example)

    # Demonstration of some options. Not all are required
    # Note: This uses the barrier execution mode, which is sensitive to the number of partitions
    spark_model = SparkTorch(inputCol='features',
                             labelCol='_c0',
                             predictionCol='predictions',
                             torchObj=torch_obj,
                             iters=50,
                             verbose=1,
                             validationPct=0.2,
                             miniBatch=128)

    # Create and save the Pipeline
    p = Pipeline(stages=[vector_assembler, spark_model]).fit(df)
    p.write().overwrite().save('simple_cnn')

    # Example of loading the pipeline
    loaded_pipeline = PysparkPipelineWrapper.unwrap(
        PipelineModel.load('simple_cnn'))

    # Run predictions and evaluation
    predictions = loaded_pipeline.transform(df).persist()

    evaluator = MulticlassClassificationEvaluator(labelCol="_c0",
                                                  predictionCol="predictions",
                                                  metricName="accuracy")

    accuracy = evaluator.evaluate(predictions)
    print("Train accuracy = %g" % accuracy)
# Cast feature columns to int and keep the 'class' label as a string (loop header assumed; the snippet starts mid-loop)
for f in df.columns:
    if f != 'class':
        df = df.withColumn(f, df[f].cast('int'))
    if f == 'class':
        df = df.withColumn(f, df[f].cast('string'))
df = df.dropna()

train, test = df.randomSplit([0.8, 0.2], seed=0)

class_index = StringIndexer(inputCol='class', outputCol='label')
vector = VectorAssembler(inputCols=feature_cols, outputCol='feature')
model = LinearSVC(featuresCol='feature', labelCol='label')
pipeline = Pipeline(stages=[class_index, vector, model])

pipeline = pipeline.fit(train)
if os.path.exists(MODEL_SAVE_PATH):
    shutil.rmtree(MODEL_SAVE_PATH)
pipeline.write().overwrite().save(MODEL_SAVE_PATH)  # or simply pipeline.save('/to/path')

load_pipeline = PipelineModel.load(MODEL_SAVE_PATH)
test_predict = load_pipeline.transform(test)

evaluator = BinaryClassificationEvaluator(rawPredictionCol='rawPrediction',
                                          labelCol='label')

print(evaluator.evaluate(test_predict, {evaluator.metricName: 'areaUnderROC'}))
print(evaluator.evaluate(test_predict, {evaluator.metricName: 'areaUnderPR'}))

origin_test_df = df.select(feature_cols)

predict_df = load_pipeline.transform(origin_test_df)
predict_df.show(20)
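If an accuracy figure is wanted in addition to the ROC/PR metrics, the standard multiclass evaluator can be applied to the same labeled predictions. A short sketch; the column names assume the defaults produced by LinearSVC above:

from pyspark.ml.evaluation import MulticlassClassificationEvaluator

acc_evaluator = MulticlassClassificationEvaluator(labelCol='label',
                                                  predictionCol='prediction',
                                                  metricName='accuracy')
print(acc_evaluator.evaluate(test_predict))
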
def small_model():
    x = tf.placeholder(tf.float32, shape=[None, 784], name='x')
    y = tf.placeholder(tf.float32, shape=[None, 10], name='y')
    layer1 = tf.layers.dense(x, 256, activation=tf.nn.relu)
    layer2 = tf.layers.dense(layer1, 256, activation=tf.nn.relu)
    out = tf.layers.dense(layer2, 10)
    z = tf.argmax(out, 1, name='out')
    loss = tf.losses.softmax_cross_entropy(y, out)
    return loss


df = spark.read.option("inferSchema", "true").csv('mnist_train.csv')
mg = build_graph(small_model)
# Assemble and one hot encode
va = VectorAssembler(inputCols=df.columns[1:785], outputCol='features')
encoded = OneHotEncoder(inputCol='_c0', outputCol='labels', dropLast=False)

spark_model = SparkAsyncDL(inputCol='features',
                           tensorflowGraph=mg,
                           tfInput='x:0',
                           tfLabel='y:0',
                           tfOutput='out:0',
                           tfLearningRate=.001,
                           iters=20,
                           predictionCol='predicted',
                           labelCol='labels',
                           verbose=1)

p = Pipeline(stages=[va, encoded, spark_model]).fit(df)
p.write().overwrite().save("location")
# Leading arguments assumed to match the earlier SparkAsyncDL examples (the snippet starts mid-call)
spark_model = SparkAsyncDL(
    inputCol='features',
    tensorflowGraph=mg,
    tfInput='x:0',
    tfLabel='y:0',
    tfOutput='out:0',
    tfLearningRate=.001,
    iters=10,
    predictionCol='predicted',
    labelCol='labels',
    verbose=1)



if __name__ == "__main__":

    # Pipeline definition
    # pipe = [va, encoded, spark_model]

    # Train the model
    try:
        import time
        start_time = time.time()
        p = Pipeline(stages=[va, encoded, spark_model]).fit(df)
        print("--- %s seconds ---" % (time.time() - start_time))

        p.write().save("dnn_model")
    except Exception as e:
        print("Error -->", e)

    # Save the model (overwrite variant)
    # p.write().overwrite().save("dnn_model")


    # exec(open("dnn_model.py").read())