Example #1
def test_predict_classes_probability(spark_context, classification_model,
                                     mnist_data):
    batch_size = 64
    nb_classes = 10
    epochs = 1

    x_train, y_train, x_test, y_test = mnist_data
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    df = to_data_frame(spark_context, x_train, y_train, categorical=True)
    test_df = to_data_frame(spark_context, x_test, y_test, categorical=True)

    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    sgd_conf = optimizers.serialize(sgd)

    # Initialize Spark ML Estimator
    estimator = ElephasEstimator()
    estimator.set_keras_model_config(classification_model.to_yaml())
    estimator.set_optimizer_config(sgd_conf)
    estimator.set_mode("synchronous")
    estimator.set_loss("categorical_crossentropy")
    estimator.set_metrics(['acc'])
    estimator.set_epochs(epochs)
    estimator.set_batch_size(batch_size)
    estimator.set_validation_split(0.1)
    estimator.set_categorical_labels(True)
    estimator.set_nb_classes(nb_classes)

    # Fitting a model returns a Transformer
    pipeline = Pipeline(stages=[estimator])
    fitted_pipeline = pipeline.fit(df)

    results = fitted_pipeline.transform(test_df)
    # we should have an array of 10 elements in the prediction column, since we have 10 classes
    # and therefore 10 probabilities
    assert len(results.take(1)[0].prediction) == 10
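The test above omits its imports; a minimal sketch of what it assumes is shown below (module paths vary between elephas and Keras versions, so treat these as assumptions rather than the exact originals).

# Hedged import sketch for Example #1 -- adjust paths to your elephas/Keras versions
from pyspark.ml import Pipeline
from elephas.ml_model import ElephasEstimator
from elephas.ml.adapter import to_data_frame
from tensorflow.keras import optimizers  # older setups use `from keras import optimizers`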
Example #2
# Build Spark DataFrames from numpy features and labels
df = to_data_frame(sc, x_train, y_train, categorical=True)
test_df = to_data_frame(sc, x_test, y_test, categorical=True)

sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
sgd_conf = optimizers.serialize(sgd)

# Initialize Spark ML Estimator
estimator = ElephasEstimator()
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(sgd_conf)
estimator.set_mode("synchronous")
estimator.set_loss("categorical_crossentropy")
estimator.set_metrics(['acc'])
estimator.set_epochs(epochs)
estimator.set_batch_size(batch_size)
estimator.set_validation_split(0.1)
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)

# Fitting a model returns a Transformer
pipeline = Pipeline(stages=[estimator])
fitted_pipeline = pipeline.fit(df)

# Evaluate Spark model by evaluating the underlying model
prediction = fitted_pipeline.transform(test_df)
pnl = prediction.select("label", "prediction")
pnl.show(100)

from pyspark.mllib.evaluation import MulticlassMetrics

prediction_and_label = pnl.rdd.map(lambda row: (row.label, row.prediction))
metrics = MulticlassMetrics(prediction_and_label)
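The excerpt stops right after constructing the metrics object; a hedged continuation, assuming Spark 2.x's MulticlassMetrics API, could summarize the result like this.

# Hypothetical continuation -- print an overall score from MulticlassMetrics (Spark 2.x API)
print("Accuracy: {}".format(metrics.accuracy))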
Example #3
model.compile(loss='categorical_crossentropy', optimizer='adam')

sgd = optimizers.SGD(lr=0.01)
sgd_conf = optimizers.serialize(sgd)

# Initialize Elephas Spark ML Estimator
estimator = ElephasEstimator()
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(sgd_conf)
estimator.set_mode("synchronous")
estimator.set_loss("categorical_crossentropy")
estimator.set_metrics(['acc'])
estimator.setFeaturesCol("scaled_features")
estimator.setLabelCol("index_category")
estimator.set_epochs(10)
estimator.set_batch_size(128)
estimator.set_num_workers(1)
estimator.set_verbosity(0)
estimator.set_validation_split(0.15)
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)

# Fitting a model returns a Transformer
pipeline = Pipeline(stages=[string_indexer, scaler, estimator])
fitted_pipeline = pipeline.fit(train_df)

# Evaluate Spark model
prediction = fitted_pipeline.transform(train_df)
pnl = prediction.select("index_category", "prediction")
pnl.show(100)
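Example #3's pipeline refers to `string_indexer` and `scaler` stages that the excerpt never defines; a hedged sketch of how they might look is below (the input column names are assumptions, not taken from the original).

# Hypothetical stage definitions -- column names are placeholders
from pyspark.ml.feature import StringIndexer, StandardScaler

string_indexer = StringIndexer(inputCol="category", outputCol="index_category")
scaler = StandardScaler(inputCol="features", outputCol="scaled_features")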
Example #4
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='sgd')

# Model Summary
model.summary()

# Initialize SparkML Estimator and Get Settings
estimator = ElephasEstimator()
estimator.setFeaturesCol("features")
estimator.setLabelCol("label_index")
estimator.set_keras_model_config(model.to_yaml())
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)
estimator.set_num_workers(1)
estimator.set_epochs(25)
estimator.set_batch_size(64)
estimator.set_verbosity(1)
estimator.set_validation_split(0.10)
estimator.set_optimizer_config(sgd)
estimator.set_mode("synchronous")
estimator.set_loss("binary_crossentropy")
estimator.set_metrics(['acc'])

# Create Deep Learning Pipeline
dl_pipeline = Pipeline(stages=[estimator])
print(dl_pipeline)


def dl_pipeline_fit_score_results(dl_pipeline=dl_pipeline,
                                  train_data=train_data,
                                  test_data=test_data,
Example #5
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')


# Initialize Elephas Spark ML Estimator
adagrad = elephas_optimizers.Adagrad()

estimator = ElephasEstimator()
estimator.setFeaturesCol("scaled_features")
estimator.setLabelCol("index_category")
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(adagrad.get_config())
estimator.set_nb_epoch(10)
estimator.set_batch_size(128)
estimator.set_num_workers(4)
estimator.set_verbosity(0)
estimator.set_validation_split(0.15)
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)

# Fitting a model returns a Transformer
pipeline = Pipeline(stages=[string_indexer, scaler, estimator])
fitted_pipeline = pipeline.fit(train_df)

# Evaluate Spark model
from pyspark.mllib.evaluation import MulticlassMetrics

prediction = fitted_pipeline.transform(train_df)
pnl = prediction.select("index_category", "prediction")
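The MulticlassMetrics import in Example #5 is never used within the excerpt; a hedged continuation that would put it to work, reusing the columns selected just above, might read:

# Hypothetical continuation -- evaluate the fitted pipeline with MulticlassMetrics
prediction_and_label = pnl.rdd.map(lambda row: (row.index_category, row.prediction))
metrics = MulticlassMetrics(prediction_and_label)
print("Accuracy: {}".format(metrics.accuracy))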
Example #6
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build Spark DataFrames from numpy features and labels
df = to_data_frame(sc, x_train, y_train, categorical=True)
test_df = to_data_frame(sc, x_test, y_test, categorical=True)

# Define elephas optimizer
adadelta = elephas_optimizers.Adadelta()

# Initialize Spark ML Estimator
estimator = ElephasEstimator()
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(adadelta.get_config())
estimator.set_nb_epoch(nb_epoch)
estimator.set_batch_size(batch_size)
estimator.set_num_workers(1)
estimator.set_verbosity(0)
estimator.set_validation_split(0.1)
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)

# Fitting a model returns a Transformer
pipeline = Pipeline(stages=[estimator])
fitted_pipeline = pipeline.fit(df)

# Evaluate Spark model by evaluating the underlying model
prediction = fitted_pipeline.transform(test_df)
pnl = prediction.select("label", "prediction")
pnl.show(100)
Example #7
        adam = optimizers.nadam(lr=0.01)  # lowercase `nadam` is the Nadam alias in older Keras
        opt_conf = optimizers.serialize(adam)

        # Initialize SparkML Estimator and set all relevant properties
        estimator = ElephasEstimator()
        # setFeaturesCol / setLabelCol come directly from pyspark, hence the camelCase. Sorry :)
        estimator.setFeaturesCol("features")
        estimator.setLabelCol("target")
        estimator.set_keras_model_config(model.to_yaml())  # Provide serialized Keras model
        estimator.set_categorical_labels(True)
        estimator.set_nb_classes(num_classes)
        estimator.set_num_workers(10)  # Number of Spark workers. Feel free to adapt it.
        estimator.set_epochs(2)  # was max-epochs
        estimator.set_batch_size(batch_size)  # was 128
        estimator.set_verbosity(2)  # was 1
        estimator.set_validation_split(0.15)
        estimator.set_optimizer_config(opt_conf)
        estimator.set_mode("synchronous")  # Was synchronous
        estimator.set_loss(mywloss)  # was("categorical_crossentropy")
        estimator.set_metrics(['accuracy'])

        buildModelElapsed = time.time() - start
        buildModelElapseCpu = time.clock() - startCpu

        start = time.time()
        startCpu = time.clock()

        pipeline = Pipeline(stages=[estimator])
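The final excerpt ends before the pipeline is fitted; a minimal hedged continuation, assuming a training DataFrame named `train_df` that is not shown in the excerpt, would be:

        # Hypothetical continuation -- `train_df` is an assumption, not defined in this excerpt
        fitted_pipeline = pipeline.fit(train_df)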