def test_serialization():
    # `model` is assumed to be a compiled Keras model defined elsewhere in the test module.
    spark_model = SparkMLlibModel(model, frequency='epoch', mode='synchronous', num_workers=2)
    spark_model.save("test.h5")
    load_spark_model("test.h5")
def test_serialization(classification_model):
    rms = RMSprop()
    classification_model.compile(rms, 'categorical_crossentropy', ['acc'])
    spark_model = SparkMLlibModel(classification_model, frequency='epoch',
                                  mode='synchronous', num_workers=2)
    spark_model.save("test.h5")
    loaded_model = load_spark_model("test.h5")
    assert loaded_model.master_network.to_yaml()
def test_mllib_model(spark_context):
    # `model`, `x_train`, `y_train`, `x_test`, `y_test` and `nb_classes` are assumed
    # to be defined elsewhere in the test module.
    # Build RDD from numpy features and labels
    lp_rdd = to_labeled_point(spark_context, x_train, y_train, categorical=True)
    # Initialize SparkModel from Keras model and Spark context
    spark_model = SparkMLlibModel(model=model, frequency='epoch', mode='synchronous')
    # Train Spark model
    spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0,
                    validation_split=0.1, categorical=True, nb_classes=nb_classes)
    # Evaluate Spark model by evaluating the underlying model
    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
    print('Test accuracy:', score[1])
def test_mllib_model(spark_context, classification_model, mnist_data):
    rms = RMSprop()
    classification_model.compile(rms, 'categorical_crossentropy', ['acc'])
    x_train, y_train, x_test, y_test = mnist_data
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    nb_classes = 10  # MNIST digit classes
    # Build RDD from numpy features and labels
    lp_rdd = to_labeled_point(spark_context, x_train, y_train, categorical=True)
    # Initialize SparkModel from tensorflow.keras model and Spark context
    spark_model = SparkMLlibModel(model=classification_model, frequency='epoch',
                                  mode='synchronous')
    # Train Spark model
    spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0,
                    validation_split=0.1, categorical=True, nb_classes=nb_classes)
    # Evaluate Spark model by evaluating the underlying model
    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
    assert score
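The two tests above rely on pytest fixtures (spark_context, classification_model, mnist_data) that are not shown in these snippets. Below is a minimal sketch of how such fixtures might be defined, assuming tensorflow.keras and MNIST images flattened to 784-dimensional vectors; the layer sizes and fixture bodies are illustrative assumptions, not the project's actual conftest.

import pytest
from pyspark import SparkConf, SparkContext
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.utils import to_categorical


@pytest.fixture(scope='session')
def spark_context():
    # Local Spark context shared across the test session
    conf = SparkConf().setAppName('elephas-tests').setMaster('local[2]')
    sc = SparkContext(conf=conf)
    yield sc
    sc.stop()


@pytest.fixture
def classification_model():
    # Small uncompiled MLP; the tests compile it themselves
    model = Sequential()
    model.add(Dense(128, input_dim=784))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    return model


@pytest.fixture
def mnist_data():
    # Flattened, scaled MNIST with one-hot labels
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(-1, 784).astype('float32') / 255
    x_test = x_test.reshape(-1, 784).astype('float32') / 255
    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)
    return x_train, y_train, x_test, y_test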
model.add(Activation('softmax'))

# Compile model
rms = RMSprop()
model.compile(rms, "categorical_crossentropy", ['acc'])

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True)

# Initialize SparkModel from tensorflow.keras model and Spark context
spark_model = SparkMLlibModel(model=model, frequency='epoch', mode='synchronous')

# Train Spark model
spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0,
                validation_split=0.1, categorical=True, nb_classes=nb_classes)

# Evaluate Spark model by evaluating the underlying model
score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
print('Test accuracy:', score[1])
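The script above picks up mid-way through the model definition and assumes x_train, y_train, x_test, y_test and nb_classes already exist. A minimal sketch of the setup that would precede it, assuming tensorflow.keras and its MNIST loader; the imports and layer sizes are illustrative assumptions.

from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import to_categorical
from pyspark import SparkConf, SparkContext
from elephas.spark_model import SparkMLlibModel
from elephas.utils.rdd_utils import to_labeled_point

nb_classes = 10

# Load and flatten MNIST, scale pixels to [0, 1], one-hot encode the labels
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 784).astype('float32') / 255
x_test = x_test.reshape(-1, 784).astype('float32') / 255
y_train = to_categorical(y_train, nb_classes)
y_test = to_categorical(y_test, nb_classes)

# Multi-layer perceptron; the snippet above continues with the final softmax activation
model = Sequential()
model.add(Dense(128, input_dim=784))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(nb_classes))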
model.add(Dropout(0.2))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True)
rdd = lp_to_simple_rdd(lp_rdd, True, nb_classes)

# Initialize SparkModel from Keras model and Spark context.
# Note: the optimizer argument, train() and nb_epoch belong to the older Elephas API;
# recent releases use fit(..., epochs=...) instead.
adagrad = elephas_optimizers.Adagrad()
spark_model = SparkMLlibModel(sc, model, optimizer=adagrad, frequency='batch',
                              mode='asynchronous', num_workers=4)

# Train Spark model
spark_model.train(lp_rdd, nb_epoch=nb_epoch, batch_size=batch_size, verbose=0,
                  validation_split=0.1, categorical=True, nb_classes=nb_classes)

# Evaluate Spark model by evaluating the underlying model
loss, acc = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
print('Test accuracy:', acc)
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

# Compile model
rms = RMSprop()
model.compile(loss='categorical_crossentropy', optimizer=rms)

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
lp_rdd = to_labeled_point(sc, X_train, Y_train, categorical=True)
print(lp_rdd.first())
rdd = lp_to_simple_rdd(lp_rdd, True, nb_classes)
rdd = rdd.repartition(4)
rdd.first()

# Initialize SparkModel from Keras model and Spark context
adadelta = elephas_optimizers.Adadelta()
spark_model = SparkMLlibModel(sc, model, optimizer=adadelta, frequency='batch',
                              mode='asynchronous', num_workers=2)

# Train Spark model
spark_model.train(lp_rdd, nb_epoch=20, batch_size=32, verbose=0,
                  validation_split=0.1, categorical=True, nb_classes=nb_classes)

# Evaluate Spark model by evaluating the underlying model
score = spark_model.get_network().evaluate(X_test, Y_test, show_accuracy=True, verbose=2)
print('Test accuracy:', score[1])
rms = RMSprop()

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True)
rdd = lp_to_simple_rdd(lp_rdd, True, nb_classes)

# Initialize SparkModel from Keras model and Spark context
adadelta = elephas_optimizers.Adadelta()
spark_model = SparkMLlibModel(sc, model, optimizer=adadelta, frequency='batch',
                              mode='asynchronous', num_workers=2, master_optimizer=rms)

# Train Spark model
spark_model.train(lp_rdd, nb_epoch=20, batch_size=32, verbose=0,
                  validation_split=0.1, categorical=True, nb_classes=nb_classes)

# Evaluate Spark model by evaluating the underlying model
score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
outputCol="tf_features", vocabSize=input_dim) # IDF idf = sf.IDF(inputCol="tf_features", outputCol="features") label_string = sf.StringIndexer(inputCol="first_label", outputCol="label") pipeline_dl = Pipeline(stages=[cv, idf, label_string]) df = pipeline_dl.fit(training_set).transform(training_set) df = df.rdd.map(lambda x: (LabeledPoint(x[ 'label'], MLLibVectors.fromML(x['features'])))) logger.info("Pipeline created ...") logger.info("Transforms the text into tf idf RDD ...") model = create_keras_model(input_dim, output_dim) logger.info("Starts Training ...") spark_model = SparkMLlibModel(model=model, frequency='epoch', mode='asynchronous', parameter_server_mode='socket') spark_model.fit(df, epochs=epochs, batch_size=132, verbose=1, validation_split=0.2, categorical=True, nb_classes=output_dim) logger.info("Training done") spark_model._master_network.save(save_dir + model_dir + "/" + filename) logger.info("Program ended succesfully ! Find the model at :" + save_dir + model_dir + "/" + filename)
metrics=["accuracy"]) # Create Spark context conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]') #sc = SparkContext(conf=conf) # Build RDD from numpy features and labels lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True) rdd = lp_to_simple_rdd(lp_rdd, True, nb_classes) # Initialize SparkModel from Keras model and Spark context adagrad = elephas_optimizers.Adagrad() spark_model = SparkMLlibModel(sc, model, optimizer=adagrad, frequency='batch', mode='asynchronous', num_workers=4) # Train Spark model spark_model.train(lp_rdd, nb_epoch=nb_epoch, batch_size=batch_size, verbose=0, validation_split=0.1, categorical=True, nb_classes=nb_classes) # Evaluate Spark model by evaluating the underlying model loss, acc = spark_model.master_network.evaluate(x_test, y_test, verbose=2) print('Test accuracy:', acc)