Example #1
def preparar_RDD(seq_len = 0):
  from elephas.utils.rdd_utils import to_simple_rdd
  from os import rename as os_rename
  from os.path import isfile as os_path_isfile
  from pandas import read_csv
  from numpy import float64 as np_float64, concatenate as np_concat
  from keras.utils import to_categorical
  # sc, s_input_path, s_spark_inputpath, numSparkWorkers and mi_reshape are assumed to be defined at module level
  for nF in range(1, 99): # 1,...,(n-1)
    fichtr = 'clicks_X_train_' + str(seq_len) + '-' + str(nF) + '_para_spark.csv'
    if os_path_isfile(s_input_path + fichtr):
      print('Reading train+valid files ' + str(nF) + ' - numAds ' + str(seq_len) + '...')
      X_train = read_csv(s_input_path + 'clicks_X_train_' + str(seq_len) + '-' + str(nF) + '_para_spark.csv', dtype=np_float64, header = None).values
      y_train = read_csv(s_input_path + 'clicks_y_train_' + str(seq_len) + '-' + str(nF) + '_para_spark.csv', dtype=int, header = None).values
      X_valid = read_csv(s_input_path + 'clicks_X_valid_' + str(seq_len) + '-' + str(nF) + '_para_spark.csv', dtype=np_float64, header = None).values
      y_valid = read_csv(s_input_path + 'clicks_y_valid_' + str(seq_len) + '-' + str(nF) + '_para_spark.csv', dtype=int, header = None).values
      print(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape)
      X_train, y_train = mi_reshape(X_train, to_categorical(y_train), seq_len)
      X_valid, y_valid = mi_reshape(X_valid, to_categorical(y_valid), seq_len)
      X_train = np_concat((X_train, X_valid), axis=0) # Include the validation set in the training set for Spark
      y_train = np_concat((y_train, y_valid), axis=0) # Include the validation set in the training set for Spark
      print(X_train.shape, y_train.shape)
      print('Creating RDD (train+valid) ' + str(nF) + ' - numAds ' + str(seq_len) + '...')
      rdd_ini = to_simple_rdd(sc, X_train, y_train)
      # Convert each ndarray [ i.e. array(...) ] into a plain list [ i.e. [...] ]:
      rdd_lista = rdd_ini.map(lambda i: [s.tolist() for s in i])
      # And now save it as text:
      rdd_lista.coalesce(numSparkWorkers, True).saveAsTextFile(s_spark_inputpath + 'clicks_train_seq' + str(seq_len) + '-' + str(nF) + '_rdd') # Force the save into (at least) numSparkWorkers partitions
      print('OK. RDD (train+valid) ' + str(nF) + ' - numAds ' + str(seq_len) + ' saved to HDFS.')
      # Move the processed CSV aside so it is not picked up again on the next run
      os_rename(s_input_path + fichtr, s_input_path + 'ok_en_hdfs/' + fichtr)
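The RDD above is saved as plain text, one [features, label] pair per line. A hypothetical companion loader (not part of the original script; names are illustrative) could parse those files back into NumPy pairs:

# Hypothetical loader for the text RDDs written by preparar_RDD above.
from ast import literal_eval
import numpy as np

def cargar_rdd_txt(sc, path):
  # Each saved line is str([features_as_nested_list, label_as_list]); literal_eval restores the lists.
  return (sc.textFile(path)
            .map(literal_eval)
            .map(lambda pair: (np.array(pair[0]), np.array(pair[1]))))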
Example #2
def test_spark_model_end_to_end(spark_context):
    rdd = to_simple_rdd(spark_context, x_train, y_train)

    # sync epoch
    spark_model = SparkModel(model, frequency='epoch', mode='synchronous', num_workers=2)
    spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1)
    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
    print('Test accuracy:', score[1])

    # sync batch
    spark_model = SparkModel(model, frequency='batch', mode='synchronous', num_workers=2)
    spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1)
    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
    print('Test accuracy:', score[1])

    # async epoch
    spark_model = SparkModel(model, frequency='epoch', mode='asynchronous')
    spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1)
    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
    print('Test accuracy:', score[1])

    # hogwild epoch
    spark_model = SparkModel(model, frequency='epoch', mode='hogwild')
    spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1)
    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
    print('Test accuracy:', score[1])
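This test appears to come from elephas' own test suite and relies on module-level fixtures (model, x_train, y_train, x_test, y_test, epochs, batch_size) defined elsewhere in that file. A minimal stand-in, assuming flattened-MNIST-shaped data and a small compiled Keras classifier, might look like this:

# Illustrative stand-ins for the fixtures the test expects (shapes assume flattened MNIST).
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils

batch_size, epochs, nb_classes = 64, 10, 10
x_train = np.random.rand(1000, 784).astype('float32')
y_train = np_utils.to_categorical(np.random.randint(nb_classes, size=1000), nb_classes)
x_test = np.random.rand(200, 784).astype('float32')
y_test = np_utils.to_categorical(np.random.randint(nb_classes, size=200), nb_classes)

model = Sequential()
model.add(Dense(128, activation='relu', input_dim=784))
model.add(Dense(nb_classes, activation='softmax'))
model.compile('sgd', 'categorical_crossentropy', ['acc'])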
Example #3
def main():
    gps_files = glob.glob('../data/prototype/**/gps_points.csv')
    trip_files = glob.glob('../data/prototype/**/gps_trips.csv')

    file_results = process_file(trip_file = trip_files[0], gps_file = gps_files[0])
    seq_results = build_seq(input_df = file_results['df'], unique_trips = file_results['unique_trips'])

    X = seq_results['x']
    y = seq_results['y']

    print('Building training data from files...')
    for i in range(1, len(gps_files)):
        file_results = process_file(trip_file = trip_files[i], gps_file = gps_files[i])
        seq_results = build_seq(input_df = file_results['df'], unique_trips = file_results['unique_trips'])

        X = np.vstack((X, seq_results['x']))
        y = np.vstack((y, seq_results['y']))

    x_train, x_val, y_train, y_val = train_test_split(X, y, random_state=1, train_size=0.8)

    rdd = to_simple_rdd(sc, x_train, y_train)

    model = build_model()

    spark_model = SparkModel(model, frequency='epoch', mode='asynchronous')

    spark_model.fit(rdd, epochs=5, batch_size=32, verbose=0, validation_split=0.1)
#    model.fit(x_train, y_train, epochs=5, validation_data=(x_val, y_val))

    y_pred = spark_model.predict(x_val)

    acc = sum(np.argmax(y_pred, axis=1) == np.argmax(y_val, axis=1)) / y_pred.shape[0]

    print("Validation Accuracy: {number:.{digits}f}%".format(number=(acc*100), digits=2))
Example #4
    def fit(self, df):
        if hasattr(self._model, 'server'):
            self._model.server.terminate()
        pdf = df.toPandas()

        rdd = to_simple_rdd(self._spark.sparkContext, pdf[self._features], pdf[self._labels])
        # Positional arguments: epochs, batch size, verbose, validation_split
        self._model.train(rdd, self._epoch, self._batch_size, 0, 0.1)
Example #5
def predictMain(modelName,sc):
    timeSteps = 30                                                                  # Number of past values used for training
    print("Going to initialize the LSTM model")
    SMARTparameters = getSMARTParameters()
    print("The following are the SMART parameters:", SMARTparameters)
    lstm = ls.cloudLSTM(timeSteps=timeSteps, parms=SMARTparameters)                 # Initializing the disk-prediction model (LSTM)
    print("Initialized the model")
    lstmModel = lstm.get_LSTM_Model()                                               # Obtaining the LSTM model for initializing the SparkModel class
    trainSize = 0.2                                                                 # Fraction of the input used for training
    acc = 0.0                                                                       # Model accuracy
    inputFilePath = os.environ.get('DATA_FILE_PATH')                                # Get the Input CSV filepath from environment
    year=sys.argv[1]                                                                # get the year from the Command Line arguments
    month=sys.argv[2]                                                               # get the month from the Command Line arguments
    inputFilePath=inputFilePath+str(year)+"/"+str(year)+"-"+str(month)+"*.csv"  # For E.g "/home/user/Desktop/Cloud/Test/2014/2014-11*.csv"
    print("InputPath",inputFilePath)
    rd.generate_DataFrame(inputFilePath,SMARTparameters)
    inputCSVFilePath = os.environ.get('MODEL_CSV_FILEPATH')+str(modelName)+".csv"    # For E.g "/hadoop/elephas/Output/ST4000DM000.csv"

    modelFeatures = pd.read_csv(filepath_or_buffer=inputCSVFilePath,usecols=SMARTparameters)
    modelLabel = pd.read_csv(filepath_or_buffer=inputCSVFilePath,usecols=['failure'])   #"/hadoop/elephas/Output/ST4000DM000.csv"

    # Removing Not A Number values from the Input Dataframe
    modelFeatures = modelFeatures.fillna(0)
    modelLabel = modelLabel.fillna(0)

    # Obtaining 3D training and testing vectors
    (feature_train, label_train), (feature_test, label_test) = lstm.train_test_split(modelFeatures,modelLabel,trainSize,timeSteps)

    # Condition to check whether the failure cases exists in the data
    if len(feature_train) == 0:
        print("DiskModel has no failure elements. Training of the model cannot proceed!!")
        return
    # Initializing the Adam Optimizer for Elephas
    adam = elephas_optimizers.Adam()
    print("Adam optimizer initialized")
    # Converting the DataFrame into a Spark RDD
    rddataset = to_simple_rdd(sc, feature_train, label_train)
    print("Training data converted into a Resilient Distributed Dataset")
    # Initializing the SparkModel with optimizer, master-worker mode and number of workers
    spark_model = SparkModel(sc, lstmModel, optimizer=adam, frequency='epoch', mode='asynchronous', num_workers=2)
    print("Spark model initialized")
    # Initial training run of the model
    spark_model.train(rddataset, nb_epoch=10, batch_size=200, verbose=1, validation_split=0)
    # Scoring the model on the held-out test set
    score = spark_model.evaluate(feature_test, label_test, show_accuracy=True)

    while(score <= 0.5):
        # Retrain on the input data set
        spark_model.train(rddataset, nb_epoch=10, batch_size=200, verbose=1, validation_split=0)
        print("LSTM model training done!!")
        score = spark_model.evaluate(feature_test, label_test, show_accuracy=True)
    print("Saving weights!!")
    outFilePath = os.environ.get('GATOR_SQUAD_HOME')
    outFilePath = outFilePath + "Weights/" + str(year) + "/" + str(month) + "/" + str(modelName) + "_my_model_weights.h5"
    spark_model.save_weights(outFilePath)
    print("LSTM model testing commencing!!")
    predicted1 = spark_model.predict_classes(feature_test)
    df_confusion = pd.crosstab(label_test.flatten(), predicted1.flatten(), rownames=['Actual'], colnames=['Predicted'], margins=True)
    print(df_confusion)
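Note that this example uses the legacy elephas API (SparkModel(sc, model, optimizer=...), train(..., nb_epoch=...), predict_classes). With recent elephas releases the same training and evaluation step looks roughly like the calls in Examples #2 and #8; a sketch, assuming the same lstmModel, rddataset and test arrays:

# Sketch of the newer-API equivalent; the per-worker elephas optimizer is dropped and the
# optimizer compiled into the Keras model is used instead.
spark_model = SparkModel(lstmModel, frequency='epoch', mode='asynchronous', num_workers=2)
spark_model.fit(rddataset, epochs=10, batch_size=200, verbose=1, validation_split=0)
score = spark_model.master_network.evaluate(feature_test, label_test, verbose=2)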
Example #6
    def __init__(self, x_train, y_train, x_test, y_test, spark_context=None):
        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test
        self.y_test = y_test
        if spark_context is not None:
            self.rdd = to_simple_rdd(spark_context, x_train, y_train)
        else:
            self.rdd = None
Example #7
def test_to_simple_rdd(spark_context):
    features = np.ones((5, 10))
    labels = np.ones((5, ))
    rdd = rdd_utils.to_simple_rdd(spark_context, features, labels)

    assert rdd.count() == 5
    first = rdd.first()
    assert first[0].shape == (10, )
    assert first[1] == 1.0
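For reference, the same call can be exercised outside the test harness with a local Spark context; a minimal sketch, assuming a local pyspark installation:

import numpy as np
from pyspark import SparkConf, SparkContext
from elephas.utils.rdd_utils import to_simple_rdd

# Local Spark context just for this sanity check.
sc = SparkContext(conf=SparkConf().setAppName('to_simple_rdd_demo').setMaster('local[2]'))

features = np.ones((5, 10))
labels = np.ones((5,))
rdd = to_simple_rdd(sc, features, labels)   # RDD of (feature_row, label) pairs
print(rdd.first())                          # (array([1., 1., ..., 1.]), 1.0)
sc.stop()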
Example #8
def test_training_classification(spark_context, mode, parameter_server_mode,
                                 mnist_data, classification_model):
    # Define basic parameters
    batch_size = 64
    epochs = 10

    # Load data
    x_train, y_train, x_test, y_test = mnist_data
    x_train = x_train[:1000]
    y_train = y_train[:1000]

    sgd = SGD(lr=0.1)
    classification_model.compile(sgd, 'categorical_crossentropy', ['acc'])

    # Build RDD from numpy features and labels
    rdd = to_simple_rdd(spark_context, x_train, y_train)

    # Initialize SparkModel from keras model and Spark context
    spark_model = SparkModel(classification_model,
                             frequency='epoch',
                             mode=mode,
                             parameter_server_mode=parameter_server_mode,
                             port=4000 + random.randint(0, 500))

    # Train Spark model
    spark_model.fit(rdd,
                    epochs=epochs,
                    batch_size=batch_size,
                    verbose=0,
                    validation_split=0.1)

    # run inference on trained spark model
    predictions = spark_model.predict(x_test)
    # run evaluation on trained spark model
    evals = spark_model.evaluate(x_test, y_test)

    # assert we can supply rdd and get same prediction results when supplying numpy array
    test_rdd = spark_context.parallelize(x_test)
    assert [np.argmax(x) for x in predictions
            ] == [np.argmax(x) for x in spark_model.predict(test_rdd)]

    # assert we get the same prediction result with calling predict on keras model directly
    assert [np.argmax(x) for x in predictions] == [
        np.argmax(x) for x in spark_model.master_network.predict(x_test)
    ]

    # assert we get the same evaluation results when calling evaluate on keras model directly
    assert isclose(evals[0],
                   spark_model.master_network.evaluate(x_test, y_test)[0],
                   abs_tol=0.01)
    assert isclose(evals[1],
                   spark_model.master_network.evaluate(x_test, y_test)[1],
                   abs_tol=0.01)
Example #9
def test_sync_mode(spark_context):
    # Define basic parameters
    batch_size = 64
    nb_classes = 10
    epochs = 10

    # Load data
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    x_train = x_train.reshape(60000, 784)
    x_test = x_test.reshape(10000, 784)
    x_train = x_train.astype("float32")
    x_test = x_test.astype("float32")
    x_train /= 255
    x_test /= 255
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # Convert class vectors to binary class matrices
    y_train = np_utils.to_categorical(y_train, nb_classes)
    y_test = np_utils.to_categorical(y_test, nb_classes)

    model = Sequential()
    model.add(Dense(128, input_dim=784))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(10))
    model.add(Activation('softmax'))

    sgd = SGD(lr=0.1)
    model.compile(sgd, 'categorical_crossentropy', ['acc'])

    # Build RDD from numpy features and labels
    rdd = to_simple_rdd(spark_context, x_train, y_train)

    # Initialize SparkModel from Keras model and Spark context
    spark_model = SparkModel(model, mode='synchronous')

    # Train Spark model
    spark_model.fit(rdd,
                    epochs=epochs,
                    batch_size=batch_size,
                    verbose=2,
                    validation_split=0.1)

    # Evaluate Spark model by evaluating the underlying model
    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
    assert score[1] >= 0.70
Example #10
def test_training_regression(spark_context, mode, parameter_server_mode,
                             boston_housing_dataset, regression_model):
    x_train, y_train, x_test, y_test = boston_housing_dataset
    rdd = to_simple_rdd(spark_context, x_train, y_train)

    # Define basic parameters
    batch_size = 64
    epochs = 10
    sgd = SGD(lr=0.0000001)
    regression_model.compile(sgd, 'mse', ['mae'])
    spark_model = SparkModel(regression_model,
                             frequency='epoch',
                             mode=mode,
                             parameter_server_mode=parameter_server_mode,
                             port=4000 + random.randint(0, 500))

    # Train Spark model
    spark_model.fit(rdd,
                    epochs=epochs,
                    batch_size=batch_size,
                    verbose=0,
                    validation_split=0.1)

    # run inference on trained spark model
    predictions = spark_model.predict(x_test)
    # run evaluation on trained spark model
    evals = spark_model.evaluate(x_test, y_test)

    # assert we can supply rdd and get same prediction results when supplying numpy array
    test_rdd = spark_context.parallelize(x_test)
    assert all(
        np.isclose(x, y, 0.01)
        for x, y in zip(predictions, spark_model.predict(test_rdd)))

    # assert we get the same prediction result with calling predict on keras model directly
    assert all(
        np.isclose(x, y, 0.01) for x, y in zip(
            predictions, spark_model.master_network.predict(x_test)))

    # assert we get the same evaluation results when calling evaluate on keras model directly
    assert isclose(evals[0],
                   spark_model.master_network.evaluate(x_test, y_test)[0],
                   abs_tol=0.01)
    assert isclose(evals[1],
                   spark_model.master_network.evaluate(x_test, y_test)[1],
                   abs_tol=0.01)
Example #11
def dist_training(n_iter):
    sbcnn = SBCNN_Model(field_size, bands, frames, num_channels, num_labels)

    sgd = SGD(lr=0.001, momentum=0.0, decay=0.0, nesterov=False)
    sbcnn.compile(loss='categorical_crossentropy',
                  metrics=['accuracy'],
                  optimizer=sgd)

    train_arr, train_labels_arr, test_arr, test_labels_arr = get_data()
    rdd = to_simple_rdd(sc, train_arr, train_labels_arr)

    spark_model = SparkModel(sbcnn, frequency='epoch', mode='asynchronous')
    spark_model.fit(rdd,
                    epochs=n_iter,
                    batch_size=32,
                    verbose=0,
                    validation_split=0.1)

    score = spark_model.master_network.evaluate(test_arr,
                                                test_labels_arr,
                                                verbose=2)
    print('Test accuracy:', score[1])
Example #12
def train_elephas_model(x, y):
    model = models.Sequential()

    # Optimizer and input layer
    adam = optimizers.Adam(lr=0.01)
    model.add(Dense(256, activation="relu", input_shape=(x.shape[1],)))
    model.add(Dropout(0.05))

    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.05))

    # Output layer
    model.add(Dense(1))
    model.compile(optimizer=adam, loss="mse", metrics=["mse"])
    model.summary()

    rdd = to_simple_rdd(sc, x, y)
    spark_model = SparkModel(model, frequency='epoch', mode='asynchronous')
    # spark_model.fit(rdd, epochs=10, batch_size=64, verbose=1, validation_split=0.2)
    spark_model.fit(rdd, epochs=25, batch_size=64, verbose=1, validation_split=0.2)

    return spark_model
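train_elephas_model assumes the usual Keras imports and a SparkContext sc defined at module level; a sketch of that setup (app name is hypothetical):

# Imports and Spark context assumed by train_elephas_model above.
from keras import models, optimizers
from keras.layers import Dense, Dropout
from elephas.spark_model import SparkModel
from elephas.utils.rdd_utils import to_simple_rdd
from pyspark import SparkConf, SparkContext

sc = SparkContext(conf=SparkConf().setAppName('elephas_regression').setMaster('local[*]'))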
Example #13
    print('Training data : x')
    print(type(x_train))
    print(x_train)
    print('Training data : y')
    print(type(y_train))
    print(y_train)

    print('Test data : x')
    print(type(x_test))
    print(x_test)
    print('Test data : y')
    print(type(y_test))
    print(y_test)

    print('Converting training data to RDD')
    rddataset = to_simple_rdd(sc, x_train, y_train)

    print('Initializing Spark model')
    sgd = elephas_optimizers.SGD()
    spark_model = SparkModel(sc,
                             model,
                             optimizer=sgd,
                             frequency='epoch',
                             mode='asynchronous',
                             num_workers=2)

    print('Commencing training')
    spark_model.train(rddataset,
                      nb_epoch=10,
                      batch_size=200,
                      verbose=1,
                      validation_split=0)
Example #14
y_test = np_utils.to_categorical(y_test, nb_classes)

model = Sequential()
model.add(Dense(128, input_dim=784))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

sgd = SGD(lr=0.1)

# Build RDD from numpy features and labels
rdd = to_simple_rdd(sc, x_train, y_train)

# Initialize SparkModel from Keras model and Spark context
adagrad = elephas_optimizers.Adagrad()
spark_model = SparkModel(sc,
                         model,
                         optimizer=adagrad,
                         frequency='epoch',
                         mode='asynchronous',
                         num_workers=2,
                         master_optimizer=sgd)

# Train Spark model
spark_model.train(rdd,
                  nb_epoch=nb_epoch,
                  batch_size=batch_size,
Example #15
from pyspark.sql.window import Window
from pyspark.sql.functions import col, lag
for i in range(2, 3):
    col1 = "Price%d" % (i - 1)
    col2 = "Price%d" % i
    w = Window().partitionBy().orderBy(col("Date_Time"))
    FinalDf = FinalDf.select("*", lag(col1).over(w).alias(col2)).na.drop()
    FinalDf.show()

FinalDf = FinalDf.selectExpr('Date_Time', 'Sentiment_score',
                             'Price2 as Input_price', 'Price1 as Price')

# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], n_hours, n_features))
test_X = test_X.reshape((test_X.shape[0], n_hours, n_features))

rdd = to_simple_rdd(sc, train_X, train_y)
rdd.count()

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers import LSTM
from keras.optimizers import SGD
#from keras.models import model_from_yaml,slice_X

from keras.utils import np_utils
from elephas.spark_model import SparkModel
from elephas.utils.rdd_utils import to_simple_rdd
from elephas import optimizers as elephas_optimizers

model = Sequential()
model.add(LSTM(5, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dense(1))
Example #16
    print("Creating Training and Test Data")
    ((x_train, y_train), (x_test, y_test)) = train_test_split(testinput.fillna(0), testoutput.fillna(0), test_size=0.3)

    print("Training data : x")
    print(type(x_train))
    print(x_train)
    print("Training data : y")
    print(type(y_train))
    print(y_train)

    print("Test data : x")
    print(type(x_test))
    print(x_test)
    print("Test data : y")
    print(type(y_test))
    print(y_test)

    print("Converting training data to RDD")
    rddataset = to_simple_rdd(sc, x_train, y_train)

    print("Initializing SPark Model")
    sgd = elephas_optimizers.SGD()
    spark_model = SparkModel(sc, model, optimizer=sgd, frequency="epoch", mode="asynchronous", num_workers=2)

    print("Commencing training")
    spark_model.train(rddataset, nb_epoch=10, batch_size=200, verbose=1, validation_split=0)
    # model.fit(x_train, y_train, nb_epoch=5, batch_size=32)
    print("Training completed")

    sc.stop()
Example #17
model.add(Activation('relu'))
model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adadelta')

## spark
conf = SparkConf().setAppName(APP_NAME).setMaster(MASTER_IP)
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
rdd = to_simple_rdd(sc, X_train, Y_train)

# Initialize SparkModel from Keras model and Spark context
spark_model = SparkModel(sc, model)

# Train Spark model
spark_model.train(rdd, nb_epoch=nb_epoch, batch_size=batch_size, verbose=0, validation_split=0.1, num_workers=24)

# Evaluate Spark model by evaluating the underlying model
score = spark_model.get_network().evaluate(X_test, Y_test, show_accuracy=True, verbose=2)
print('Test accuracy:', score[1])
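SparkModel(sc, model), train(...), get_network() and show_accuracy belong to the older elephas/Keras 1.x API. A rough modern-API equivalent of the training and evaluation block, in the style of Examples #2 and #11 (and assuming the model is compiled with metrics=['accuracy'] so that score[1] is the accuracy):

# Rough modern-API equivalent; reuses rdd, X_test, Y_test, nb_epoch and batch_size from above.
spark_model = SparkModel(model, frequency='epoch', mode='asynchronous', num_workers=24)
spark_model.fit(rdd, epochs=nb_epoch, batch_size=batch_size, verbose=0, validation_split=0.1)
score = spark_model.master_network.evaluate(X_test, Y_test, verbose=2)
print('Test accuracy:', score[1])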
Example #18
model = Sequential()
model.add(Dense(128, input_dim=784))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

# Compile model
sgd = SGD(lr=0.1)
model.compile(loss='categorical_crossentropy', optimizer=sgd)

# Build RDD from numpy features and labels
rdd = to_simple_rdd(sc, x_train, y_train)

# Initialize SparkModel from Keras model and Spark context
adagrad = elephas_optimizers.Adagrad()
spark_model = SparkModel(sc,
                         model,
                         optimizer=adagrad,
                         frequency='epoch',
                         mode='asynchronous',
                         num_workers=2)

# Train Spark model
spark_model.train(rdd,
                  nb_epoch=nb_epoch,
                  batch_size=batch_size,
                  verbose=2,
Example #19
model = Sequential()
model.add(Dense(128, input_dim=784))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

# Compile model
rms = RMSprop()
model.compile(loss='categorical_crossentropy', optimizer=rms)

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
rdd = to_simple_rdd(sc, X_train, Y_train)

# Initialize SparkModel from Keras model and Spark context
spark_model = SparkModel(sc, model)

# Train Spark model
spark_model.train(rdd, nb_epoch=nb_epoch, batch_size=batch_size, verbose=0, validation_split=0.1, num_workers=8)

# Evaluate Spark model by evaluating the underlying model
score = spark_model.get_network().evaluate(X_test, Y_test, show_accuracy=True, verbose=2)
print('Test accuracy:', score[1])
Example #20
# # normalize the dataset
# scaler = MinMaxScaler(feature_range=(0, 1))
# inputdata = scaler.fit_transform(inputdata)
# split into train and test sets
train_size = int(len(inputdata) * 0.8)
test_size = len(inputdata) - train_size
train, test = inputdata[0:train_size,:], inputdata[train_size:len(inputdata),:]
# reshape into X=t and Y=t+1
look_back = 2
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
# reshape input to be [samples, time steps, features]
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
rdd = to_simple_rdd(spark.sparkContext, trainX, trainY)

# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=300, batch_size=1, verbose=2)

# adam = elephas_optimizers.Adam()
#
# spark_model = SparkModel(spark.sparkContext, model, optimizer=adam, frequency='epoch', num_workers=2)
# spark_model.train(rdd, nb_epoch=50, batch_size=4, verbose=2, validation_split=0.1)


# make predictions
Example #21
labels = []
features = []

for message in consumer:
    #print(message.value)
    labels.append(message.value["label"])
    features.append(message.value["features"]["values"])

# Convert the collected Kafka messages into NumPy arrays
labels = np.array(labels)
features = np.array(features)

model = Sequential()
model.add(Dense(2, input_dim=11))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer=SGD())

lp_rdd = to_simple_rdd(sc, features, labels)
spark_model = SparkModel(model, frequency='epoch', mode='asynchronous')
spark_model.fit(lp_rdd,
                epochs=20,
                batch_size=32,
                verbose=0,
                validation_split=0.1)

spark_model.save("model.h5")
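The saved model.h5 can then be reloaded as a plain Keras model for local inference; a minimal sketch:

# Reload the saved network locally (sketch; assumes the file written above and matching feature vectors).
from keras.models import load_model

local_model = load_model("model.h5")
preds = local_model.predict(np.array(features))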
Example #22
y_train = transformer.fit_transform(train['Target'].values.reshape(-1, 1))
del train['Target']
y_test = transformer.transform(test['Target'].values.reshape(-1, 1))
del test['Target']

model = Sequential()
model.add(Dense(18, input_dim=26))
model.add(Activation('sigmoid'))
model.add(Dense(6))
model.add(Activation('sigmoid'))
model.add(Dense(1))
model.add(Activation('sigmoid'))

spark = SparkSession.builder.appName('ElephasTest').getOrCreate()
rdd = to_simple_rdd(spark.sparkContext, train, y_train)

sgd = SGD(lr=0.1)
adagrad = elephas_optimizers.Adagrad()
spark_model = SparkModel(spark.sparkContext,
                         model,
                         optimizer=adagrad,
                         frequency='epoch',
                         mode='asynchronous',
                         master_loss='mse',
                         num_workers=2, master_optimizer=sgd)

# Train Spark model
spark_model.train(rdd, nb_epoch=nb_epoch, batch_size=batch_size, verbose=2, validation_split=0.1)

# Evaluate Spark model by evaluating the underlying model
Example #23
        if args.single_threaded_worker:
            conf = SparkConf().setAppName('tardis').setMaster('local')
        else:
            conf = SparkConf().setAppName('tardis').setMaster('local[*]')
        sc = SparkContext.getOrCreate(conf=conf)

        generator_config = deepcopy(args)
        generator_config.batch_size = 1024
        generator_config.target_vocab = target_vocab
        model_config.input_split_index = encoder_train_input.shape[1]
        training_generator = WMTSequence(encoder_train_input, decoder_train_input, decoder_train_target, model_config)

        for raw_train_input, decoder_train_target in training_generator:
            encoder_train_input, decoder_train_input = raw_train_input
            train_input = np.hstack((encoder_train_input, decoder_train_input))
            train_rdd = to_simple_rdd(sc, train_input, decoder_train_target)

            if args.ensemble:
                model = DistributedEnsembleSeq2Seq(model_config)
            else:
                model = DistributedSeq2Seq(model_config)

            spark_model = SparkModel(model.model,
                                     frequency='epoch',
                                     mode='synchronous',
                                     batch_size=args.batch_size,
                                     custom_objects={'EncoderSlice': EncoderSlice, 'DecoderSlice': DecoderSlice})

            spark_model.fit(train_rdd,
                            batch_size=model_config.batch_size,
                            epochs=model_config.epochs,
Example #24
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(2))
model.add(Activation('sigmoid'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# Define elephas optimizer (which tells the model how to aggregate updates on the Spark master)
adadelta = elephas_optimizers.Adadelta()

from elephas.utils.rdd_utils import to_labeled_point
from elephas.utils.rdd_utils import to_simple_rdd
lp_rdd = to_simple_rdd(sc, features_train, labels_train)

#print(lp_rdd.take(5))

from elephas.spark_model import SparkModel
from elephas import optimizers as elephas_optimizers

adagrad = elephas_optimizers.Adagrad()
spark_model = SparkModel(sc,
                         model,
                         optimizer=adagrad,
                         frequency='epoch',
                         mode='asynchronous',
                         num_workers=8)
spark_model.train(lp_rdd,
                  nb_epoch=20,