from __future__ import print_function

import os
import sys

import pandas as pd
from elephas import optimizers as elephas_optimizers
from elephas.spark_model import SparkModel
from elephas.utils.rdd_utils import to_simple_rdd

# ls (the cloudLSTM wrapper), rd (the raw-data reader), and getSMARTParameters()
# are project-local helpers assumed to be imported elsewhere in the repo.


def predictMain(modelName, sc):
    timeSteps = 30  # Number of past values used for training
    print("Going to initialize the LSTM model")
    SMARTparameters = getSMARTParameters()
    print("The following are the SMART parameters:", SMARTparameters)
    lstm = ls.cloudLSTM(timeSteps=timeSteps, parms=SMARTparameters)  # Initializing the disk-prediction (LSTM) model
    print("Initialized the model")
    lstmModel = lstm.get_LSTM_Model()  # Keras model used to initialize the SparkModel class
    trainSize = 0.2  # Fraction of the input used for training
    acc = 0.0        # Model accuracy
    inputFilePath = os.environ.get('DATA_FILE_PATH')  # Input CSV file path from the environment
    year = sys.argv[1]   # Year from the command-line arguments
    month = sys.argv[2]  # Month from the command-line arguments
    # e.g. "/home/user/Desktop/Cloud/Test/2014/2014-11*.csv"
    inputFilePath = inputFilePath + str(year) + "/" + str(year) + "-" + str(month) + "*.csv"
    print("InputPath", inputFilePath)
    rd.generate_DataFrame(inputFilePath, SMARTparameters)

    # e.g. "/hadoop/elephas/Output/ST4000DM000.csv"
    inputCSVFilePath = os.environ.get('MODEL_CSV_FILEPATH') + str(modelName) + ".csv"
    modelFeatures = pd.read_csv(filepath_or_buffer=inputCSVFilePath, usecols=SMARTparameters)
    modelLabel = pd.read_csv(filepath_or_buffer=inputCSVFilePath, usecols=['failure'])

    # Replacing NaN values in the input DataFrames
    modelFeatures = modelFeatures.fillna(0)
    modelLabel = modelLabel.fillna(0)

    # Obtaining 3D training and testing vectors
    (feature_train, label_train), (feature_test, label_test) = lstm.train_test_split(
        modelFeatures, modelLabel, trainSize, timeSteps)

    # Training cannot proceed unless failure cases exist in the data
    if len(feature_train) == 0:
        print("Disk model has no failure elements. Training of the model cannot proceed!")
        return

    # Initializing the Adam optimizer for Elephas
    adam = elephas_optimizers.Adam()
    print("Adam optimizer initialized")

    # Converting the training data into a Spark RDD
    rddataset = to_simple_rdd(sc, feature_train, label_train)
    print("Training data converted into a Resilient Distributed Dataset")

    # Initializing the SparkModel with the optimizer, master-worker mode, and number of workers
    spark_model = SparkModel(sc, lstmModel, optimizer=adam, frequency='epoch',
                             mode='asynchronous', num_workers=2)
    print("Spark model initialized")

    # Initial training run, then retraining until the score clears 0.5
    spark_model.train(rddataset, nb_epoch=10, batch_size=200, verbose=1, validation_split=0)
    score = spark_model.evaluate(feature_test, label_test, show_accuracy=True)
    while score <= 0.5:
        spark_model.train(rddataset, nb_epoch=10, batch_size=200, verbose=1, validation_split=0)
        print("LSTM model training done!")
        score = spark_model.evaluate(feature_test, label_test, show_accuracy=True)

    # Saving the trained weights
    print("Saving weights!")
    outFilePath = os.environ.get('GATOR_SQUAD_HOME')
    outFilePath = outFilePath + "Weights/" + str(year) + "/" + str(month) + "/" + str(modelName) + "_my_model_weights.h5"
    spark_model.save_weights(outFilePath)

    # Testing: predict on the held-out data and print a confusion matrix
    print("LSTM model testing commencing!")
    predicted1 = spark_model.predict_classes(feature_test)
    df_confusion = pd.crosstab(label_test.flatten(), predicted1.flatten(),
                               rownames=['Actual'], colnames=['Predicted'], margins=True)
    print(df_confusion)
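
# A minimal driver sketch for predictMain, assuming the script is launched via
# spark-submit with DATA_FILE_PATH, MODEL_CSV_FILEPATH, and GATOR_SQUAD_HOME
# exported in the environment. The Spark app name and the "ST4000DM000" model
# name are illustrative assumptions, not part of the original code.
if __name__ == '__main__':
    from pyspark import SparkConf, SparkContext

    conf = SparkConf().setAppName('DiskFailurePrediction')  # app name assumed
    sc = SparkContext(conf=conf)
    # Usage: spark-submit predict.py <year> <month>, e.g. 2014 11
    predictMain('ST4000DM000', sc)
    sc.stop()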
import os

from keras import backend as K
from keras import regularizers
from keras.layers import (Activation, BatchNormalization, Conv2D, Dense,
                          Dropout, Flatten, Lambda, MaxPooling2D)
from keras.models import Model
from keras.optimizers import Adam
from elephas.spark_model import SparkModel

# ModelFrame (the base class) and weight_decay (the L2-regularization constant)
# are assumed to be defined elsewhere in the project.


class SparseGate(ModelFrame):
    def __init__(self, x_train, y_train, x_test, y_test, inputs, spark_context):
        ModelFrame.__init__(self, x_train, y_train, x_test, y_test, spark_context)
        self.gateModel = None
        self.inputs = inputs

    def gating_network(self):
        # Two convolutional blocks followed by a dense head that emits one
        # score per expert (five experts here).
        c1 = Conv2D(32, (3, 3), padding='same',
                    kernel_regularizer=regularizers.l2(weight_decay),
                    input_shape=self.x_train.shape[1:], name='gate1')(self.inputs)
        c2 = Activation('elu', name='gate2')(c1)
        c3 = BatchNormalization(name='gate3')(c2)
        c4 = Conv2D(32, (3, 3), padding='same',
                    kernel_regularizer=regularizers.l2(weight_decay), name='gate4')(c3)
        c5 = Activation('elu', name='gate5')(c4)
        c6 = BatchNormalization(name='gate6')(c5)
        c7 = MaxPooling2D(pool_size=(2, 2), name='gate7')(c6)
        c8 = Dropout(0.2, name='gate26')(c7)
        c9 = Conv2D(32 * 2, (3, 3), name='gate8', padding='same',
                    kernel_regularizer=regularizers.l2(weight_decay))(c8)
        c10 = Activation('elu', name='gate9')(c9)
        c11 = BatchNormalization(name='gate25')(c10)
        c12 = Conv2D(32 * 2, (3, 3), name='gate10', padding='same',
                     kernel_regularizer=regularizers.l2(weight_decay))(c11)
        c13 = Activation('elu', name='gate11')(c12)
        c14 = BatchNormalization(name='gate12')(c13)
        c15 = MaxPooling2D(pool_size=(2, 2), name='gate13')(c14)
        c16 = Dropout(0.3, name='gate14')(c15)
        c25 = Flatten(name='gate23')(c16)
        c26 = Dense(5, name='gate24', activation='elu')(c25)
        model = Model(inputs=self.inputs, outputs=c26)
        return model

    def create_gate_model(self, expert_models):
        # x[0] holds the gate scores; x[1:] are the expert outputs. Each expert
        # output is weighted by its gate score, the weighted outputs are summed,
        # and the result is passed through a softmax.
        gate_network = self.gating_network()
        merged = Lambda(lambda x: K.tf.transpose(
            sum(K.tf.transpose(x[i]) * x[0][:, i - 1]
                for i in range(1, len(x)))))(
            [gate_network.layers[-1].output] +
            [m.layers[-1].output for m in expert_models])
        b = Activation('softmax', name='gatex')(merged)
        model = Model(inputs=self.inputs, outputs=b)
        model.compile(loss='categorical_crossentropy', optimizer=Adam(),
                      metrics=['accuracy'])
        return model

    def train_gate(self, datagen, weights_file):
        model = self.gateModel
        model.compile(loss='categorical_crossentropy', optimizer=Adam(),
                      metrics=['accuracy'])
        print(model.summary())

        # Distributed training via Elephas, then pull back the master network
        self.gateModel = SparkModel(model, frequency='epoch', mode='asynchronous')
        self.gateModel.fit(self.rdd, epochs=1, batch_size=50, verbose=1)
        self.gateModel = self.gateModel.master_network
        self.gateModel.save_weights(weights_file + '.hdf5')

        # Evaluate the trained model and append its accuracy to the log file
        score = self.gateModel.evaluate(self.x_test, self.y_test, verbose=2, batch_size=50)
        file = '../lib/output.txt'
        append_write = 'a' if os.path.exists(file) else 'w'
        print("------------------------------")
        print("Score is: " + str(score[1]))
        print("------------------------------")
        text_file = open(file, append_write)
        text_file.write("Score: %s" % score[1])
        text_file.close()

    def load_gate_weights(self, model_old, weights_file='../lib/weights/moe_full.hdf5'):
        # Copy weights from a previously trained model, matching layers by name
        model_old.load_weights(weights_file)
        for l in self.gateModel.layers:
            for b in model_old.layers:
                if l.name == b.name:
                    l.set_weights(b.get_weights())
                    print("loaded gate layer " + str(l.name))
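
# A hypothetical wiring sketch for SparseGate, assuming ModelFrame exposes
# x_train/y_train/x_test/y_test and a pre-built training RDD (self.rdd), and
# that the experts were built against the same shared `inputs` tensor. The
# build_expert_models helper, the data variables, and the weights-file path
# are illustrative assumptions, not part of the original code.
from keras.layers import Input

inputs = Input(shape=x_train.shape[1:])            # shared input for gate and experts
experts = build_expert_models(inputs)              # assumed helper: list of Keras experts
gate = SparseGate(x_train, y_train, x_test, y_test, inputs, sc)
gate.gateModel = gate.create_gate_model(experts)   # mixture-of-experts model
gate.train_gate(datagen=None, weights_file='../lib/weights/moe_gate')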